Add raw token ids support to OAI logit bias. Fix token counting for turbo models
commit b167eb9e22
parent 9611e31481
@@ -1750,6 +1750,15 @@
                 Add bias entry
             </div>
             <div class="openai_logit_bias_list"></div>
+            <div class="m-t-1">
+                <small>
+                    <i class="fa-solid fa-lightbulb"></i>
+
+                    <span data-i18n="Most tokens have a leading space.">
+                        Most tokens have a leading space.
+                    </span>
+                </small>
+            </div>
         </div>
     </div>
 </div>
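The hint added above reflects a real BPE quirk: most words map to a different token id when preceded by a space than when they start a string, so a bias entry usually needs the leading space to hit the token that actually appears mid-sentence. A minimal sketch, assuming the @dqbd/tiktoken package that the server-side tokenizer calls here appear to come from (the printed ids are whatever that vocabulary assigns, not values from this commit):

    import { encoding_for_model } from '@dqbd/tiktoken';

    const enc = encoding_for_model('gpt-3.5-turbo');
    console.log(enc.encode('Hello'));  // id for the word at the start of a string
    console.log(enc.encode(' Hello')); // a different id for the mid-sentence form
    enc.free();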
@@ -4316,7 +4325,7 @@

 <div id="openai_logit_bias_template" class="template_element">
     <div class="openai_logit_bias_form">
-        <input class="openai_logit_bias_text text_pole" data-i18n="[placeholder]Type here..." placeholder="type here..." />
+        <input class="openai_logit_bias_text text_pole" data-i18n="[placeholder]Text or token ids" placeholder="Text or [token ids]" />
         <input class="openai_logit_bias_value text_pole" type="number" min="-100" value="0" max="100" />
         <i class="menu_button fa-solid fa-xmark openai_logit_bias_remove"></i>
     </form>
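With the reworded placeholder, a single bias entry now accepts either plain text (tokenized server-side) or a bracketed JSON array of raw token ids. Hypothetical entries a user might submit (the id values are placeholders, not taken from this commit):

    const biasEntries = [
        { text: ' Hello', value: -50 },       // plain text, encoded by the server tokenizer
        { text: '[15496, 2159]', value: 25 }, // raw token ids, parsed as JSON
    ];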
@@ -182,6 +182,7 @@ export function getTokenizerModel() {
         return oai_settings.openai_model;
     }

+    const turbo0301Tokenizer = 'gpt-3.5-turbo-0301';
     const turboTokenizer = 'gpt-3.5-turbo';
     const gpt4Tokenizer = 'gpt-4';
     const gpt2Tokenizer = 'gpt2';
@@ -197,6 +198,9 @@ export function getTokenizerModel() {
     if (oai_settings.windowai_model.includes('gpt-4')) {
         return gpt4Tokenizer;
     }
+    else if (oai_settings.windowai_model.includes('gpt-3.5-turbo-0301')) {
+        return turbo0301Tokenizer;
+    }
     else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) {
         return turboTokenizer;
     }
@@ -213,6 +217,9 @@ export function getTokenizerModel() {
     if (oai_settings.openrouter_model.includes('gpt-4')) {
         return gpt4Tokenizer;
     }
+    else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo-0301')) {
+        return turbo0301Tokenizer;
+    }
     else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) {
         return turboTokenizer;
     }
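In both blocks the branch order matters: includes('gpt-3.5-turbo') also matches the dated snapshot name, so the new -0301 test has to come first or it would never run. The same constraint holds for the server-side getTokenizerModel(requestModel) change further down. A one-line demonstration:

    const model = 'gpt-3.5-turbo-0301';
    model.includes('gpt-3.5-turbo');      // true, the generic branch would swallow it
    model.includes('gpt-3.5-turbo-0301'); // also true, so the specific test must run first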
server.js (24 changed lines)
@@ -2812,7 +2812,7 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
         }

         try {
-            const tokens = tokenizer.encode(entry.text);
+            const tokens = getEntryTokens(entry.text);

             for (const token of tokens) {
                 result[token] = entry.value;
@@ -2825,6 +2825,28 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
     // not needed for cached tokenizers
     //tokenizer.free();
     return response.send(result);
+
+    /**
+     * Gets token ids for a given entry
+     * @param {string} text Entry text
+     * @returns {Uint32Array} Array of token ids
+     */
+    function getEntryTokens(text) {
+        // Get raw token ids from JSON array
+        if (text.trim().startsWith('[') && text.trim().endsWith(']')) {
+            try {
+                const json = JSON.parse(text);
+                if (Array.isArray(json) && json.every(x => typeof x === 'number')) {
+                    return new Uint32Array(json);
+                }
+            } catch {
+                // ignore
+            }
+        }
+
+        // Otherwise, get token ids from tokenizer
+        return tokenizer.encode(text);
+    }
 });

 function convertChatMLPrompt(messages) {
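A quick sketch of how getEntryTokens resolves each entry shape (id values are illustrative; tokenizer is the handler-scoped tiktoken instance):

    getEntryTokens('[15496, 2159]'); // valid JSON number array -> Uint32Array [15496, 2159]
    getEntryTokens('[1, "two"]');    // mixed types fail the every() check, falls through to encode
    getEntryTokens('[broken');       // no closing bracket, treated as plain text
    getEntryTokens(' Hello');        // ordinary text -> tokenizer.encode(' Hello')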
@@ -95,6 +95,10 @@ function getTokenizerModel(requestModel) {
         return 'gpt-4';
     }

+    if (requestModel.includes('gpt-3.5-turbo-0301')) {
+        return 'gpt-3.5-turbo-0301';
+    }
+
     if (requestModel.includes('gpt-3.5-turbo')) {
         return 'gpt-3.5-turbo';
     }
@@ -296,8 +300,8 @@ function registerEndpoints(app, jsonParser) {
             return res.send({ "token_count": num_tokens });
         }

-        const tokensPerName = model.includes('gpt-4') ? 1 : -1;
-        const tokensPerMessage = model.includes('gpt-4') ? 3 : 4;
+        const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
+        const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
         const tokensPadding = 3;

         const tokenizer = getTiktokenTokenizer(model);
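These constants track OpenAI's published chat-token accounting: each message carries a fixed overhead (tokensPerMessage), a name field costs tokensPerName extra (negative on -0301, where the name replaces the role), and tokensPadding covers the reply priming tokens. The counting loop itself is outside this hunk, so the following worked example assumes the cookbook formula with made-up content lengths:

    // Non-0301 turbo model: tokensPerMessage = 3, tokensPerName = 1, tokensPadding = 3.
    const systemMsg = 3 + 1 /* "system" */ + 3 /* content tokens, assumed */;       // 7
    const namedUserMsg = 3 + 1 /* "user" */ + 2 /* content */ + 1 /* has a name */; // 7
    const numTokens = systemMsg + namedUserMsg + 3 /* padding */;                   // 17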
@@ -319,7 +323,7 @@ function registerEndpoints(app, jsonParser) {

         // NB: Since 2023-10-14, the GPT-3.5 Turbo 0301 model shoves in 7-9 extra tokens to every message.
         // More details: https://community.openai.com/t/gpt-3-5-turbo-0301-showing-different-behavior-suddenly/431326/14
-        if (queryModel.endsWith('-0301')) {
+        if (queryModel.includes('gpt-3.5-turbo-0301')) {
             num_tokens += 9;
         }

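The guard itself also changed from a suffix test to a scoped substring test, so the nine-token fudge now fires only for the turbo snapshot rather than for anything ending in -0301. Model names below are illustrative:

    'any-model-0301'.endsWith('-0301');                  // true  (old check, over-broad)
    'any-model-0301'.includes('gpt-3.5-turbo-0301');     // false (new check)
    'gpt-3.5-turbo-0301'.includes('gpt-3.5-turbo-0301'); // true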