Add raw token ids support to OAI logit bias. Fix token counting for turbo models
commit b167eb9e22
parent 9611e31481
@@ -1750,6 +1750,15 @@
                 Add bias entry
             </div>
             <div class="openai_logit_bias_list"></div>
+            <div class="m-t-1">
+                <small>
+                    <i class="fa-solid fa-lightbulb"></i>
+
+                    <span data-i18n="Most tokens have a leading space.">
+                        Most tokens have a leading space.
+                    </span>
+                </small>
+            </div>
         </div>
     </div>
 </div>
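The hint added above reflects a real BPE quirk: most words map to a different token id when preceded by a space than when they start a string, so a bias entry usually needs the leading space to hit the token that actually appears mid-sentence. A minimal sketch, assuming the @dqbd/tiktoken package that the server-side tokenizer calls here appear to come from (the printed ids are whatever that vocabulary assigns, not values from this commit):

    import { encoding_for_model } from '@dqbd/tiktoken';

    const enc = encoding_for_model('gpt-3.5-turbo');
    console.log(enc.encode('Hello'));  // id for the word at the start of a string
    console.log(enc.encode(' Hello')); // a different id for the mid-sentence form
    enc.free();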
@@ -4316,7 +4325,7 @@

 <div id="openai_logit_bias_template" class="template_element">
     <div class="openai_logit_bias_form">
-        <input class="openai_logit_bias_text text_pole" data-i18n="[placeholder]Type here..." placeholder="type here..." />
+        <input class="openai_logit_bias_text text_pole" data-i18n="[placeholder]Text or token ids" placeholder="Text or [token ids]" />
         <input class="openai_logit_bias_value text_pole" type="number" min="-100" value="0" max="100" />
         <i class="menu_button fa-solid fa-xmark openai_logit_bias_remove"></i>
     </form>
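With the reworded placeholder, a single bias entry now accepts either plain text (tokenized server-side) or a bracketed JSON array of raw token ids. Hypothetical entries a user might submit (the id values are placeholders, not taken from this commit):

    const biasEntries = [
        { text: ' Hello', value: -50 },       // plain text, encoded by the server tokenizer
        { text: '[15496, 2159]', value: 25 }, // raw token ids, parsed as JSON
    ];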
@@ -182,6 +182,7 @@ export function getTokenizerModel() {
         return oai_settings.openai_model;
     }

+    const turbo0301Tokenizer = 'gpt-3.5-turbo-0301';
     const turboTokenizer = 'gpt-3.5-turbo';
     const gpt4Tokenizer = 'gpt-4';
     const gpt2Tokenizer = 'gpt2';
@@ -197,6 +198,9 @@ export function getTokenizerModel() {
     if (oai_settings.windowai_model.includes('gpt-4')) {
         return gpt4Tokenizer;
     }
+    else if (oai_settings.windowai_model.includes('gpt-3.5-turbo-0301')) {
+        return turbo0301Tokenizer;
+    }
     else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) {
         return turboTokenizer;
     }
@@ -213,6 +217,9 @@ export function getTokenizerModel() {
     if (oai_settings.openrouter_model.includes('gpt-4')) {
         return gpt4Tokenizer;
     }
+    else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo-0301')) {
+        return turbo0301Tokenizer;
+    }
     else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) {
         return turboTokenizer;
     }
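In both blocks the branch order matters: includes('gpt-3.5-turbo') also matches the dated snapshot name, so the new -0301 test has to come first or it would never run. The same constraint holds for the server-side getTokenizerModel(requestModel) change further down. A one-line demonstration:

    const model = 'gpt-3.5-turbo-0301';
    model.includes('gpt-3.5-turbo');      // true, the generic branch would swallow it
    model.includes('gpt-3.5-turbo-0301'); // also true, so the specific test must run first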
server.js (24 changed lines)
@@ -2812,7 +2812,7 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
         }

         try {
-            const tokens = tokenizer.encode(entry.text);
+            const tokens = getEntryTokens(entry.text);

             for (const token of tokens) {
                 result[token] = entry.value;
@@ -2825,6 +2825,28 @@ app.post("/openai_bias", jsonParser, async function (request, response) {
     // not needed for cached tokenizers
     //tokenizer.free();
     return response.send(result);
+
+    /**
+     * Gets token ids for a given entry
+     * @param {string} text Entry text
+     * @returns {Uint32Array} Array of token ids
+     */
+    function getEntryTokens(text) {
+        // Get raw token ids from JSON array
+        if (text.trim().startsWith('[') && text.trim().endsWith(']')) {
+            try {
+                const json = JSON.parse(text);
+                if (Array.isArray(json) && json.every(x => typeof x === 'number')) {
+                    return new Uint32Array(json);
+                }
+            } catch {
+                // ignore
+            }
+        }
+
+        // Otherwise, get token ids from tokenizer
+        return tokenizer.encode(text);
+    }
 });

 function convertChatMLPrompt(messages) {
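A quick sketch of how getEntryTokens resolves each entry shape (id values are illustrative; tokenizer is the handler-scoped tiktoken instance):

    getEntryTokens('[15496, 2159]'); // valid JSON number array -> Uint32Array [15496, 2159]
    getEntryTokens('[1, "two"]');    // mixed types fail the every() check, falls through to encode
    getEntryTokens('[broken');       // no closing bracket, treated as plain text
    getEntryTokens(' Hello');        // ordinary text -> tokenizer.encode(' Hello')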
@@ -95,6 +95,10 @@ function getTokenizerModel(requestModel) {
         return 'gpt-4';
     }

+    if (requestModel.includes('gpt-3.5-turbo-0301')) {
+        return 'gpt-3.5-turbo-0301';
+    }
+
     if (requestModel.includes('gpt-3.5-turbo')) {
         return 'gpt-3.5-turbo';
     }
@@ -296,8 +300,8 @@ function registerEndpoints(app, jsonParser) {
             return res.send({ "token_count": num_tokens });
         }

-        const tokensPerName = model.includes('gpt-4') ? 1 : -1;
-        const tokensPerMessage = model.includes('gpt-4') ? 3 : 4;
+        const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
+        const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
         const tokensPadding = 3;

         const tokenizer = getTiktokenTokenizer(model);
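These constants track OpenAI's published chat-token accounting: each message carries a fixed overhead (tokensPerMessage), a name field costs tokensPerName extra (negative on -0301, where the name replaces the role), and tokensPadding covers the reply priming tokens. The counting loop itself is outside this hunk, so the following worked example assumes the cookbook formula with made-up content lengths:

    // Non-0301 turbo model: tokensPerMessage = 3, tokensPerName = 1, tokensPadding = 3.
    const systemMsg = 3 + 1 /* "system" */ + 3 /* content tokens, assumed */;       // 7
    const namedUserMsg = 3 + 1 /* "user" */ + 2 /* content */ + 1 /* has a name */; // 7
    const numTokens = systemMsg + namedUserMsg + 3 /* padding */;                   // 17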
@@ -319,7 +323,7 @@ function registerEndpoints(app, jsonParser) {

         // NB: Since 2023-10-14, the GPT-3.5 Turbo 0301 model shoves in 7-9 extra tokens to every message.
         // More details: https://community.openai.com/t/gpt-3-5-turbo-0301-showing-different-behavior-suddenly/431326/14
-        if (queryModel.endsWith('-0301')) {
+        if (queryModel.includes('gpt-3.5-turbo-0301')) {
             num_tokens += 9;
         }

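The guard itself also changed from a suffix test to a scoped substring test, so the nine-token fudge now fires only for the turbo snapshot rather than for anything ending in -0301. Model names below are illustrative:

    'any-model-0301'.endsWith('-0301');                  // true  (old check, over-broad)
    'any-model-0301'.includes('gpt-3.5-turbo-0301');     // false (new check)
    'gpt-3.5-turbo-0301'.includes('gpt-3.5-turbo-0301'); // true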