Fix Qwen and Command tokenizers not used for logit bias

Cohee 2024-09-17 13:01:19 +00:00
parent 0b0bd27321
commit 0207794a2b
2 changed files with 7 additions and 3 deletions
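Background: logit bias strings are turned into token ids with the tokenizer returned by getTokenizerForTokenIds(), and that function only returns the user-selected tokenizer when it appears in the list of encode-capable tokenizers. The Qwen2 and Command R tokenizers were missing from that list (previously named SENTENCEPIECE_TOKENIZERS), so selecting them had no effect on logit bias. A minimal sketch of the selection rule follows; the function name, parameters, and numeric ids are illustrative assumptions, not the project's actual code.

// Sketch only: ids and the helper signature are illustrative assumptions.
const tokenizers = { LLAMA: 3, QWEN2: 12, COMMAND_R: 13, API_CURRENT: 98 };

// Before this commit, QWEN2 and COMMAND_R were absent from the encode-capable list.
const ENCODE_TOKENIZERS = [tokenizers.LLAMA, tokenizers.QWEN2, tokenizers.COMMAND_R];

function pickTokenizerForTokenIds(selected, apiCanTokenize) {
    if (apiCanTokenize) {
        return tokenizers.API_CURRENT; // let the backend tokenize the bias strings
    }
    if (ENCODE_TOKENIZERS.includes(selected)) {
        return selected; // a local tokenizer that can encode text into token ids
    }
    return tokenizers.API_CURRENT; // fall back when the selection cannot encode
}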


@@ -15,7 +15,7 @@ import { BIAS_CACHE, createNewLogitBiasEntry, displayLogitBias, getLogitBiasList
 import { power_user, registerDebugFunction } from './power-user.js';
 import { getEventSourceStream } from './sse-stream.js';
 import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer } from './textgen-models.js';
-import { SENTENCEPIECE_TOKENIZERS, TEXTGEN_TOKENIZERS, getTextTokens, tokenizers } from './tokenizers.js';
+import { ENCODE_TOKENIZERS, TEXTGEN_TOKENIZERS, getTextTokens, tokenizers } from './tokenizers.js';
 import { getSortableDelay, onlyUnique } from './utils.js';

 export {
@@ -353,7 +353,7 @@ function getTokenizerForTokenIds() {
         return tokenizers.API_CURRENT;
     }

-    if (SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer)) {
+    if (ENCODE_TOKENIZERS.includes(power_user.tokenizer)) {
        return power_user.tokenizer;
    }
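For context on how the returned tokenizer is used (a rough, hypothetical sketch, not SillyTavern's actual helper): each logit bias entry's text is encoded into token ids, and those ids are mapped to the bias value, which is why the selected tokenizer must be able to encode text at all. getTextTokens is imported from ./tokenizers.js in the hunk above and is assumed to take a tokenizer type and a string and return an array of token ids.

// Hypothetical illustration only; the function name and entry shape are assumptions.
function buildLogitBias(entries, tokenizerType) {
    const bias = {};
    for (const entry of entries) {
        const ids = getTextTokens(tokenizerType, entry.text); // encode the bias string
        for (const id of ids) {
            bias[id] = entry.value; // apply the entry's bias value to each token id
        }
    }
    return bias;
}

The second hunk below renames the encode-capable list itself and adds the missing Qwen2 and Command R entries.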


@@ -33,18 +33,22 @@ export const tokenizers = {
     BEST_MATCH: 99,
 };

-export const SENTENCEPIECE_TOKENIZERS = [
+// A list of local tokenizers that support encoding and decoding token ids.
+export const ENCODE_TOKENIZERS = [
     tokenizers.LLAMA,
     tokenizers.MISTRAL,
     tokenizers.YI,
     tokenizers.LLAMA3,
     tokenizers.GEMMA,
     tokenizers.JAMBA,
+    tokenizers.QWEN2,
+    tokenizers.COMMAND_R,
     // uncomment when NovelAI releases Kayra and Clio weights, lol
     //tokenizers.NERD,
     //tokenizers.NERD2,
 ];

+// A list of Text Completion sources that support remote tokenization.
 export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, VLLM, APHRODITE];

 const TOKENIZER_URLS = {