From 0207794a2b1ae02f71a84dad0e496d712b559c8b Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 17 Sep 2024 13:01:19 +0000 Subject: [PATCH] Fix Qwen and Command tokenizers not used for logit bias --- public/scripts/textgen-settings.js | 4 ++-- public/scripts/tokenizers.js | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index e2554e7b5..1033ee005 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -15,7 +15,7 @@ import { BIAS_CACHE, createNewLogitBiasEntry, displayLogitBias, getLogitBiasList import { power_user, registerDebugFunction } from './power-user.js'; import { getEventSourceStream } from './sse-stream.js'; import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer } from './textgen-models.js'; -import { SENTENCEPIECE_TOKENIZERS, TEXTGEN_TOKENIZERS, getTextTokens, tokenizers } from './tokenizers.js'; +import { ENCODE_TOKENIZERS, TEXTGEN_TOKENIZERS, getTextTokens, tokenizers } from './tokenizers.js'; import { getSortableDelay, onlyUnique } from './utils.js'; export { @@ -353,7 +353,7 @@ function getTokenizerForTokenIds() { return tokenizers.API_CURRENT; } - if (SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer)) { + if (ENCODE_TOKENIZERS.includes(power_user.tokenizer)) { return power_user.tokenizer; } diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 17d57cc5a..773d4e408 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -33,18 +33,22 @@ export const tokenizers = { BEST_MATCH: 99, }; -export const SENTENCEPIECE_TOKENIZERS = [ +// A list of local tokenizers that support encoding and decoding token ids. +export const ENCODE_TOKENIZERS = [ tokenizers.LLAMA, tokenizers.MISTRAL, tokenizers.YI, tokenizers.LLAMA3, tokenizers.GEMMA, tokenizers.JAMBA, + tokenizers.QWEN2, + tokenizers.COMMAND_R, // uncomment when NovelAI releases Kayra and Clio weights, lol //tokenizers.NERD, //tokenizers.NERD2, ]; +// A list of Text Completion sources that support remote tokenization. export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, VLLM, APHRODITE]; const TOKENIZER_URLS = {