diff --git a/public/index.html b/public/index.html
index ecad679cc..5872d9d18 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1306,7 +1306,7 @@

                     Banned Tokens
-
+

diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 6cc76db50..eaec3dcc7 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -14,7 +14,7 @@ import {
     power_user,
     registerDebugFunction,
 } from "./power-user.js";
-import { getTextTokens, tokenizers } from "./tokenizers.js";
+import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from "./tokenizers.js";
 import { onlyUnique } from "./utils.js";
 
 export {
@@ -187,6 +187,7 @@ function getCustomTokenBans() {
         return '';
     }
 
+    const tokenizer = SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer) ? power_user.tokenizer : tokenizers.LLAMA;
     const result = [];
     const sequences = textgenerationwebui_settings.banned_tokens
         .split('\n')
@@ -218,7 +219,7 @@ function getCustomTokenBans() {
             }
         } else {
             try {
-                const tokens = getTextTokens(tokenizers.LLAMA, line);
+                const tokens = getTextTokens(tokenizer, line);
                 result.push(...tokens);
             } catch {
                 console.log(`Could not tokenize raw text: ${line}`);
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
index c6ab38cad..079cbf33b 100644
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -22,6 +22,15 @@ export const tokenizers = {
     BEST_MATCH: 99,
 };
 
+export const SENTENCEPIECE_TOKENIZERS = [
+    tokenizers.LLAMA,
+    tokenizers.MISTRAL,
+    tokenizers.YI,
+    // uncomment when NovelAI releases Kayra and Clio weights, lol
+    //tokenizers.NERD,
+    //tokenizers.NERD2,
+];
+
 const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" });
 
 let tokenCache = {};
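
For context, a minimal sketch of the tokenizer-selection logic this change introduces in getCustomTokenBans(): the user-selected tokenizer is only used to encode raw banned strings when it is sentencepiece-based, otherwise the old LLaMA fallback is kept. The numeric IDs and the pickBanTokenizer helper below are illustrative stand-ins, not part of the actual module.

// Sketch only: illustrative IDs, not the real values from tokenizers.js.
const tokenizers = { GPT2: 1, LLAMA: 3, MISTRAL: 7, YI: 8, BEST_MATCH: 99 };
const SENTENCEPIECE_TOKENIZERS = [tokenizers.LLAMA, tokenizers.MISTRAL, tokenizers.YI];

// Hypothetical helper mirroring the ternary added to getCustomTokenBans():
// sentencepiece tokenizers can encode raw banned strings directly; anything
// else (e.g. GPT-2 or "best match") falls back to the LLaMA tokenizer.
function pickBanTokenizer(selectedTokenizer) {
    return SENTENCEPIECE_TOKENIZERS.includes(selectedTokenizer)
        ? selectedTokenizer
        : tokenizers.LLAMA;
}

console.log(pickBanTokenizer(tokenizers.MISTRAL) === tokenizers.MISTRAL); // true
console.log(pickBanTokenizer(tokenizers.GPT2) === tokenizers.LLAMA);      // true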