From 1ebfddf07ee4d9a185a96dd4325e66356c763345 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 21 Nov 2023 01:04:27 +0200 Subject: [PATCH] Use mistral and yi tokenizers for custom token bans --- public/index.html | 2 +- public/scripts/textgen-settings.js | 5 +++-- public/scripts/tokenizers.js | 9 +++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index ecad679cc..5872d9d18 100644 --- a/public/index.html +++ b/public/index.html @@ -1306,7 +1306,7 @@

Banned Tokens -
+

diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 6cc76db50..eaec3dcc7 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -14,7 +14,7 @@ import { power_user, registerDebugFunction, } from "./power-user.js"; -import { getTextTokens, tokenizers } from "./tokenizers.js"; +import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from "./tokenizers.js"; import { onlyUnique } from "./utils.js"; export { @@ -187,6 +187,7 @@ function getCustomTokenBans() { return ''; } + const tokenizer = SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer) ? power_user.tokenizer : tokenizers.LLAMA; const result = []; const sequences = textgenerationwebui_settings.banned_tokens .split('\n') @@ -218,7 +219,7 @@ function getCustomTokenBans() { } } else { try { - const tokens = getTextTokens(tokenizers.LLAMA, line); + const tokens = getTextTokens(tokenizer, line); result.push(...tokens); } catch { console.log(`Could not tokenize raw text: ${line}`); diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index c6ab38cad..079cbf33b 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -22,6 +22,15 @@ export const tokenizers = { BEST_MATCH: 99, }; +export const SENTENCEPIECE_TOKENIZERS = [ + tokenizers.LLAMA, + tokenizers.MISTRAL, + tokenizers.YI, + // uncomment when NovelAI releases Kayra and Clio weights, lol + //tokenizers.NERD, + //tokenizers.NERD2, +]; + const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" }); let tokenCache = {};