From 1ebfddf07ee4d9a185a96dd4325e66356c763345 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Tue, 21 Nov 2023 01:04:27 +0200
Subject: [PATCH] Use mistral and yi tokenizers for custom token bans
---
public/index.html | 2 +-
public/scripts/textgen-settings.js | 5 +++--
public/scripts/tokenizers.js | 9 +++++++++
3 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/public/index.html b/public/index.html
index ecad679cc..5872d9d18 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1306,7 +1306,7 @@
Banned Tokens
-
+
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 6cc76db50..eaec3dcc7 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -14,7 +14,7 @@ import {
power_user,
registerDebugFunction,
} from "./power-user.js";
-import { getTextTokens, tokenizers } from "./tokenizers.js";
+import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from "./tokenizers.js";
import { onlyUnique } from "./utils.js";
export {
@@ -187,6 +187,7 @@ function getCustomTokenBans() {
return '';
}
+ const tokenizer = SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer) ? power_user.tokenizer : tokenizers.LLAMA;
const result = [];
const sequences = textgenerationwebui_settings.banned_tokens
.split('\n')
@@ -218,7 +219,7 @@ function getCustomTokenBans() {
}
} else {
try {
- const tokens = getTextTokens(tokenizers.LLAMA, line);
+ const tokens = getTextTokens(tokenizer, line);
result.push(...tokens);
} catch {
console.log(`Could not tokenize raw text: ${line}`);
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
index c6ab38cad..079cbf33b 100644
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -22,6 +22,15 @@ export const tokenizers = {
BEST_MATCH: 99,
};
+export const SENTENCEPIECE_TOKENIZERS = [
+ tokenizers.LLAMA,
+ tokenizers.MISTRAL,
+ tokenizers.YI,
+ // Kept disabled on purpose: uncomment the entries below when NovelAI releases Kayra and Clio weights.
+ //tokenizers.NERD,
+ //tokenizers.NERD2,
+];
+
const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" });
let tokenCache = {};