Use mistral and yi tokenizers for custom token bans
parent 9b75e49b54
commit 1ebfddf07e
@@ -1306,7 +1306,7 @@
                         <hr data-newbie-hidden class="width100p">
                         <h4 class="range-block-title justifyCenter">
                             <span data-i18n="Banned Tokens">Banned Tokens</span>
-                            <div class="margin5 fa-solid fa-circle-info opacity50p " title="LLaMA models only. Sequences you don't want to appear in the output. One per line. Text or [token ids]. Most tokens have a leading space."></div>
+                            <div class="margin5 fa-solid fa-circle-info opacity50p " title="LLaMA / Mistral / Yi models only. Make sure to select an appropriate tokenizer first. Sequences you don't want to appear in the output. One per line. Text or [token ids]. Most tokens have a leading space. Use token counter if unsure."></div>
                         </h4>
                         <div class="wide100p">
                             <textarea id="banned_tokens_textgenerationwebui" class="text_pole textarea_compact" name="banned_tokens_textgenerationwebui" rows="3" placeholder="Example: some text [42, 69, 1337]"></textarea>
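For illustration, a hypothetical value for that textarea mixing the two line formats the tooltip describes (raw text and a literal [token ids] list); the element id comes from the markup above, but the banned phrase and the snippet itself are examples, not part of the commit:

    // Hypothetical contents of the Banned Tokens textarea: one sequence per line.
    document.getElementById('banned_tokens_textgenerationwebui').value = [
        ' As an AI language model',   // raw text; note the leading space the tooltip mentions
        '[42, 69, 1337]',             // explicit token ids, taken verbatim
    ].join('\n');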
@@ -14,7 +14,7 @@ import {
     power_user,
     registerDebugFunction,
 } from "./power-user.js";
-import { getTextTokens, tokenizers } from "./tokenizers.js";
+import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from "./tokenizers.js";
 import { onlyUnique } from "./utils.js";
 
 export {
@@ -187,6 +187,7 @@ function getCustomTokenBans() {
         return '';
     }
 
+    const tokenizer = SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer) ? power_user.tokenizer : tokenizers.LLAMA;
     const result = [];
     const sequences = textgenerationwebui_settings.banned_tokens
         .split('\n')
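The added line is the heart of the change: banned sequences are now tokenized with the user's selected tokenizer when it is sentencepiece-based, and fall back to LLaMA otherwise. A minimal, self-contained sketch of that selection (the numeric ids and the GPT2 entry are placeholders for illustration, not copied from tokenizers.js, and pickBanTokenizer is a made-up helper name):

    // Sketch only: ids are illustrative, the real values live in tokenizers.js.
    const tokenizers = { GPT2: 1, LLAMA: 3, MISTRAL: 7, YI: 8 };
    const SENTENCEPIECE_TOKENIZERS = [tokenizers.LLAMA, tokenizers.MISTRAL, tokenizers.YI];

    function pickBanTokenizer(selectedTokenizer) {
        // Use the selected tokenizer if it is sentencepiece-based, otherwise fall back to LLaMA.
        return SENTENCEPIECE_TOKENIZERS.includes(selectedTokenizer)
            ? selectedTokenizer
            : tokenizers.LLAMA;
    }

    console.log(pickBanTokenizer(tokenizers.MISTRAL)); // 7 -> Mistral bans use the Mistral tokenizer
    console.log(pickBanTokenizer(tokenizers.GPT2));    // 3 -> anything else still uses LLaMA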
@@ -218,7 +219,7 @@ function getCustomTokenBans() {
             }
         } else {
             try {
-                const tokens = getTextTokens(tokenizers.LLAMA, line);
+                const tokens = getTextTokens(tokenizer, line);
                 result.push(...tokens);
             } catch {
                 console.log(`Could not tokenize raw text: ${line}`);
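For context, this hunk sits inside the loop that getCustomTokenBans() runs over each textarea line (only partially visible here): lines written as [id, id, ...] are used as-is, while plain text is tokenized with the tokenizer chosen above. A rough sketch of that per-line flow, with getTextTokens stubbed out because the real function defers to the tokenizer backend, and parseBannedLines being a hypothetical stand-in for the surrounding code:

    // Rough sketch, assuming the [ids] / raw-text split described in the tooltip;
    // getTextTokens is a stub, in the app it tokenizes via the selected sentencepiece model.
    function getTextTokens(tokenizer, text) {
        return Array.from(text, ch => ch.codePointAt(0)); // placeholder "tokenization"
    }

    function parseBannedLines(bannedText, tokenizer) {
        const result = [];
        for (const line of bannedText.split('\n').map(x => x.trim()).filter(Boolean)) {
            if (line.startsWith('[') && line.endsWith(']')) {
                try {
                    result.push(...JSON.parse(line)); // explicit token ids, e.g. "[42, 69, 1337]"
                } catch {
                    console.log(`Could not parse token ids: ${line}`);
                }
            } else {
                try {
                    result.push(...getTextTokens(tokenizer, line)); // raw text -> token ids
                } catch {
                    console.log(`Could not tokenize raw text: ${line}`);
                }
            }
        }
        return result;
    }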
@@ -22,6 +22,15 @@ export const tokenizers = {
     BEST_MATCH: 99,
 };
 
+export const SENTENCEPIECE_TOKENIZERS = [
+    tokenizers.LLAMA,
+    tokenizers.MISTRAL,
+    tokenizers.YI,
+    // uncomment when NovelAI releases Kayra and Clio weights, lol
+    //tokenizers.NERD,
+    //tokenizers.NERD2,
+];
+
 const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" });
 
 let tokenCache = {};