Use mistral and yi tokenizers for custom token bans

This commit is contained in:
Cohee
2023-11-21 01:04:27 +02:00
parent 9b75e49b54
commit 1ebfddf07e
3 changed files with 13 additions and 3 deletions

View File

@@ -14,7 +14,7 @@ import {
power_user,
registerDebugFunction,
} from "./power-user.js";
-import { getTextTokens, tokenizers } from "./tokenizers.js";
+import { SENTENCEPIECE_TOKENIZERS, getTextTokens, tokenizers } from "./tokenizers.js";
import { onlyUnique } from "./utils.js";
export {
@@ -187,6 +187,7 @@ function getCustomTokenBans() {
return '';
}
+const tokenizer = SENTENCEPIECE_TOKENIZERS.includes(power_user.tokenizer) ? power_user.tokenizer : tokenizers.LLAMA;
const result = [];
const sequences = textgenerationwebui_settings.banned_tokens
.split('\n')
@@ -218,7 +219,7 @@ function getCustomTokenBans() {
}
} else {
try {
-const tokens = getTextTokens(tokenizers.LLAMA, line);
+const tokens = getTextTokens(tokenizer, line);
result.push(...tokens);
} catch {
console.log(`Could not tokenize raw text: ${line}`);