#1180 Add custom token bans. Return grammar strings

This commit is contained in:
Cohee 2023-09-27 22:09:09 +03:00
parent 5857823c3b
commit 306cf51da4
2 changed files with 75 additions and 12 deletions

View File

@ -1148,10 +1148,10 @@
<span data-i18n="Banned Tokens">Banned Tokens</span>
</div>
<div class="toggle-description justifyLeft" data-i18n="Sequences you don't want to appear in the output. One per line.">
Sequences you don't want to appear in the output. One per line.
Sequences you don't want to appear in the output. One per line. Text or [token ids].
</div>
<div class="wide100p">
<textarea id="nai_banned_tokens" class="text_pole textarea_compact" name="nai_banned_tokens" rows="2" placeholder=""></textarea>
<textarea id="nai_banned_tokens" class="text_pole textarea_compact" name="nai_banned_tokens" rows="3" placeholder="Example:&#10;some text&#10;[42, 69, 1337]"></textarea>
</div>
</div>
<div class="range-block">
@ -1493,6 +1493,26 @@
</label>
</div>
<hr>
<div class="range-block">
<h4 class="range-block-title justifyLeft">
<span data-i18n="Banned Tokens">Banned Tokens</span>
<span data-i18n="LLaMA models">(LLaMA models)</span>
</h4>
<div class="toggle-description justifyLeft" data-i18n="Sequences you don't want to appear in the output. One per line.">
Sequences you don't want to appear in the output. One per line. Text or [token ids].
</div>
<div class="wide100p">
<textarea id="banned_tokens_textgenerationwebui" class="text_pole textarea_compact" name="banned_tokens_textgenerationwebui" rows="3" placeholder="Example:&#10;some text&#10;[42, 69, 1337]"></textarea>
</div>
<small>
<i class="fa-solid fa-lightbulb"></i>
&nbsp;
<span data-i18n="Most tokens have a leading space.">
Most tokens have a leading space.
</span>
</small>
</div>
<hr>
<div class="range-block">
<h4 class="range-block-title justifyLeft" data-i18n="CFG Scale">
CFG Scale
@ -1513,12 +1533,12 @@
<span data-i18n="Negative Prompt">Negative Prompt</span>
</div>
<div class="wide100p">
<textarea id="negative_prompt_textgenerationwebui" class="text_pole textarea_compact" name="negative_prompt" rows="2" data-i18n="[placeholder]Add text here that would make the AI generate things you don't want in your outputs." placeholder="Add text here that would make the AI generate things you don't want in your outputs."></textarea>
<textarea id="negative_prompt_textgenerationwebui" class="text_pole textarea_compact" name="negative_prompt" rows="3" data-i18n="[placeholder]Add text here that would make the AI generate things you don't want in your outputs." placeholder="Add text here that would make the AI generate things you don't want in your outputs."></textarea>
</div>
</div>
<small class="margin-bot-10px" data-i18n="Used if CFG Scale is unset globally, per chat or character">
<small data-i18n="Used if CFG Scale is unset globally, per chat or character">
Used if CFG Scale is unset globally, per chat or character
</small>
</div>
<hr>
<h4><span data-i18n="Beam search">Beam search</span></h4>
<div class="range-block">
@ -1622,10 +1642,10 @@
</div>
</div>
<hr>
<div class="displayNone" id="grammar_block_ooba">
<div id="grammar_block_ooba">
<h4 data-i18n="Grammar">Grammar</h4>
<div class="range-block">
<textarea id="grammar_file_textgenerationwebui" rows="2" class="text_pole textarea_compact monospace"></textarea>
<textarea id="grammar_string_textgenerationwebui" rows="2" class="text_pole textarea_compact monospace"></textarea>
<div class="toggle-description justifyLeft">
<span data-i18n="Type in the desired custom grammar (GBNF).">
Type in the desired custom grammar (<a href="https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md" target="_blank">GBNF</a>).

View File

@ -9,6 +9,8 @@ import {
import {
power_user,
} from "./power-user.js";
import { getTextTokens, tokenizers } from "./tokenizers.js";
import { onlyUnique } from "./utils.js";
export {
textgenerationwebui_settings,
@ -50,7 +52,8 @@ const textgenerationwebui_settings = {
mirostat_eta: 0.1,
guidance_scale: 1,
negative_prompt: '',
grammar_file: '',
grammar_string: '',
banned_tokens: '',
};
export let textgenerationwebui_presets = [];
@ -86,7 +89,8 @@ const setting_names = [
"mirostat_eta",
"guidance_scale",
"negative_prompt",
//"grammar_file",
"grammar_string",
"banned_tokens",
];
function selectPreset(name) {
@ -126,6 +130,44 @@ function convertPresets(presets) {
return Array.isArray(presets) ? presets.map(JSON.parse) : [];
}
/**
 * Builds the list of banned token IDs from the user's "Banned Tokens" setting.
 *
 * The setting is read from `textgenerationwebui_settings.banned_tokens` and is
 * processed one line at a time:
 * - A line of the form `[1, 2, 3]` is parsed as a JSON array of integer token IDs.
 * - Any other non-empty line is passed to `getTextTokens` with the LLaMA tokenizer
 *   and every resulting token ID is banned.
 *
 * Lines that cannot be parsed or tokenized are logged and skipped, so a single
 * bad entry never discards the rest of the list.
 *
 * @returns {string} String with comma-separated banned token IDs (empty string if none are configured)
 */
function getCustomTokenBans() {
    if (!textgenerationwebui_settings.banned_tokens) {
        return '';
    }

    const sequences = textgenerationwebui_settings.banned_tokens.split('\n');
    const result = [];

    for (const rawLine of sequences) {
        // Settings may carry Windows line endings; a dangling "\r" would otherwise
        // defeat the "[...]" detection below and leak into the tokenized text.
        const line = rawLine.replace(/\r$/, '');

        // Blank lines carry no ban request; don't send them to the tokenizer.
        if (line.length === 0) {
            continue;
        }

        // Surrounding whitespace is ignored only when detecting an ID list.
        // Text lines keep their leading space because it is part of the token.
        const trimmed = line.trim();

        // Raw token ids, JSON serialized
        if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
            try {
                const tokens = JSON.parse(trimmed);

                if (Array.isArray(tokens) && tokens.every(t => Number.isInteger(t))) {
                    result.push(...tokens);
                } else {
                    throw new Error('Not an array of integers');
                }
            } catch (err) {
                console.log(`Failed to parse bad word token list: ${line}`, err);
            }
        } else {
            try {
                const tokens = getTextTokens(tokenizers.LLAMA, line);
                result.push(...tokens);
            } catch (err) {
                // Keep the error object so tokenizer failures can be diagnosed.
                console.log(`Could not tokenize raw text: ${line}`, err);
            }
        }
    }

    return result.filter(onlyUnique).map(x => String(x)).join(',');
}
function loadTextGenSettings(data, settings) {
textgenerationwebui_presets = convertPresets(data.textgenerationwebui_presets);
textgenerationwebui_preset_names = data.textgenerationwebui_preset_names ?? [];
@ -268,6 +310,7 @@ export function getTextGenGenerationData(finalPrompt, this_amount_gen, isImperso
'mirostat_mode': textgenerationwebui_settings.mirostat_mode,
'mirostat_tau': textgenerationwebui_settings.mirostat_tau,
'mirostat_eta': textgenerationwebui_settings.mirostat_eta,
//'grammar_file': textgenerationwebui_settings.grammar_file,
'grammar_string': textgenerationwebui_settings.grammar_string,
'custom_token_bans': getCustomTokenBans(),
};
}