Add configurable token padding #239

This commit is contained in:
SillyLossy
2023-05-05 21:24:36 +03:00
parent ce4d7dc90e
commit a3758482a4
4 changed files with 32 additions and 4 deletions

View File

@ -1185,6 +1185,15 @@
<option value="3">Sentencepiece (LLaMA)</option>
</select>
</div>
<div class="range-block">
<div class="range-block-title justifyLeft">
Token Padding
<a href="/notes#tokenpadding" class="notes-link" target="_blank">
<span class="note-link-span">?</span>
</a>
</div>
<input id="token_padding" class="text_pole" type="number" min="-2048" max="2048" />
</div>
<label class="checkbox_label" for="always-force-name2-checkbox">
<input id="always-force-name2-checkbox" type="checkbox" />
Always add character's name to prompt

View File

@ -422,6 +422,8 @@ To import Character.AI chats, use this tool: [https://github.com/0x000011b/chara
## Tokenizer
**Important: This section doesn't apply to the OpenAI API. SillyTavern will always use a matching tokenizer for OpenAI models.**
A tokenizer is a tool that breaks down a piece of text into smaller units called tokens. These tokens can be individual words or even parts of words, such as prefixes, suffixes, or punctuation. A rule of thumb is that one token generally corresponds to 3~4 characters of text.
SillyTavern can use the following tokenizers while forming a request to the AI backend:
@ -431,6 +433,18 @@ SillyTavern can use the following tokenizers while forming a request to the AI b
3. (Legacy) GPT-2/3 tokenizer. Used by original TavernAI. **Pick this if you're unsure.** More info: [gpt-2-3-tokenizer](https://github.com/josephrocca/gpt-2-3-tokenizer).
4. Sentencepiece tokenizer. Used by LLaMA model family: Alpaca, Vicuna, Koala, etc. **Pick if you use a LLaMA model.**
## Token Padding
**Important: This section doesn't apply to the OpenAI API. SillyTavern will always use a matching tokenizer for OpenAI models, so no token padding is required there.**
SillyTavern cannot use a proper tokenizer provided by the model running on a remote instance of KoboldAI or Oobabooga's TextGen, so all token counts assumed during prompt generation are estimated based on the selected [tokenizer](#Tokenizer) type.
Since the results of tokenization can be inaccurate on context sizes close to the model-defined maximum, some parts of the prompt may be trimmed or dropped, which may negatively affect the coherence of character definitions.
To prevent this, SillyTavern allocates a portion of the context size as padding to avoid adding more chat items than the model can accommodate. If you find that some part of the prompt is trimmed even with the best-matching tokenizer selected, adjust the padding so the description is not truncated.
You can enter negative values for reverse padding, which allows allocating more tokens than the set maximum.
## Advanced Formatting
The settings provided in this section allow for more control over the prompt building strategy. Most specifics of the prompt building depend on whether a Pygmalion model is selected or special formatting is force-enabled. The core differences between the formatting schemas are listed below.

View File

@ -472,7 +472,6 @@ var preset_settings = "gui";
var user_avatar = "you.png";
var amount_gen = 80; //default max length of AI generated responses
var max_context = 2048;
let padding_tokens = 64; // reserved tokens to prevent prompt overflow
var is_pygmalion = false;
var tokens_already_generated = 0;
@ -1783,7 +1782,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
allAnchors,
quiet_prompt,
].join('').replace(/\r/gm, '');
return getTokenCount(encodeString, padding_tokens) < this_max_context;
return getTokenCount(encodeString, power_user.token_padding) < this_max_context;
}
// Force pinned examples into the context
@ -1936,7 +1935,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
allAnchors,
quiet_prompt,
].join('').replace(/\r/gm, '');
let thisPromtContextSize = getTokenCount(prompt, padding_tokens);
let thisPromtContextSize = getTokenCount(prompt, power_user.token_padding);
if (thisPromtContextSize > this_max_context) { //if the prepared prompt is larger than the max context size...
if (count_exm_add > 0) { // ..and we have example messages..
@ -2013,7 +2012,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject,
if (power_user.collapse_newlines) {
finalPromt = collapseNewlines(finalPromt);
}
//console.log(`---Calculated Prompt Tokens: ${getTokenCount(finalPromt, padding_tokens)}`);
let this_amount_gen = parseInt(amount_gen); // how many tokens the AI will be requested to generate
let this_settings = koboldai_settings[koboldai_setting_names[preset_settings]];

View File

@ -57,6 +57,7 @@ const send_on_enter_options = {
let power_user = {
tokenizer: tokenizers.CLASSIC,
token_padding: 64,
collapse_newlines: false,
pygmalion_formatting: pygmalion_options.AUTO,
pin_examples: false,
@ -377,6 +378,7 @@ function loadPowerUserSettings(settings, data) {
$(`input[name="avatar_style"][value="${power_user.avatar_style}"]`).prop("checked", true);
$(`input[name="chat_display"][value="${power_user.chat_display}"]`).prop("checked", true);
$(`input[name="sheld_width"][value="${power_user.sheld_width}"]`).prop("checked", true);
$("#token_padding").val(power_user.token_padding);
$("#font_scale").val(power_user.font_scale);
$("#font_scale_counter").text(power_user.font_scale);
@ -755,6 +757,11 @@ $(document).ready(() => {
saveSettingsDebounced();
});
// Persist the token-padding setting as the user types.
// Negative values are permitted (reverse padding — see the input's min="-2048").
$("#token_padding").on("input", function () {
// Number() coerces the text field's string value; NOTE(review): an empty
// field yields Number('') === 0 — presumably intended as "no padding"; verify.
power_user.token_padding = Number($(this).val());
// Debounced save avoids writing settings on every keystroke.
saveSettingsDebounced();
});
$(window).on('focus', function () {
browser_has_focus = true;
});