Textgen: Add speculative_ngram for TabbyAPI

Speculative ngram provides an alternative method of speculative
decoding that does not require a draft model. Using a draft model
is still preferred.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri 2024-05-21 23:35:29 -04:00
parent f5fccc0387
commit a12df762a0
2 changed files with 11 additions and 0 deletions

View File

@@ -1405,6 +1405,13 @@
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Use the temperature sampler last" title="Use the temperature sampler last. This is almost always the sensible thing to do.&#13;When enabled: sample the set of plausible tokens first, then apply temperature to adjust their relative probabilities (technically, logits).&#13;When disabled: apply temperature to adjust the relative probabilities of ALL tokens first, then sample plausible tokens from that.&#13;Disabling Temperature Last boosts the probabilities in the tail of the distribution, which tends to amplify the chances of getting an incoherent response."></div> <div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Use the temperature sampler last" title="Use the temperature sampler last. This is almost always the sensible thing to do.&#13;When enabled: sample the set of plausible tokens first, then apply temperature to adjust their relative probabilities (technically, logits).&#13;When disabled: apply temperature to adjust the relative probabilities of ALL tokens first, then sample plausible tokens from that.&#13;Disabling Temperature Last boosts the probabilities in the tail of the distribution, which tends to amplify the chances of getting an incoherent response."></div>
</label> </label>
</label> </label>
<label data-tg-type="tabby" class="checkbox_label flexGrow flexShrink" for="speculative_ngram_textgenerationwebui">
<input type="checkbox" id="speculative_ngram_textgenerationwebui" />
<label>
<small data-i18n="Speculative Ngram">Speculative Ngram</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Use a different speculative decoding method without a draft model" title="Use a different speculative decoding method without a draft model.&#13;Using a draft model is preferred. Speculative ngram is not as effective."></div>
</label>
</label>
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="spaces_between_special_tokens_textgenerationwebui"> <label data-tg-type="vllm, aphrodite" class="checkbox_label" for="spaces_between_special_tokens_textgenerationwebui">
<input type="checkbox" id="spaces_between_special_tokens_textgenerationwebui" /> <input type="checkbox" id="spaces_between_special_tokens_textgenerationwebui" />

View File

@@ -139,6 +139,7 @@ const settings = {
//best_of_aphrodite: 1, //best_of_aphrodite: 1,
ignore_eos_token: false, ignore_eos_token: false,
spaces_between_special_tokens: true, spaces_between_special_tokens: true,
speculative_ngram: false,
//logits_processors_aphrodite: [], //logits_processors_aphrodite: [],
//log_probs_aphrodite: 0, //log_probs_aphrodite: 0,
//prompt_log_probs_aphrodite: 0, //prompt_log_probs_aphrodite: 0,
@@ -214,6 +215,7 @@ export const setting_names = [
//'best_of_aphrodite', //'best_of_aphrodite',
'ignore_eos_token', 'ignore_eos_token',
'spaces_between_special_tokens', 'spaces_between_special_tokens',
'speculative_ngram',
//'logits_processors_aphrodite', //'logits_processors_aphrodite',
//'log_probs_aphrodite', //'log_probs_aphrodite',
//'prompt_log_probs_aphrodite' //'prompt_log_probs_aphrodite'
@@ -657,6 +659,7 @@ jQuery(function () {
'freq_pen_textgenerationwebui': 0, 'freq_pen_textgenerationwebui': 0,
'presence_pen_textgenerationwebui': 0, 'presence_pen_textgenerationwebui': 0,
'no_repeat_ngram_size_textgenerationwebui': 0, 'no_repeat_ngram_size_textgenerationwebui': 0,
'speculative_ngram_textgenerationwebui': false,
'min_length_textgenerationwebui': 0, 'min_length_textgenerationwebui': 0,
'num_beams_textgenerationwebui': 1, 'num_beams_textgenerationwebui': 1,
'length_penalty_textgenerationwebui': 1, 'length_penalty_textgenerationwebui': 1,
@@ -1060,6 +1063,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'no_repeat_ngram_size': settings.type === OOBA ? settings.no_repeat_ngram_size : undefined, 'no_repeat_ngram_size': settings.type === OOBA ? settings.no_repeat_ngram_size : undefined,
'penalty_alpha': settings.type === OOBA ? settings.penalty_alpha : undefined, 'penalty_alpha': settings.type === OOBA ? settings.penalty_alpha : undefined,
'temperature_last': (settings.type === OOBA || settings.type === APHRODITE || settings.type == TABBY) ? settings.temperature_last : undefined, 'temperature_last': (settings.type === OOBA || settings.type === APHRODITE || settings.type == TABBY) ? settings.temperature_last : undefined,
'speculative_ngram': settings.type === TABBY ? settings.speculative_ngram : undefined,
'do_sample': settings.type === OOBA ? settings.do_sample : undefined, 'do_sample': settings.type === OOBA ? settings.do_sample : undefined,
'seed': settings.seed, 'seed': settings.seed,
'guidance_scale': cfgValues?.guidanceScale?.value ?? settings.guidance_scale ?? 1, 'guidance_scale': cfgValues?.guidanceScale?.value ?? settings.guidance_scale ?? 1,