From efd477da04a3c916124eeaacd78a74a6febdf0f1 Mon Sep 17 00:00:00 2001
From: AlpinDale <alpindale@gmail.com>
Date: Fri, 13 Sep 2024 10:34:06 +0000
Subject: [PATCH 1/8] chore: slight refactor of aphrodite samplers

---
 public/index.html                  | 38 +++++++++++------------
 public/scripts/textgen-settings.js | 49 ++++++++++++++++++++----------
 2 files changed, 52 insertions(+), 35 deletions(-)
diff --git a/public/index.html b/public/index.html
index 6f8c87112..c4b299fbe 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1189,7 +1189,7 @@
                                         <input class="neo-range-slider" type="range" id="tfs_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
                                         <input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="tfs_textgenerationwebui" id="tfs_counter_textgenerationwebui">
                                     </div>
-                                    <div data-tg-type="ooba,mancer" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+                                    <div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
                                         <small>
                                             <span data-i18n="Epsilon Cutoff">Epsilon Cutoff</span>
                                             <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Epsilon cutoff sets a probability floor below which tokens are excluded from being sampled" title="Epsilon cutoff sets a probability floor below which tokens are excluded from being sampled.&#13;In units of 1e-4; a reasonable value is 3.&#13;Set to 0 to disable."></div>
@@ -1197,7 +1197,7 @@
                                         <input class="neo-range-slider" type="range" id="epsilon_cutoff_textgenerationwebui" name="volume" min="0" max="9" step="0.01">
                                         <input class="neo-range-input" type="number" min="0" max="9" step="0.01" data-for="epsilon_cutoff_textgenerationwebui" id="epsilon_cutoff_counter_textgenerationwebui">
                                     </div>
-                                    <div data-tg-type="ooba,mancer" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+                                    <div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
                                         <small>
                                             <span data-i18n="Eta Cutoff">Eta Cutoff</span>
                                             <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Eta_Cutoff_desc" title="Eta cutoff is the main parameter of the special Eta Sampling technique.&#13;In units of 1e-4; a reasonable value is 3.&#13;Set to 0 to disable.&#13;See the paper Truncation Sampling as Language Model Desmoothing by Hewitt et al. (2022) for details."></div>
@@ -1250,7 +1250,7 @@
                                         <input class="neo-range-slider" type="range" id="skew_textgenerationwebui" name="volume" min="-5" max="5" step="0.01" />
                                         <input class="neo-range-input" type="number" min="-5" max="5" step="0.01" data-for="skew_textgenerationwebui" id="skew_counter_textgenerationwebui">
                                     </div>
-                                    <div data-tg-type="mancer, ooba, tabby, dreamgen, infermaticai" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+                                    <div data-tg-type="mancer, ooba, tabby, dreamgen, infermaticai, aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
                                         <small data-i18n="Min Length">Min Length</small>
                                         <input class="neo-range-slider" type="range" id="min_length_textgenerationwebui" name="volume" min="0" max="2000" step="1" />
                                         <input class="neo-range-input" type="number" min="0" max="2000" step="1" data-for="min_length_textgenerationwebui" id="min_length_counter_textgenerationwebui">
@@ -1339,7 +1339,7 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp, aphrodite" id="dynatemp_block_ooba" class="wide100p">
+                                    <div data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp" id="dynatemp_block_ooba" class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <div class="flex-container alignitemscenter justifyCenter">
                                                 <div class="checkbox_label" for="dynatemp_textgenerationwebui">
@@ -1367,7 +1367,7 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div data-tg-type="ooba,aphrodite,infermaticai,koboldcpp,llamacpp,mancer,ollama,tabby" id="mirostat_block_ooba" class="wide100p">
+                                    <div data-tg-type="ooba,infermaticai,koboldcpp,llamacpp,mancer,ollama,tabby" id="mirostat_block_ooba" class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <label data-i18n="Mirostat (mode=1 is only for llama.cpp)">Mirostat</label>
                                             <div class=" fa-solid fa-circle-info opacity50p " data-i18n="[title]Mirostat_desc" title="Mirostat is a thermostat for output perplexity.&#13;Mirostat matches the output perplexity to that of the input, thus avoiding the repetition trap&#13;(where, as the autoregressive inference produces text, the perplexity of the output tends toward zero)&#13;and the confusion trap (where the perplexity diverges).&#13;For details, see the paper Mirostat: A Neural Text Decoding Algorithm that Directly Controls Perplexity by Basu et al. (2020).&#13;Mode chooses the Mirostat version. 0=disable, 1=Mirostat 1.0 (llama.cpp only), 2=Mirostat 2.0."></div>
@@ -1396,7 +1396,7 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div data-tg-type="ooba, vllm" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
+                                    <div data-tg-type="ooba, vllm, aphrodite" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <label>
                                                 <span data-i18n="Beam search">Beam Search</span>
@@ -1537,18 +1537,7 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div id="json_schema_block" data-tg-type="tabby, llamacpp" class="wide100p">
-                                        <hr class="wide100p">
-                                        <h4 class="wide100p textAlignCenter"><span data-i18n="JSON Schema">JSON Schema</span>
-                                            <a href="https://json-schema.org/learn/getting-started-step-by-step" target="_blank">
-                                                <small>
-                                                    <div class="fa-solid fa-circle-question note-link-span"></div>
-                                                </small>
-                                            </a>
-                                        </h4>
-                                        <textarea id="tabby_json_schema" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired JSON schema" placeholder="Type in the desired JSON schema"></textarea>
-                                    </div>
-                                    <div id="grammar_block_ooba" class="wide100p">
+                                    <div id="grammar_block_ooba" data-tg-type="ooba,aphrodite" class="wide100p">
                                         <hr class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <label>
@@ -1563,6 +1552,17 @@
                                         </h4>
                                         <textarea id="grammar_string_textgenerationwebui" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired custom grammar" placeholder="Type in the desired custom grammar"></textarea>
                                     </div>
+                                    <div id="json_schema_block" data-tg-type="tabby, llamacpp, aphrodite" class="wide100p">
+                                        <hr class="wide100p">
+                                        <h4 class="wide100p textAlignCenter"><span data-i18n="JSON Schema">JSON Schema</span>
+                                            <a href="https://json-schema.org/learn/getting-started-step-by-step" target="_blank">
+                                                <small>
+                                                    <div class="fa-solid fa-circle-question note-link-span"></div>
+                                                </small>
+                                            </a>
+                                        </h4>
+                                        <textarea id="tabby_json_schema" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired JSON schema" placeholder="Type in the desired JSON schema"></textarea>
+                                    </div>
                                     <div id="sampler_order_block_kcpp" data-tg-type="koboldcpp" class="range-block flexFlowColumn wide100p">
                                         <hr class="wide100p">
                                         <div class="range-block-title">
@@ -2425,7 +2425,7 @@
                                 <div data-tg-type="openrouter" class="menu_button menu_button_icon openrouter_authorize" title="Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai" data-i18n="Authorize;[title]Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai">Authorize</div>
                                 <div class="api_loading menu_button menu_button_icon" data-i18n="Cancel">Cancel</div>
                             </div>
-                            <label data-tg-type="ooba,aphrodite" class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
+                            <label data-tg-type="ooba" class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
                                 <input type="checkbox" id="legacy_api_textgenerationwebui" />
                                 <span data-i18n="Legacy API (pre-OAI, no streaming)">Legacy API (pre-OAI, no streaming)</span>
                             </label>
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index b3d8dea5b..0c2d65519 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -123,6 +123,7 @@ const settings = {
     rep_pen_slope: 1,
     no_repeat_ngram_size: 0,
     penalty_alpha: 0,
+    use_beam_search: false,
     num_beams: 1,
     length_penalty: 1,
     min_length: 0,
@@ -162,14 +163,9 @@ const settings = {
     banned_tokens: '',
     sampler_priority: OOBA_DEFAULT_ORDER,
     samplers: LLAMACPP_DEFAULT_ORDER,
-    //n_aphrodite: 1,
-    //best_of_aphrodite: 1,
     ignore_eos_token: false,
     spaces_between_special_tokens: true,
     speculative_ngram: false,
-    //logits_processors_aphrodite: [],
-    //log_probs_aphrodite: 0,
-    //prompt_log_probs_aphrodite: 0,
     type: textgen_types.OOBA,
     mancer_model: 'mytholite',
     togetherai_model: 'Gryphe/MythoMax-L2-13b',
@@ -191,6 +187,8 @@ const settings = {
     openrouter_allow_fallbacks: true,
     xtc_threshold: 0.1,
     xtc_probability: 0,
+    include_stop_str_in_output: false,
+    guided_regex: '',
 };
 
 export let textgenerationwebui_banned_in_macros = [];
@@ -250,14 +248,9 @@ export const setting_names = [
     'json_schema',
     'banned_tokens',
     'legacy_api',
-    //'n_aphrodite',
-    //'best_of_aphrodite',
     'ignore_eos_token',
     'spaces_between_special_tokens',
     'speculative_ngram',
-    //'logits_processors_aphrodite',
-    //'log_probs_aphrodite',
-    //'prompt_log_probs_aphrodite'
     'sampler_order',
     'sampler_priority',
     'samplers',
@@ -268,6 +261,8 @@ export const setting_names = [
     'openrouter_allow_fallbacks',
     'xtc_threshold',
     'xtc_probability',
+    'include_stop_str_in_output',
+    'guided_regex',
 ];
 
 const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba');
@@ -1208,13 +1203,35 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
     };
     const aphroditeParams = {
         'n': canMultiSwipe ? settings.n : 1,
-        'best_of': canMultiSwipe ? settings.n : 1,
+        'frequency_penalty': settings.freq_pen,
+        'presence_penalty': settings.presence_pen,
+        'repetition_penalty': settings.rep_pen,
+        'seed': settings.seed,
+        'stop': settings.stopping_strings,
+        'temperature': settings.temp,
+        'temperature_last': settings.temperature_last,
+        'top_p': settings.top_p,
+        'top_k': settings.top_k,
+        'top_a': settings.top_a,
+        'min_p': settings.min_p,
+        'tfs': settings.tfs,
+        'eta_cutoff': settings.eta_cutoff,
+        'epsilon_cutoff': settings.epsilon_cutoff,
+        'typical_p': settings.typical_p,
+        'smoothing_factor': settings.smoothing_factor,
+        'smoothing_curve': settings.smoothing_curve,
+        'use_beam_search': settings.use_beam_search,
+        'length_penalty': settings.length_penalty,
+        'early_stopping': settings.early_stopping,
         'ignore_eos': settings.ignore_eos_token,
+        'min_tokens': settings.min_length,
+        'skip_special_tokens': settings.skip_special_tokens,
+        'include_stop_str_in_output': settings.include_stop_str_in_output,
         'spaces_between_special_tokens': settings.spaces_between_special_tokens,
-        'grammar': settings.grammar_string,
-        //'logits_processors': settings.logits_processors_aphrodite,
-        //'logprobs': settings.log_probs_aphrodite,
-        //'prompt_logprobs': settings.prompt_log_probs_aphrodite,
+        'guided_grammar': settings.grammar_string,
+        'guided_json': settings.json_schema,
+
+
     };
 
     if (settings.type === OPENROUTER) {
@@ -1254,7 +1271,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
             break;
 
         case APHRODITE:
-            params = Object.assign(params, aphroditeParams);
+            params = Object.assign(aphroditeParams);
             break;
 
         default:

From 9c94348491fd63662b643130c5f09123c5ee456c Mon Sep 17 00:00:00 2001
From: AlpinDale <alpindale@gmail.com>
Date: Sat, 14 Sep 2024 12:38:19 +0000
Subject: [PATCH 2/8] clean up

---
 public/index.html                  | 27 +++++++++++++++++++--------
 public/scripts/textgen-settings.js | 10 ++++------
 2 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/public/index.html b/public/index.html
index c4b299fbe..366b0b053 100644
--- a/public/index.html
+++ b/public/index.html
@@ -215,7 +215,7 @@
                                         <label class="checkbox_label">
                                             <input id="max_context_unlocked" type="checkbox" />
                                             <small><span data-i18n="unlocked">Unlocked</span>
-                                                <div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 4096 tokens" title="Only enable this if your model supports context sizes greater than 4096 tokens.&#13;Increase only if you know what you're doing."></div>
+                                                <div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 8192 tokens" title="Only enable this if your model supports context sizes greater than 4096 tokens.&#13;Increase only if you know what you're doing."></div>
                                             </small>
                                         </label>
                                     </div>
@@ -1396,28 +1396,36 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div data-tg-type="ooba, vllm, aphrodite" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
+                                    <div data-tg-type="ooba, vllm" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <label>
                                                 <span data-i18n="Beam search">Beam Search</span>
-                                                <div class=" fa-solid fa-circle-info opacity50p " title="Helpful tip coming soon." data-i18n="[title]Helpful tip coming soon."></div>
+                                                <div class=" fa-solid fa-circle-info opacity50p " title="A greedy, brute-force algorithm used in LLM sampling to find the most likely sequence of words or tokens. It expands multiple candidate sequences at once, maintaining a fixed number (beam width) of top sequences at each step." data-i18n="[title]A greedy, brute-force algorithm used in LLM sampling to find the most likely sequence of words or tokens. It expands multiple candidate sequences at once, maintaining a fixed number (beam width) of top sequences at each step."></div>
                                             </label>
                                         </h4>
                                         <div class="flex-container flexFlowRow alignitemscenter gap10px flexShrink">
                                             <div class="alignitemscenter flex-container marginBot5 flexFlowColumn flexGrow flexShrink gap0">
-                                                <small data-i18n="Number of Beams"># of Beams</small>
+                                                <label>
+                                                    <small data-i18n="# of Beams"># of Beams</small>
+                                                    <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]The number of sequences generated at each step with Beam Search." title="The number of sequences generated at each step with Beam Search."></div>
+                                                </label>
                                                 <input class="neo-range-slider" type="range" id="num_beams_textgenerationwebui" name="volume" min="1" max="20" step="1" />
                                                 <input class="neo-range-input" type="number" min="1" max="20" step="1" data-for="num_beams_textgenerationwebui" id="num_beams_counter_textgenerationwebui">
                                             </div>
                                             <div class="alignitemscenter flex-container marginBot5 flexFlowColumn flexGrow flexShrink gap0">
-                                                <small data-i18n="Length Penalty">Length Penalty</small>
+                                                <label>
+                                                    <small data-i18n="Length Penalty">Length Penalty</small>
+                                                    <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Penalize sequences based on their length." title="Penalize sequences based on their length."></div>
+                                                </label>
                                                 <input class="neo-range-slider" type="range" id="length_penalty_textgenerationwebui" name="volume" min="-5" max="5" step="0.1" />
                                                 <input class="neo-range-input" type="number" min="-5" max="5" step="0.1" data-for="length_penalty_textgenerationwebui" id="length_penalty_counter_textgenerationwebui">
                                             </div>
                                             <div class="">
                                                 <label class="checkbox_label" for="early_stopping_textgenerationwebui">
-                                                    <input type="checkbox" id="early_stopping_textgenerationwebui" />
-                                                    <small data-i18n="Early Stopping">Early Stopping</small>
+                                                    <input type="checkbox" id="early_stopping_textgenerationwebui" checked="false" />
+                                                    <small data-i18n="Early Stopping">Early Stopping
+                                                        <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates." title="Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates."></div>
+                                                    </small>
                                                 </label>
                                             </div>
                                         </div>
@@ -1487,7 +1495,10 @@
                                         </div>
                                     </div>
                                     <div data-tg-type="mancer, ooba, koboldcpp, vllm, aphrodite, llamacpp, ollama, infermaticai, huggingface" class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
-                                        <small data-i18n="Seed" class="textAlignCenter">Seed</small>
+                                        <label>
+                                            <small data-i18n="Seed">Seed</small>
+                                            <div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Seed_desc" title="A random seed to use for deterministic and reproducable outputs. Set to -1 to use a random seed."></div>
+                                        </label>
                                         <input type="number" id="seed_textgenerationwebui" class="text_pole textAlignCenter" min="-1" value="-1" />
                                     </div>
                                     <div id="banned_tokens_block_ooba" class="wide100p">
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 0c2d65519..358f9d30c 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -188,7 +188,6 @@ const settings = {
     xtc_threshold: 0.1,
     xtc_probability: 0,
     include_stop_str_in_output: false,
-    guided_regex: '',
 };
 
 export let textgenerationwebui_banned_in_macros = [];
@@ -262,7 +261,6 @@ export const setting_names = [
     'xtc_threshold',
     'xtc_probability',
     'include_stop_str_in_output',
-    'guided_regex',
 ];
 
 const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba');
@@ -1207,7 +1205,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'presence_penalty': settings.presence_pen,
         'repetition_penalty': settings.rep_pen,
         'seed': settings.seed,
-        'stop': settings.stopping_strings,
+        'stop': getStoppingStrings(isImpersonate, isContinue),
         'temperature': settings.temp,
         'temperature_last': settings.temperature_last,
         'top_p': settings.top_p,
@@ -1221,6 +1219,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'smoothing_factor': settings.smoothing_factor,
         'smoothing_curve': settings.smoothing_curve,
         'use_beam_search': settings.use_beam_search,
+        'best_of': settings.num_beams > 1 ? settings.num_beams : settings.n,
         'length_penalty': settings.length_penalty,
         'early_stopping': settings.early_stopping,
         'ignore_eos': settings.ignore_eos_token,
@@ -1230,8 +1229,6 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'spaces_between_special_tokens': settings.spaces_between_special_tokens,
         'guided_grammar': settings.grammar_string,
         'guided_json': settings.json_schema,
-
-
     };
 
     if (settings.type === OPENROUTER) {
@@ -1271,7 +1268,8 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
             break;
 
         case APHRODITE:
-            params = Object.assign(aphroditeParams);
+            // set params to aphroditeParams
+            params = Object.assign(params, aphroditeParams);
             break;
 
         default:

From fde76069e03ba990041642e6cf26096c13e09cfa Mon Sep 17 00:00:00 2001
From: AlpinDale <alpindale@gmail.com>
Date: Sat, 14 Sep 2024 12:42:21 +0000
Subject: [PATCH 3/8] remove beam search

---
 public/index.html                  | 2 +-
 public/scripts/textgen-settings.js | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/public/index.html b/public/index.html
index 366b0b053..d5423297e 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1422,7 +1422,7 @@
                                             </div>
                                             <div class="">
                                                 <label class="checkbox_label" for="early_stopping_textgenerationwebui">
-                                                    <input type="checkbox" id="early_stopping_textgenerationwebui" checked="false" />
+                                                    <input type="checkbox" id="early_stopping_textgenerationwebui" />
                                                     <small data-i18n="Early Stopping">Early Stopping
                                                         <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates." title="Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates."></div>
                                                     </small>
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 358f9d30c..b07e235cc 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -1218,10 +1218,6 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'typical_p': settings.typical_p,
         'smoothing_factor': settings.smoothing_factor,
         'smoothing_curve': settings.smoothing_curve,
-        'use_beam_search': settings.use_beam_search,
-        'best_of': settings.num_beams > 1 ? settings.num_beams : settings.n,
-        'length_penalty': settings.length_penalty,
-        'early_stopping': settings.early_stopping,
         'ignore_eos': settings.ignore_eos_token,
         'min_tokens': settings.min_length,
         'skip_special_tokens': settings.skip_special_tokens,

From 1cc935796ff2e7c3c758dd69fb10e7b2adf8127e Mon Sep 17 00:00:00 2001
From: AlpinDale <alpindale@gmail.com>
Date: Sat, 14 Sep 2024 12:45:29 +0000
Subject: [PATCH 4/8] fix early_stopping

---
 public/scripts/textgen-settings.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index b07e235cc..90f3d1d11 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -1225,6 +1225,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'spaces_between_special_tokens': settings.spaces_between_special_tokens,
         'guided_grammar': settings.grammar_string,
         'guided_json': settings.json_schema,
+        'early_stopping': false,  // hack
     };
 
     if (settings.type === OPENROUTER) {

From 33e6ffd36e870546ca14e0bb4a1062291bfa26a6 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Sat, 14 Sep 2024 16:31:04 +0300
Subject: [PATCH 5/8] Update translations

---
 public/index.html         | 2 +-
 public/locales/ar-sa.json | 2 +-
 public/locales/de-de.json | 2 +-
 public/locales/es-es.json | 2 +-
 public/locales/fr-fr.json | 2 +-
 public/locales/is-is.json | 2 +-
 public/locales/it-it.json | 2 +-
 public/locales/ja-jp.json | 2 +-
 public/locales/ko-kr.json | 2 +-
 public/locales/nl-nl.json | 2 +-
 public/locales/pt-pt.json | 2 +-
 public/locales/ru-ru.json | 2 +-
 public/locales/uk-ua.json | 2 +-
 public/locales/zh-cn.json | 2 +-
 public/locales/zh-tw.json | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/public/index.html b/public/index.html
index d5423297e..e08105239 100644
--- a/public/index.html
+++ b/public/index.html
@@ -215,7 +215,7 @@
                                         <label class="checkbox_label">
                                             <input id="max_context_unlocked" type="checkbox" />
                                             <small><span data-i18n="unlocked">Unlocked</span>
-                                                <div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 8192 tokens" title="Only enable this if your model supports context sizes greater than 4096 tokens.&#13;Increase only if you know what you're doing."></div>
+                                                <div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 8192 tokens" title="Only enable this if your model supports context sizes greater than 8192 tokens.&#13;Increase only if you know what you're doing."></div>
                                             </small>
                                         </label>
                                     </div>
diff --git a/public/locales/ar-sa.json b/public/locales/ar-sa.json
index 224bb480f..f599ccd2b 100644
--- a/public/locales/ar-sa.json
+++ b/public/locales/ar-sa.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "عرض الاستجابة لحظيا كما يتم إنشاؤها.",
     "context size(tokens)": "حجم الاحرف (بعدد الاحرف او الرموز)",
     "unlocked": "مفتوح",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 4096 رمزًا.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 8192 رمزًا.",
     "Max prompt cost:": "أقصى تكلفة فورية:",
     "Display the response bit by bit as it is generated.": "عرض الاستجابة بتدريج كما يتم إنشاؤها.",
     "When this is off, responses will be displayed all at once when they are complete.": "عند إيقاف هذا الخيار، سيتم عرض الردود جميعها دفعة واحدة عند اكتمالها.",
diff --git a/public/locales/de-de.json b/public/locales/de-de.json
index 7932eed10..4d37d5806 100644
--- a/public/locales/de-de.json
+++ b/public/locales/de-de.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Zeige die Antwort Stück für Stück an, während sie generiert wird.",
     "context size(tokens)": "Größe des Zusammenhangs (Tokens)",
     "unlocked": "Freigeschaltet",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 4096 Tokens unterstützt.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 8192 Tokens unterstützt.",
     "Max prompt cost:": "Maximale Sofortkosten:",
     "Display the response bit by bit as it is generated.": "Zeige die Antwort Stück für Stück, während sie generiert wird.",
     "When this is off, responses will be displayed all at once when they are complete.": "Wenn dies ausgeschaltet ist, werden Antworten angezeigt, sobald sie vollständig sind.",
diff --git a/public/locales/es-es.json b/public/locales/es-es.json
index e03f0ca9b..8db9a8cc4 100644
--- a/public/locales/es-es.json
+++ b/public/locales/es-es.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Mostrar la respuesta poco a poco según se genera",
     "context size(tokens)": "Tamaño de contexto (tokens)",
     "unlocked": "Desbloqueado",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 8192 tokens",
     "Max prompt cost:": "Costo inmediato máximo:",
     "Display the response bit by bit as it is generated.": "Mostrar la respuesta poco a poco a medida que se genera.",
     "When this is off, responses will be displayed all at once when they are complete.": "Cuando esto está apagado, las respuestas se mostrarán de una vez cuando estén completas.",
diff --git a/public/locales/fr-fr.json b/public/locales/fr-fr.json
index 1d4403460..2ae937879 100644
--- a/public/locales/fr-fr.json
+++ b/public/locales/fr-fr.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Afficher la réponse bit par bit au fur et à mesure de sa génération",
     "context size(tokens)": "Taille du contexte (en tokens)",
     "unlocked": "Déverrouillé",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 8192 tokens",
     "Max prompt cost:": "Coût rapide maximum :",
     "Display the response bit by bit as it is generated.": "Afficher la réponse morceau par morceau au fur et à mesure de sa génération.",
     "When this is off, responses will be displayed all at once when they are complete.": "Lorsque cette fonction est désactivée, les réponses s'affichent toutes en une fois lorsqu'elles sont complètes.",
diff --git a/public/locales/is-is.json b/public/locales/is-is.json
index 5f36b6277..f03d6c533 100644
--- a/public/locales/is-is.json
+++ b/public/locales/is-is.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Birta svarið bita fyrir bita þegar það er myndað.",
     "context size(tokens)": "Stærð samhengis (í táknum eða stöfum)",
     "unlocked": "Opinn",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 4096 tákn.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 8192 tákn.",
     "Max prompt cost:": "Hámarks skyndikostnaður:",
     "Display the response bit by bit as it is generated.": "Birta svarid bita fyrir bita þegar það er búið til.",
     "When this is off, responses will be displayed all at once when they are complete.": "Þegar þetta er slökkt verða svör birt allt í einu þegar þau eru búin.",
diff --git a/public/locales/it-it.json b/public/locales/it-it.json
index 53d969dcc..10fc75d2b 100644
--- a/public/locales/it-it.json
+++ b/public/locales/it-it.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Mostra la risposta pezzo per pezzo man mano che viene generata",
     "context size(tokens)": "Dimensione del contesto (token)",
     "unlocked": "Sbloccato",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 4096 token",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 8192 token",
     "Max prompt cost:": "Costo massimo immediato:",
     "Display the response bit by bit as it is generated.": "Visualizza la risposta pezzo per pezzo mentre viene generata.",
     "When this is off, responses will be displayed all at once when they are complete.": "Quando questo è disattivato, le risposte verranno visualizzate tutte in una volta quando sono complete.",
diff --git a/public/locales/ja-jp.json b/public/locales/ja-jp.json
index a38abf9a5..474f32a99 100644
--- a/public/locales/ja-jp.json
+++ b/public/locales/ja-jp.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "生成された応答を逐次表示します。",
     "context size(tokens)": "コンテキストのサイズ（トークン数）",
     "unlocked": "ロック解除",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "モデルが4096トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "モデルが8192トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします",
     "Max prompt cost:": "最大プロンプトコスト:",
     "Display the response bit by bit as it is generated.": "生成されるたびに、応答を逐次表示します。",
     "When this is off, responses will be displayed all at once when they are complete.": "この機能がオフの場合、応答は完全に生成されたときに一度ですべて表示されます。",
diff --git a/public/locales/ko-kr.json b/public/locales/ko-kr.json
index 1b23dd8cd..1ba7985af 100644
--- a/public/locales/ko-kr.json
+++ b/public/locales/ko-kr.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "생성되는대로 응답을 조금씩 표시하십시오",
     "context size(tokens)": "컨텍스트 크기 (토큰)",
     "unlocked": "잠금 해제됨",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "모델이 4096 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "모델이 8192 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오",
     "Max prompt cost:": "최대 프롬프트 비용:",
     "Display the response bit by bit as it is generated.": "생성되는 대답을 조금씩 표시합니다.",
     "When this is off, responses will be displayed all at once when they are complete.": "이 기능이 꺼져 있으면 대답은 완료되면 한 번에 모두 표시됩니다.",
diff --git a/public/locales/nl-nl.json b/public/locales/nl-nl.json
index 2320db081..069c89b9c 100644
--- a/public/locales/nl-nl.json
+++ b/public/locales/nl-nl.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Toon de reactie beetje bij beetje zoals deze wordt gegenereerd",
     "context size(tokens)": "Contextgrootte (tokens)",
     "unlocked": "Ontgrendeld",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 8192 tokens",
     "Max prompt cost:": "Maximale promptkosten:",
     "Display the response bit by bit as it is generated.": "Toon het antwoord stuk voor stuk terwijl het wordt gegenereerd.",
     "When this is off, responses will be displayed all at once when they are complete.": "Als dit uit staat, worden reacties in één keer weergegeven wanneer ze compleet zijn.",
diff --git a/public/locales/pt-pt.json b/public/locales/pt-pt.json
index a287d4879..771d60a94 100644
--- a/public/locales/pt-pt.json
+++ b/public/locales/pt-pt.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Exibir a resposta pouco a pouco conforme ela é gerada",
     "context size(tokens)": "Tamanho do contexto (tokens)",
     "unlocked": "Desbloqueado",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 8192 tokens",
     "Max prompt cost:": "Custo imediato máximo:",
     "Display the response bit by bit as it is generated.": "Exibir a resposta bit a bit conforme é gerada.",
     "When this is off, responses will be displayed all at once when they are complete.": "Quando isso estiver desligado, as respostas serão exibidas de uma vez quando estiverem completas.",
diff --git a/public/locales/ru-ru.json b/public/locales/ru-ru.json
index 311d41f32..d7ce65466 100644
--- a/public/locales/ru-ru.json
+++ b/public/locales/ru-ru.json
@@ -89,7 +89,7 @@
     "Text Completion presets": "Пресеты для Text Completion",
     "Documentation on sampling parameters": "Документация по параметрам сэмплеров",
     "Set all samplers to their neutral/disabled state.": "Установить все сэмплеры в нейтральное/отключенное состояние.",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 4096 токенов.\nУвеличивайте только если вы знаете, что делаете.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 8192 токенов.\nУвеличивайте только если вы знаете, что делаете.",
     "Wrap in Quotes": "Заключать в кавычки",
     "Wrap entire user message in quotes before sending.": "Перед отправкой заключать всё сообщение пользователя в кавычки.",
     "Leave off if you use quotes manually for speech.": "Оставьте выключенным, если вручную выставляете кавычки для прямой речи.",
diff --git a/public/locales/uk-ua.json b/public/locales/uk-ua.json
index 4c002009e..3a6d9c4e2 100644
--- a/public/locales/uk-ua.json
+++ b/public/locales/uk-ua.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "Поступово відображати відповідь по мірі її створення",
     "context size(tokens)": "Контекст (токени)",
     "unlocked": "Розблоковано",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 4096 токенів",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 8192 токенів",
     "Max prompt cost:": "Максимальна оперативна вартість:",
     "Display the response bit by bit as it is generated.": "Показувати відповідь по бітах по мірі її генерації.",
     "When this is off, responses will be displayed all at once when they are complete.": "Коли це вимкнено, відповіді будуть відображатися разом, коли вони будуть завершені.",
diff --git a/public/locales/zh-cn.json b/public/locales/zh-cn.json
index 844921e92..7fc78e51a 100644
--- a/public/locales/zh-cn.json
+++ b/public/locales/zh-cn.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "逐位显示生成的回复",
     "context size(tokens)": "上下文长度（以词符数计）",
     "unlocked": "解锁",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "仅在您的模型支持大于4096个词符的上下文大小时启用此选项",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "仅在您的模型支持大于8192个词符的上下文大小时启用此选项",
     "Max prompt cost:": "最大提示词费用：",
     "Display the response bit by bit as it is generated.": "随着回复的生成，逐位显示结果。",
     "When this is off, responses will be displayed all at once when they are complete.": "当此选项关闭时，回复将在完成后一次性显示。",
diff --git a/public/locales/zh-tw.json b/public/locales/zh-tw.json
index 20abdeba8..c3b8869cc 100644
--- a/public/locales/zh-tw.json
+++ b/public/locales/zh-tw.json
@@ -32,7 +32,7 @@
     "Streaming_desc": "生成時逐位顯示回應。當此功能關閉時，回應將在完成後一次顯示。",
     "context size(tokens)": "上下文大小(符記數)",
     "unlocked": "解鎖",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "僅在您的模型支援超過4096個符記的上下文大小時啟用此功能",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "僅在您的模型支援超過8192個符記的上下文大小時啟用此功能",
     "Max prompt cost:": "最多提示詞費用",
     "Display the response bit by bit as it is generated.": "生成時逐位顯示回應。",
     "When this is off, responses will be displayed all at once when they are complete.": "關閉時，回應將在完成後一次性顯示。",

From 28837ff883256638142600cbab6ca06f055d7bf7 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Sat, 14 Sep 2024 16:32:50 +0300
Subject: [PATCH 6/8] Hard code include_stop_str_in_output

---
 public/scripts/textgen-settings.js | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 90f3d1d11..80061a271 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -187,7 +187,6 @@ const settings = {
     openrouter_allow_fallbacks: true,
     xtc_threshold: 0.1,
     xtc_probability: 0,
-    include_stop_str_in_output: false,
 };
 
 export let textgenerationwebui_banned_in_macros = [];
@@ -260,7 +259,6 @@ export const setting_names = [
     'openrouter_allow_fallbacks',
     'xtc_threshold',
     'xtc_probability',
-    'include_stop_str_in_output',
 ];
 
 const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba');
@@ -1221,11 +1219,11 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'ignore_eos': settings.ignore_eos_token,
         'min_tokens': settings.min_length,
         'skip_special_tokens': settings.skip_special_tokens,
-        'include_stop_str_in_output': settings.include_stop_str_in_output,
         'spaces_between_special_tokens': settings.spaces_between_special_tokens,
         'guided_grammar': settings.grammar_string,
         'guided_json': settings.json_schema,
-        'early_stopping': false,  // hack
+        'early_stopping': false, // hacks
+        'include_stop_str_in_output': false,
     };
 
     if (settings.type === OPENROUTER) {

From f0d361bc7aa20e266d7353916734a8bec877995b Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Sat, 14 Sep 2024 16:41:22 +0300
Subject: [PATCH 7/8] Remove unused beam search

---
 public/scripts/textgen-settings.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 80061a271..1fb5917e7 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -123,7 +123,6 @@ const settings = {
     rep_pen_slope: 1,
     no_repeat_ngram_size: 0,
     penalty_alpha: 0,
-    use_beam_search: false,
     num_beams: 1,
     length_penalty: 1,
     min_length: 0,

From 50de67898020810e6d79fc62c1102be91f3d77ac Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Sat, 14 Sep 2024 16:53:21 +0300
Subject: [PATCH 8/8] Hide beam search for vllm. It never worked.

---
 public/index.html                  | 2 +-
 public/scripts/textgen-settings.js | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/public/index.html b/public/index.html
index e08105239..dd4a111e8 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1396,7 +1396,7 @@
                                             </div>
                                         </div>
                                     </div>
-                                    <div data-tg-type="ooba, vllm" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
+                                    <div data-tg-type="ooba" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
                                         <h4 class="wide100p textAlignCenter">
                                             <label>
                                                 <span data-i18n="Beam search">Beam Search</span>
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index 1fb5917e7..c22ecee23 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -1120,8 +1120,8 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'minimum_message_content_tokens': settings.type === DREAMGEN ? settings.min_length : undefined,
         'min_tokens': settings.min_length,
         'num_beams': settings.type === OOBA ? settings.num_beams : undefined,
-        'length_penalty': settings.length_penalty,
-        'early_stopping': settings.early_stopping,
+        'length_penalty': settings.type === OOBA ? settings.length_penalty : undefined,
+        'early_stopping': settings.type === OOBA ? settings.early_stopping : undefined,
         'add_bos_token': settings.add_bos_token,
         'dynamic_temperature': dynatemp ? true : undefined,
         'dynatemp_low': dynatemp ? settings.min_temp : undefined,