Merge pull request #2841 from AlpinDale/aphrosamplers
chore: slight refactor of aphrodite samplers
commit 5b8c149947
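The hunks below touch three areas: the sampler blocks in the main UI markup, where comma-separated `data-tg-type` attributes are re-gated so the Aphrodite backend shows (or hides) the right controls; fourteen locale files, where the unlocked-context warning moves from 4096 to 8192 tokens; and the text-generation settings script, where the Aphrodite request parameters are rebuilt. As a minimal sketch of how a `data-tg-type` list could gate visibility (the actual SillyTavern helper is not part of this diff; the function name here is illustrative):

    // Show a block only when the active backend appears in its
    // comma-separated data-tg-type list.
    function toggleSamplerBlocks(activeType) {
        document.querySelectorAll('[data-tg-type]').forEach((block) => {
            const types = block.dataset.tgType.split(',').map((t) => t.trim());
            block.style.display = types.includes(activeType) ? '' : 'none';
        });
    }
    toggleSamplerBlocks('aphrodite');

[main UI markup file]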
@@ -215,7 +215,7 @@
 <label class="checkbox_label">
 <input id="max_context_unlocked" type="checkbox" />
 <small><span data-i18n="unlocked">Unlocked</span>
-<div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 4096 tokens" title="Only enable this if your model supports context sizes greater than 4096 tokens. Increase only if you know what you're doing."></div>
+<div id="max_context_unlocked_warning" class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Only enable this if your model supports context sizes greater than 8192 tokens" title="Only enable this if your model supports context sizes greater than 8192 tokens. Increase only if you know what you're doing."></div>
 </small>
 </label>
 </div>
@@ -1189,7 +1189,7 @@
 <input class="neo-range-slider" type="range" id="tfs_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
 <input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="tfs_textgenerationwebui" id="tfs_counter_textgenerationwebui">
 </div>
-<div data-tg-type="ooba,mancer" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+<div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
 <small>
 <span data-i18n="Epsilon Cutoff">Epsilon Cutoff</span>
 <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Epsilon cutoff sets a probability floor below which tokens are excluded from being sampled" title="Epsilon cutoff sets a probability floor below which tokens are excluded from being sampled. In units of 1e-4; a reasonable value is 3. Set to 0 to disable."></div>
@@ -1197,7 +1197,7 @@
 <input class="neo-range-slider" type="range" id="epsilon_cutoff_textgenerationwebui" name="volume" min="0" max="9" step="0.01">
 <input class="neo-range-input" type="number" min="0" max="9" step="0.01" data-for="epsilon_cutoff_textgenerationwebui" id="epsilon_cutoff_counter_textgenerationwebui">
 </div>
-<div data-tg-type="ooba,mancer" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+<div data-tg-type="ooba,mancer,aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
 <small>
 <span data-i18n="Eta Cutoff">Eta Cutoff</span>
 <div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Eta_Cutoff_desc" title="Eta cutoff is the main parameter of the special Eta Sampling technique. In units of 1e-4; a reasonable value is 3. Set to 0 to disable. See the paper Truncation Sampling as Language Model Desmoothing by Hewitt et al. (2022) for details."></div>
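Both cutoff tooltips above express their thresholds in units of 1e-4. As a worked illustration of the epsilon rule only (not SillyTavern code), a UI value of 3 drops every token whose probability is below 3e-4:

    // Illustrative epsilon-cutoff filter; uiValue is in units of 1e-4.
    function applyEpsilonCutoff(tokens, uiValue) {
        const epsilon = uiValue * 1e-4;
        return tokens.filter((t) => t.prob >= epsilon);
    }
    // applyEpsilonCutoff([{ id: 1, prob: 0.5 }, { id: 2, prob: 0.0001 }], 3)
    // keeps only the first token (0.0001 < 0.0003).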
@@ -1250,7 +1250,7 @@
 <input class="neo-range-slider" type="range" id="skew_textgenerationwebui" name="volume" min="-5" max="5" step="0.01" />
 <input class="neo-range-input" type="number" min="-5" max="5" step="0.01" data-for="skew_textgenerationwebui" id="skew_counter_textgenerationwebui">
 </div>
-<div data-tg-type="mancer, ooba, tabby, dreamgen, infermaticai" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
+<div data-tg-type="mancer, ooba, tabby, dreamgen, infermaticai, aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
 <small data-i18n="Min Length">Min Length</small>
 <input class="neo-range-slider" type="range" id="min_length_textgenerationwebui" name="volume" min="0" max="2000" step="1" />
 <input class="neo-range-input" type="number" min="0" max="2000" step="1" data-for="min_length_textgenerationwebui" id="min_length_counter_textgenerationwebui">
@@ -1339,7 +1339,7 @@
 </div>
 </div>
 </div>
-<div data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp, aphrodite" id="dynatemp_block_ooba" class="wide100p">
+<div data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp" id="dynatemp_block_ooba" class="wide100p">
 <h4 class="wide100p textAlignCenter">
 <div class="flex-container alignitemscenter justifyCenter">
 <div class="checkbox_label" for="dynatemp_textgenerationwebui">
@@ -1367,7 +1367,7 @@
 </div>
 </div>
 </div>
-<div data-tg-type="ooba,aphrodite,infermaticai,koboldcpp,llamacpp,mancer,ollama,tabby" id="mirostat_block_ooba" class="wide100p">
+<div data-tg-type="ooba,infermaticai,koboldcpp,llamacpp,mancer,ollama,tabby" id="mirostat_block_ooba" class="wide100p">
 <h4 class="wide100p textAlignCenter">
 <label data-i18n="Mirostat (mode=1 is only for llama.cpp)">Mirostat</label>
 <div class=" fa-solid fa-circle-info opacity50p " data-i18n="[title]Mirostat_desc" title="Mirostat is a thermostat for output perplexity. Mirostat matches the output perplexity to that of the input, thus avoiding the repetition trap (where, as the autoregressive inference produces text, the perplexity of the output tends toward zero) and the confusion trap (where the perplexity diverges). For details, see the paper Mirostat: A Neural Text Decoding Algorithm that Directly Controls Perplexity by Basu et al. (2020). Mode chooses the Mirostat version. 0=disable, 1=Mirostat 1.0 (llama.cpp only), 2=Mirostat 2.0."></div>
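The tooltip above summarizes the Mirostat paper. A simplified sketch of the feedback loop it describes, assuming Mirostat 2.0 semantics (this is not the llama.cpp implementation, and weighted sampling is stubbed out):

    // One Mirostat 2.0 step: truncate tokens whose surprise exceeds mu,
    // pick one, then nudge mu toward the target surprise tau.
    function mirostatStep(tokens, state) {
        const surprise = (p) => -Math.log2(p);
        const allowed = tokens.filter((t) => surprise(t.prob) <= state.mu);
        const pick = (allowed.length ? allowed : tokens)[0]; // stand-in for sampling
        state.mu -= state.eta * (surprise(pick.prob) - state.tau);
        return pick;
    }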
@@ -1396,28 +1396,36 @@
 </div>
 </div>
 </div>
-<div data-tg-type="ooba, vllm" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
+<div data-tg-type="ooba" id="beamSearchBlock" name="beamSearchBlock" class="wide100p">
 <h4 class="wide100p textAlignCenter">
 <label>
 <span data-i18n="Beam search">Beam Search</span>
-<div class=" fa-solid fa-circle-info opacity50p " title="Helpful tip coming soon." data-i18n="[title]Helpful tip coming soon."></div>
+<div class=" fa-solid fa-circle-info opacity50p " title="A greedy, brute-force algorithm used in LLM sampling to find the most likely sequence of words or tokens. It expands multiple candidate sequences at once, maintaining a fixed number (beam width) of top sequences at each step." data-i18n="[title]A greedy, brute-force algorithm used in LLM sampling to find the most likely sequence of words or tokens. It expands multiple candidate sequences at once, maintaining a fixed number (beam width) of top sequences at each step."></div>
 </label>
 </h4>
 <div class="flex-container flexFlowRow alignitemscenter gap10px flexShrink">
 <div class="alignitemscenter flex-container marginBot5 flexFlowColumn flexGrow flexShrink gap0">
-<small data-i18n="Number of Beams"># of Beams</small>
+<label>
+<small data-i18n="# of Beams"># of Beams</small>
+<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]The number of sequences generated at each step with Beam Search." title="The number of sequences generated at each step with Beam Search."></div>
+</label>
 <input class="neo-range-slider" type="range" id="num_beams_textgenerationwebui" name="volume" min="1" max="20" step="1" />
 <input class="neo-range-input" type="number" min="1" max="20" step="1" data-for="num_beams_textgenerationwebui" id="num_beams_counter_textgenerationwebui">
 </div>
 <div class="alignitemscenter flex-container marginBot5 flexFlowColumn flexGrow flexShrink gap0">
-<small data-i18n="Length Penalty">Length Penalty</small>
+<label>
+<small data-i18n="Length Penalty">Length Penalty</small>
+<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Penalize sequences based on their length." title="Penalize sequences based on their length."></div>
+</label>
 <input class="neo-range-slider" type="range" id="length_penalty_textgenerationwebui" name="volume" min="-5" max="5" step="0.1" />
 <input class="neo-range-input" type="number" min="-5" max="5" step="0.1" data-for="length_penalty_textgenerationwebui" id="length_penalty_counter_textgenerationwebui">
 </div>
 <div class="">
 <label class="checkbox_label" for="early_stopping_textgenerationwebui">
 <input type="checkbox" id="early_stopping_textgenerationwebui" />
-<small data-i18n="Early Stopping">Early Stopping</small>
+<small data-i18n="Early Stopping">Early Stopping
+<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates." title="Controls the stopping condition for beam search. If checked, the generation stops as soon as there are '# of Beams' sequences. If not checked, a heuristic is applied and the generation is stopped when it's very unlikely to find better candidates."></div>
+</small>
 </label>
 </div>
 </div>
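The replacement tooltip finally documents the feature: beam search keeps a fixed number of candidate sequences alive and extends them all in parallel, while Early Stopping controls when that loop halts. A toy sketch of the algorithm the tooltip describes (illustrative, not the backend implementation):

    // Toy beam search: keep the numBeams highest-scoring sequences per step.
    // nextProbs(seq) must return an array of { token, prob } continuations.
    function beamSearch(nextProbs, numBeams, steps) {
        let beams = [{ seq: [], logp: 0 }];
        for (let i = 0; i < steps; i++) {
            const candidates = beams.flatMap((b) =>
                nextProbs(b.seq).map(({ token, prob }) => ({
                    seq: [...b.seq, token],
                    logp: b.logp + Math.log(prob),
                })));
            candidates.sort((a, b) => b.logp - a.logp);
            beams = candidates.slice(0, numBeams); // fixed beam width
        }
        return beams[0].seq; // most likely sequence found
    }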
@@ -1487,7 +1495,10 @@
 </div>
 </div>
 <div data-tg-type="mancer, ooba, koboldcpp, vllm, aphrodite, llamacpp, ollama, infermaticai, huggingface" class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
-<small data-i18n="Seed" class="textAlignCenter">Seed</small>
+<label>
+<small data-i18n="Seed">Seed</small>
+<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Seed_desc" title="A random seed to use for deterministic and reproducable outputs. Set to -1 to use a random seed."></div>
+</label>
 <input type="number" id="seed_textgenerationwebui" class="text_pole textAlignCenter" min="-1" value="-1" />
 </div>
 <div id="banned_tokens_block_ooba" class="wide100p">
@@ -1537,18 +1548,7 @@
 </div>
 </div>
 </div>
-<div id="json_schema_block" data-tg-type="tabby, llamacpp" class="wide100p">
-<hr class="wide100p">
-<h4 class="wide100p textAlignCenter"><span data-i18n="JSON Schema">JSON Schema</span>
-<a href="https://json-schema.org/learn/getting-started-step-by-step" target="_blank">
-<small>
-<div class="fa-solid fa-circle-question note-link-span"></div>
-</small>
-</a>
-</h4>
-<textarea id="tabby_json_schema" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired JSON schema" placeholder="Type in the desired JSON schema"></textarea>
-</div>
-<div id="grammar_block_ooba" class="wide100p">
+<div id="grammar_block_ooba" data-tg-type="ooba,aphrodite" class="wide100p">
 <hr class="wide100p">
 <h4 class="wide100p textAlignCenter">
 <label>
@@ -1563,6 +1563,17 @@
 </h4>
 <textarea id="grammar_string_textgenerationwebui" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired custom grammar" placeholder="Type in the desired custom grammar"></textarea>
 </div>
+<div id="json_schema_block" data-tg-type="tabby, llamacpp, aphrodite" class="wide100p">
+<hr class="wide100p">
+<h4 class="wide100p textAlignCenter"><span data-i18n="JSON Schema">JSON Schema</span>
+<a href="https://json-schema.org/learn/getting-started-step-by-step" target="_blank">
+<small>
+<div class="fa-solid fa-circle-question note-link-span"></div>
+</small>
+</a>
+</h4>
+<textarea id="tabby_json_schema" rows="4" class="text_pole textarea_compact monospace" data-i18n="[placeholder]Type in the desired JSON schema" placeholder="Type in the desired JSON schema"></textarea>
+</div>
 <div id="sampler_order_block_kcpp" data-tg-type="koboldcpp" class="range-block flexFlowColumn wide100p">
 <hr class="wide100p">
 <div class="range-block-title">
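Moving the JSON Schema block after the grammar block and tagging both for aphrodite pairs with the script change later in this diff, where the two textareas feed the guided_grammar and guided_json request fields. A hypothetical request body built from those fields (values illustrative; field names are from the diff):

    // Illustrative guided-decoding payload for an Aphrodite-style backend.
    const body = {
        prompt: 'List one fruit as JSON.',
        max_tokens: 64,
        guided_json: { type: 'object', properties: { fruit: { type: 'string' } } },
        // or instead: guided_grammar: '<grammar string from the textarea>',
    };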
@@ -2425,7 +2436,7 @@
 <div data-tg-type="openrouter" class="menu_button menu_button_icon openrouter_authorize" title="Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai" data-i18n="Authorize;[title]Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai">Authorize</div>
 <div class="api_loading menu_button menu_button_icon" data-i18n="Cancel">Cancel</div>
 </div>
-<label data-tg-type="ooba,aphrodite" class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
+<label data-tg-type="ooba" class="checkbox_label margin-bot-10px" for="legacy_api_textgenerationwebui">
 <input type="checkbox" id="legacy_api_textgenerationwebui" />
 <span data-i18n="Legacy API (pre-OAI, no streaming)">Legacy API (pre-OAI, no streaming)</span>
 </label>

[Arabic locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "عرض الاستجابة لحظيا كما يتم إنشاؤها.",
     "context size(tokens)": "حجم الاحرف (بعدد الاحرف او الرموز)",
     "unlocked": "مفتوح",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 4096 رمزًا.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 8192 رمزًا.",
     "Max prompt cost:": "أقصى تكلفة فورية:",
     "Display the response bit by bit as it is generated.": "عرض الاستجابة بتدريج كما يتم إنشاؤها.",
     "When this is off, responses will be displayed all at once when they are complete.": "عند إيقاف هذا الخيار، سيتم عرض الردود جميعها دفعة واحدة عند اكتمالها.",

[German locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Zeige die Antwort Stück für Stück an, während sie generiert wird.",
     "context size(tokens)": "Größe des Zusammenhangs (Tokens)",
     "unlocked": "Freigeschaltet",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 4096 Tokens unterstützt.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 8192 Tokens unterstützt.",
     "Max prompt cost:": "Maximale Sofortkosten:",
     "Display the response bit by bit as it is generated.": "Zeige die Antwort Stück für Stück, während sie generiert wird.",
     "When this is off, responses will be displayed all at once when they are complete.": "Wenn dies ausgeschaltet ist, werden Antworten angezeigt, sobald sie vollständig sind.",

[Spanish locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Mostrar la respuesta poco a poco según se genera",
     "context size(tokens)": "Tamaño de contexto (tokens)",
     "unlocked": "Desbloqueado",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 8192 tokens",
     "Max prompt cost:": "Costo inmediato máximo:",
     "Display the response bit by bit as it is generated.": "Mostrar la respuesta poco a poco a medida que se genera.",
     "When this is off, responses will be displayed all at once when they are complete.": "Cuando esto está apagado, las respuestas se mostrarán de una vez cuando estén completas.",

[French locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Afficher la réponse bit par bit au fur et à mesure de sa génération",
     "context size(tokens)": "Taille du contexte (en tokens)",
     "unlocked": "Déverrouillé",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 8192 tokens",
     "Max prompt cost:": "Coût rapide maximum :",
     "Display the response bit by bit as it is generated.": "Afficher la réponse morceau par morceau au fur et à mesure de sa génération.",
     "When this is off, responses will be displayed all at once when they are complete.": "Lorsque cette fonction est désactivée, les réponses s'affichent toutes en une fois lorsqu'elles sont complètes.",

[Icelandic locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Birta svarið bita fyrir bita þegar það er myndað.",
     "context size(tokens)": "Stærð samhengis (í táknum eða stöfum)",
     "unlocked": "Opinn",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 4096 tákn.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 8192 tákn.",
     "Max prompt cost:": "Hámarks skyndikostnaður:",
     "Display the response bit by bit as it is generated.": "Birta svarid bita fyrir bita þegar það er búið til.",
     "When this is off, responses will be displayed all at once when they are complete.": "Þegar þetta er slökkt verða svör birt allt í einu þegar þau eru búin.",

[Italian locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Mostra la risposta pezzo per pezzo man mano che viene generata",
     "context size(tokens)": "Dimensione del contesto (token)",
     "unlocked": "Sbloccato",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 4096 token",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 8192 token",
     "Max prompt cost:": "Costo massimo immediato:",
     "Display the response bit by bit as it is generated.": "Visualizza la risposta pezzo per pezzo mentre viene generata.",
     "When this is off, responses will be displayed all at once when they are complete.": "Quando questo è disattivato, le risposte verranno visualizzate tutte in una volta quando sono complete.",

[Japanese locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "生成された応答を逐次表示します。",
     "context size(tokens)": "コンテキストのサイズ(トークン数)",
     "unlocked": "ロック解除",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "モデルが4096トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "モデルが8192トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします",
     "Max prompt cost:": "最大プロンプトコスト:",
     "Display the response bit by bit as it is generated.": "生成されるたびに、応答を逐次表示します。",
     "When this is off, responses will be displayed all at once when they are complete.": "この機能がオフの場合、応答は完全に生成されたときに一度ですべて表示されます。",

[Korean locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "생성되는대로 응답을 조금씩 표시하십시오",
     "context size(tokens)": "컨텍스트 크기 (토큰)",
     "unlocked": "잠금 해제됨",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "모델이 4096 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "모델이 8192 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오",
     "Max prompt cost:": "최대 프롬프트 비용:",
     "Display the response bit by bit as it is generated.": "생성되는 대답을 조금씩 표시합니다.",
     "When this is off, responses will be displayed all at once when they are complete.": "이 기능이 꺼져 있으면 대답은 완료되면 한 번에 모두 표시됩니다.",

[Dutch locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Toon de reactie beetje bij beetje zoals deze wordt gegenereerd",
     "context size(tokens)": "Contextgrootte (tokens)",
     "unlocked": "Ontgrendeld",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 8192 tokens",
     "Max prompt cost:": "Maximale promptkosten:",
     "Display the response bit by bit as it is generated.": "Toon het antwoord stuk voor stuk terwijl het wordt gegenereerd.",
     "When this is off, responses will be displayed all at once when they are complete.": "Als dit uit staat, worden reacties in één keer weergegeven wanneer ze compleet zijn.",

[Portuguese locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Exibir a resposta pouco a pouco conforme ela é gerada",
     "context size(tokens)": "Tamanho do contexto (tokens)",
     "unlocked": "Desbloqueado",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 4096 tokens",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 8192 tokens",
     "Max prompt cost:": "Custo imediato máximo:",
     "Display the response bit by bit as it is generated.": "Exibir a resposta bit a bit conforme é gerada.",
     "When this is off, responses will be displayed all at once when they are complete.": "Quando isso estiver desligado, as respostas serão exibidas de uma vez quando estiverem completas.",

[Russian locale file]
@@ -89,7 +89,7 @@
     "Text Completion presets": "Пресеты для Text Completion",
     "Documentation on sampling parameters": "Документация по параметрам сэмплеров",
     "Set all samplers to their neutral/disabled state.": "Установить все сэмплеры в нейтральное/отключенное состояние.",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 4096 токенов.\nУвеличивайте только если вы знаете, что делаете.",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 8192 токенов.\nУвеличивайте только если вы знаете, что делаете.",
     "Wrap in Quotes": "Заключать в кавычки",
     "Wrap entire user message in quotes before sending.": "Перед отправкой заключать всё сообщение пользователя в кавычки.",
     "Leave off if you use quotes manually for speech.": "Оставьте выключенным, если вручную выставляете кавычки для прямой речи.",

[Ukrainian locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "Поступово відображати відповідь по мірі її створення",
     "context size(tokens)": "Контекст (токени)",
     "unlocked": "Розблоковано",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 4096 токенів",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 8192 токенів",
     "Max prompt cost:": "Максимальна оперативна вартість:",
     "Display the response bit by bit as it is generated.": "Показувати відповідь по бітах по мірі її генерації.",
     "When this is off, responses will be displayed all at once when they are complete.": "Коли це вимкнено, відповіді будуть відображатися разом, коли вони будуть завершені.",

[Chinese (Simplified) locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "逐位显示生成的回复",
     "context size(tokens)": "上下文长度(以词符数计)",
     "unlocked": "解锁",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "仅在您的模型支持大于4096个词符的上下文大小时启用此选项",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "仅在您的模型支持大于8192个词符的上下文大小时启用此选项",
     "Max prompt cost:": "最大提示词费用:",
     "Display the response bit by bit as it is generated.": "随着回复的生成,逐位显示结果。",
     "When this is off, responses will be displayed all at once when they are complete.": "当此选项关闭时,回复将在完成后一次性显示。",

[Chinese (Traditional) locale file]
@@ -32,7 +32,7 @@
     "Streaming_desc": "生成時逐位顯示回應。當此功能關閉時,回應將在完成後一次顯示。",
     "context size(tokens)": "上下文大小(符記數)",
     "unlocked": "解鎖",
-    "Only enable this if your model supports context sizes greater than 4096 tokens": "僅在您的模型支援超過4096個符記的上下文大小時啟用此功能",
+    "Only enable this if your model supports context sizes greater than 8192 tokens": "僅在您的模型支援超過8192個符記的上下文大小時啟用此功能",
     "Max prompt cost:": "最多提示詞費用",
     "Display the response bit by bit as it is generated.": "生成時逐位顯示回應。",
     "When this is off, responses will be displayed all at once when they are complete.": "關閉時,回應將在完成後一次性顯示。",

[text-generation settings script (JavaScript)]
@@ -162,14 +162,9 @@ const settings = {
     banned_tokens: '',
     sampler_priority: OOBA_DEFAULT_ORDER,
     samplers: LLAMACPP_DEFAULT_ORDER,
-    //n_aphrodite: 1,
-    //best_of_aphrodite: 1,
     ignore_eos_token: false,
     spaces_between_special_tokens: true,
     speculative_ngram: false,
-    //logits_processors_aphrodite: [],
-    //log_probs_aphrodite: 0,
-    //prompt_log_probs_aphrodite: 0,
     type: textgen_types.OOBA,
     mancer_model: 'mytholite',
     togetherai_model: 'Gryphe/MythoMax-L2-13b',
@@ -250,14 +245,9 @@ export const setting_names = [
     'json_schema',
     'banned_tokens',
     'legacy_api',
-    //'n_aphrodite',
-    //'best_of_aphrodite',
     'ignore_eos_token',
     'spaces_between_special_tokens',
     'speculative_ngram',
-    //'logits_processors_aphrodite',
-    //'log_probs_aphrodite',
-    //'prompt_log_probs_aphrodite'
     'sampler_order',
     'sampler_priority',
     'samplers',
@@ -1130,8 +1120,8 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
         'minimum_message_content_tokens': settings.type === DREAMGEN ? settings.min_length : undefined,
         'min_tokens': settings.min_length,
         'num_beams': settings.type === OOBA ? settings.num_beams : undefined,
-        'length_penalty': settings.length_penalty,
-        'early_stopping': settings.early_stopping,
+        'length_penalty': settings.type === OOBA ? settings.length_penalty : undefined,
+        'early_stopping': settings.type === OOBA ? settings.early_stopping : undefined,
         'add_bos_token': settings.add_bos_token,
         'dynamic_temperature': dynatemp ? true : undefined,
         'dynatemp_low': dynatemp ? settings.min_temp : undefined,
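length_penalty and early_stopping now use the same guard already applied to num_beams: send the value only for the ooba backend, otherwise undefined. This works because JSON.stringify omits keys whose value is undefined, so other backends never see the keys at all. A minimal illustration (backend id is made up):

    const type = 'aphrodite'; // illustrative backend id
    const params = {
        num_beams: type === 'ooba' ? 4 : undefined,
        length_penalty: type === 'ooba' ? 1.0 : undefined,
    };
    console.log(JSON.stringify(params)); // '{}' -- undefined keys are dropped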
@@ -1208,13 +1198,31 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
     };
     const aphroditeParams = {
         'n': canMultiSwipe ? settings.n : 1,
         'best_of': canMultiSwipe ? settings.n : 1,
+        'frequency_penalty': settings.freq_pen,
+        'presence_penalty': settings.presence_pen,
+        'repetition_penalty': settings.rep_pen,
+        'seed': settings.seed,
+        'stop': getStoppingStrings(isImpersonate, isContinue),
+        'temperature': settings.temp,
+        'temperature_last': settings.temperature_last,
+        'top_p': settings.top_p,
+        'top_k': settings.top_k,
+        'top_a': settings.top_a,
+        'min_p': settings.min_p,
+        'tfs': settings.tfs,
+        'eta_cutoff': settings.eta_cutoff,
+        'epsilon_cutoff': settings.epsilon_cutoff,
+        'typical_p': settings.typical_p,
+        'smoothing_factor': settings.smoothing_factor,
+        'smoothing_curve': settings.smoothing_curve,
         'ignore_eos': settings.ignore_eos_token,
+        'min_tokens': settings.min_length,
+        'skip_special_tokens': settings.skip_special_tokens,
         'spaces_between_special_tokens': settings.spaces_between_special_tokens,
-        'grammar': settings.grammar_string,
-        //'logits_processors': settings.logits_processors_aphrodite,
-        //'logprobs': settings.log_probs_aphrodite,
-        //'prompt_logprobs': settings.prompt_log_probs_aphrodite,
+        'guided_grammar': settings.grammar_string,
+        'guided_json': settings.json_schema,
+        'early_stopping': false, // hacks
+        'include_stop_str_in_output': false,
     };
-
     if (settings.type === OPENROUTER) {
@@ -1254,6 +1262,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
             break;

         case APHRODITE:
+            // set params to aphroditeParams
             params = Object.assign(params, aphroditeParams);
             break;

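In the APHRODITE branch, Object.assign copies every aphroditeParams key onto the generic params object, so backend-specific values overwrite generic ones while unrelated keys survive. A minimal illustration of that merge semantics (values invented):

    // Later sources win: the Aphrodite value replaces the generic one.
    const merged = Object.assign(
        { temperature: 1.0, stop: ['\n'] },
        { temperature: 0.8, guided_json: {} },
    );
    // merged -> { temperature: 0.8, stop: ['\n'], guided_json: {} }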