From efd477da04a3c916124eeaacd78a74a6febdf0f1 Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Fri, 13 Sep 2024 10:34:06 +0000 Subject: [PATCH 1/8] chore: slight refactor of aphrodite samplers --- public/index.html | 38 +++++++++++------------ public/scripts/textgen-settings.js | 49 ++++++++++++++++++++---------- 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/public/index.html b/public/index.html index 6f8c87112..c4b299fbe 100644 --- a/public/index.html +++ b/public/index.html @@ -1189,7 +1189,7 @@ -
+
Epsilon Cutoff
@@ -1197,7 +1197,7 @@
-
+
Eta Cutoff
@@ -1250,7 +1250,7 @@
-
+
Min Length @@ -1339,7 +1339,7 @@
-
+

@@ -1367,7 +1367,7 @@

-
+

@@ -1396,7 +1396,7 @@

-
+

-
-
-

JSON Schema - - - - - -

- -
-
+

+
+
+

JSON Schema + + + + + +

+ +

@@ -2425,7 +2425,7 @@
-
@@ -1396,28 +1396,36 @@
-
+

- # of Beams +
- Length Penalty +
@@ -1487,7 +1495,10 @@
- Seed +
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 0c2d65519..358f9d30c 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -188,7 +188,6 @@ const settings = { xtc_threshold: 0.1, xtc_probability: 0, include_stop_str_in_output: false, - guided_regex: '', }; export let textgenerationwebui_banned_in_macros = []; @@ -262,7 +261,6 @@ export const setting_names = [ 'xtc_threshold', 'xtc_probability', 'include_stop_str_in_output', - 'guided_regex', ]; const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba'); @@ -1207,7 +1205,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'presence_penalty': settings.presence_pen, 'repetition_penalty': settings.rep_pen, 'seed': settings.seed, - 'stop': settings.stopping_strings, + 'stop': getStoppingStrings(isImpersonate, isContinue), 'temperature': settings.temp, 'temperature_last': settings.temperature_last, 'top_p': settings.top_p, @@ -1221,6 +1219,7 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'smoothing_factor': settings.smoothing_factor, 'smoothing_curve': settings.smoothing_curve, 'use_beam_search': settings.use_beam_search, + 'best_of': settings.num_beams > 1 ? settings.num_beams : settings.n, 'length_penalty': settings.length_penalty, 'early_stopping': settings.early_stopping, 'ignore_eos': settings.ignore_eos_token, @@ -1230,8 +1229,6 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'spaces_between_special_tokens': settings.spaces_between_special_tokens, 'guided_grammar': settings.grammar_string, 'guided_json': settings.json_schema, - - }; if (settings.type === OPENROUTER) { @@ -1271,7 +1268,8 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, break; case APHRODITE: - params = Object.assign(aphroditeParams); + // set params to aphroditeParams + params = Object.assign(params, aphroditeParams); break; default: From fde76069e03ba990041642e6cf26096c13e09cfa Mon Sep 17 00:00:00 2001 From: AlpinDale Date: Sat, 14 Sep 2024 12:42:21 +0000 Subject: [PATCH 3/8] remove beam search --- public/index.html | 2 +- public/scripts/textgen-settings.js | 4 ---- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/public/index.html b/public/index.html index 366b0b053..d5423297e 100644 --- a/public/index.html +++ b/public/index.html @@ -1422,7 +1422,7 @@
diff --git a/public/locales/ar-sa.json b/public/locales/ar-sa.json index 224bb480f..f599ccd2b 100644 --- a/public/locales/ar-sa.json +++ b/public/locales/ar-sa.json @@ -32,7 +32,7 @@ "Streaming_desc": "عرض الاستجابة لحظيا كما يتم إنشاؤها.", "context size(tokens)": "حجم الاحرف (بعدد الاحرف او الرموز)", "unlocked": "مفتوح", - "Only enable this if your model supports context sizes greater than 4096 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 4096 رمزًا.", + "Only enable this if your model supports context sizes greater than 8192 tokens": "قم بتمكين هذا فقط إذا كانت نموذجك يدعم مقاطع السياق بأحجام أكبر من 8192 رمزًا.", "Max prompt cost:": "أقصى تكلفة فورية:", "Display the response bit by bit as it is generated.": "عرض الاستجابة بتدريج كما يتم إنشاؤها.", "When this is off, responses will be displayed all at once when they are complete.": "عند إيقاف هذا الخيار، سيتم عرض الردود جميعها دفعة واحدة عند اكتمالها.", diff --git a/public/locales/de-de.json b/public/locales/de-de.json index 7932eed10..4d37d5806 100644 --- a/public/locales/de-de.json +++ b/public/locales/de-de.json @@ -32,7 +32,7 @@ "Streaming_desc": "Zeige die Antwort Stück für Stück an, während sie generiert wird.", "context size(tokens)": "Größe des Zusammenhangs (Tokens)", "unlocked": "Freigeschaltet", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 4096 Tokens unterstützt.", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Aktiviere dies nur, wenn dein Modell Kontextgrößen von mehr als 8192 Tokens unterstützt.", "Max prompt cost:": "Maximale Sofortkosten:", "Display the response bit by bit as it is generated.": "Zeige die Antwort Stück für Stück, während sie generiert wird.", "When this is off, responses will be displayed all at once when they are complete.": "Wenn dies ausgeschaltet ist, werden Antworten angezeigt, sobald sie vollständig sind.", diff --git a/public/locales/es-es.json b/public/locales/es-es.json index e03f0ca9b..8db9a8cc4 100644 --- a/public/locales/es-es.json +++ b/public/locales/es-es.json @@ -32,7 +32,7 @@ "Streaming_desc": "Mostrar la respuesta poco a poco según se genera", "context size(tokens)": "Tamaño de contexto (tokens)", "unlocked": "Desbloqueado", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 4096 tokens", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Habilita esto solo si tu modelo admite tamaños de contexto mayores de 8192 tokens", "Max prompt cost:": "Costo inmediato máximo:", "Display the response bit by bit as it is generated.": "Mostrar la respuesta poco a poco a medida que se genera.", "When this is off, responses will be displayed all at once when they are complete.": "Cuando esto está apagado, las respuestas se mostrarán de una vez cuando estén completas.", diff --git a/public/locales/fr-fr.json b/public/locales/fr-fr.json index 1d4403460..2ae937879 100644 --- a/public/locales/fr-fr.json +++ b/public/locales/fr-fr.json @@ -32,7 +32,7 @@ "Streaming_desc": "Afficher la réponse bit par bit au fur et à mesure de sa génération", "context size(tokens)": "Taille du contexte (en tokens)", "unlocked": "Déverrouillé", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 4096 tokens", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Activez cela uniquement si votre modèle prend en charge des tailles de contexte supérieures à 8192 tokens", "Max prompt cost:": "Coût rapide maximum :", "Display the response bit by bit as it is generated.": "Afficher la réponse morceau par morceau au fur et à mesure de sa génération.", "When this is off, responses will be displayed all at once when they are complete.": "Lorsque cette fonction est désactivée, les réponses s'affichent toutes en une fois lorsqu'elles sont complètes.", diff --git a/public/locales/is-is.json b/public/locales/is-is.json index 5f36b6277..f03d6c533 100644 --- a/public/locales/is-is.json +++ b/public/locales/is-is.json @@ -32,7 +32,7 @@ "Streaming_desc": "Birta svarið bita fyrir bita þegar það er myndað.", "context size(tokens)": "Stærð samhengis (í táknum eða stöfum)", "unlocked": "Opinn", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 4096 tákn.", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Virkjið þetta aðeins ef stærð samhengis styður model meira en 8192 tákn.", "Max prompt cost:": "Hámarks skyndikostnaður:", "Display the response bit by bit as it is generated.": "Birta svarid bita fyrir bita þegar það er búið til.", "When this is off, responses will be displayed all at once when they are complete.": "Þegar þetta er slökkt verða svör birt allt í einu þegar þau eru búin.", diff --git a/public/locales/it-it.json b/public/locales/it-it.json index 53d969dcc..10fc75d2b 100644 --- a/public/locales/it-it.json +++ b/public/locales/it-it.json @@ -32,7 +32,7 @@ "Streaming_desc": "Mostra la risposta pezzo per pezzo man mano che viene generata", "context size(tokens)": "Dimensione del contesto (token)", "unlocked": "Sbloccato", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 4096 token", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Abilita solo se il tuo modello supporta dimensioni del contesto superiori a 8192 token", "Max prompt cost:": "Costo massimo immediato:", "Display the response bit by bit as it is generated.": "Visualizza la risposta pezzo per pezzo mentre viene generata.", "When this is off, responses will be displayed all at once when they are complete.": "Quando questo è disattivato, le risposte verranno visualizzate tutte in una volta quando sono complete.", diff --git a/public/locales/ja-jp.json b/public/locales/ja-jp.json index a38abf9a5..474f32a99 100644 --- a/public/locales/ja-jp.json +++ b/public/locales/ja-jp.json @@ -32,7 +32,7 @@ "Streaming_desc": "生成された応答を逐次表示します。", "context size(tokens)": "コンテキストのサイズ(トークン数)", "unlocked": "ロック解除", - "Only enable this if your model supports context sizes greater than 4096 tokens": "モデルが4096トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします", + "Only enable this if your model supports context sizes greater than 8192 tokens": "モデルが8192トークンを超えるコンテキストサイズをサポートしている場合にのみ有効にします", "Max prompt cost:": "最大プロンプトコスト:", "Display the response bit by bit as it is generated.": "生成されるたびに、応答を逐次表示します。", "When this is off, responses will be displayed all at once when they are complete.": "この機能がオフの場合、応答は完全に生成されたときに一度ですべて表示されます。", diff --git a/public/locales/ko-kr.json b/public/locales/ko-kr.json index 1b23dd8cd..1ba7985af 100644 --- a/public/locales/ko-kr.json +++ b/public/locales/ko-kr.json @@ -32,7 +32,7 @@ "Streaming_desc": "생성되는대로 응답을 조금씩 표시하십시오", "context size(tokens)": "컨텍스트 크기 (토큰)", "unlocked": "잠금 해제됨", - "Only enable this if your model supports context sizes greater than 4096 tokens": "모델이 4096 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오", + "Only enable this if your model supports context sizes greater than 8192 tokens": "모델이 8192 토큰보다 큰 컨텍스트 크기를 지원하는 경우에만 활성화하십시오", "Max prompt cost:": "최대 프롬프트 비용:", "Display the response bit by bit as it is generated.": "생성되는 대답을 조금씩 표시합니다.", "When this is off, responses will be displayed all at once when they are complete.": "이 기능이 꺼져 있으면 대답은 완료되면 한 번에 모두 표시됩니다.", diff --git a/public/locales/nl-nl.json b/public/locales/nl-nl.json index 2320db081..069c89b9c 100644 --- a/public/locales/nl-nl.json +++ b/public/locales/nl-nl.json @@ -32,7 +32,7 @@ "Streaming_desc": "Toon de reactie beetje bij beetje zoals deze wordt gegenereerd", "context size(tokens)": "Contextgrootte (tokens)", "unlocked": "Ontgrendeld", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 4096 tokens", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Schakel dit alleen in als uw model contextgroottes ondersteunt groter dan 8192 tokens", "Max prompt cost:": "Maximale promptkosten:", "Display the response bit by bit as it is generated.": "Toon het antwoord stuk voor stuk terwijl het wordt gegenereerd.", "When this is off, responses will be displayed all at once when they are complete.": "Als dit uit staat, worden reacties in één keer weergegeven wanneer ze compleet zijn.", diff --git a/public/locales/pt-pt.json b/public/locales/pt-pt.json index a287d4879..771d60a94 100644 --- a/public/locales/pt-pt.json +++ b/public/locales/pt-pt.json @@ -32,7 +32,7 @@ "Streaming_desc": "Exibir a resposta pouco a pouco conforme ela é gerada", "context size(tokens)": "Tamanho do contexto (tokens)", "unlocked": "Desbloqueado", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 4096 tokens", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Ative isso apenas se seu modelo suportar tamanhos de contexto maiores que 8192 tokens", "Max prompt cost:": "Custo imediato máximo:", "Display the response bit by bit as it is generated.": "Exibir a resposta bit a bit conforme é gerada.", "When this is off, responses will be displayed all at once when they are complete.": "Quando isso estiver desligado, as respostas serão exibidas de uma vez quando estiverem completas.", diff --git a/public/locales/ru-ru.json b/public/locales/ru-ru.json index 311d41f32..d7ce65466 100644 --- a/public/locales/ru-ru.json +++ b/public/locales/ru-ru.json @@ -89,7 +89,7 @@ "Text Completion presets": "Пресеты для Text Completion", "Documentation on sampling parameters": "Документация по параметрам сэмплеров", "Set all samplers to their neutral/disabled state.": "Установить все сэмплеры в нейтральное/отключенное состояние.", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 4096 токенов.\nУвеличивайте только если вы знаете, что делаете.", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Включайте эту опцию, только если ваша модель поддерживает размер контекста более 8192 токенов.\nУвеличивайте только если вы знаете, что делаете.", "Wrap in Quotes": "Заключать в кавычки", "Wrap entire user message in quotes before sending.": "Перед отправкой заключать всё сообщение пользователя в кавычки.", "Leave off if you use quotes manually for speech.": "Оставьте выключенным, если вручную выставляете кавычки для прямой речи.", diff --git a/public/locales/uk-ua.json b/public/locales/uk-ua.json index 4c002009e..3a6d9c4e2 100644 --- a/public/locales/uk-ua.json +++ b/public/locales/uk-ua.json @@ -32,7 +32,7 @@ "Streaming_desc": "Поступово відображати відповідь по мірі її створення", "context size(tokens)": "Контекст (токени)", "unlocked": "Розблоковано", - "Only enable this if your model supports context sizes greater than 4096 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 4096 токенів", + "Only enable this if your model supports context sizes greater than 8192 tokens": "Увімкніть це лише в разі підтримки моделлю розмірів контексту більше 8192 токенів", "Max prompt cost:": "Максимальна оперативна вартість:", "Display the response bit by bit as it is generated.": "Показувати відповідь по бітах по мірі її генерації.", "When this is off, responses will be displayed all at once when they are complete.": "Коли це вимкнено, відповіді будуть відображатися разом, коли вони будуть завершені.", diff --git a/public/locales/zh-cn.json b/public/locales/zh-cn.json index 844921e92..7fc78e51a 100644 --- a/public/locales/zh-cn.json +++ b/public/locales/zh-cn.json @@ -32,7 +32,7 @@ "Streaming_desc": "逐位显示生成的回复", "context size(tokens)": "上下文长度(以词符数计)", "unlocked": "解锁", - "Only enable this if your model supports context sizes greater than 4096 tokens": "仅在您的模型支持大于4096个词符的上下文大小时启用此选项", + "Only enable this if your model supports context sizes greater than 8192 tokens": "仅在您的模型支持大于8192个词符的上下文大小时启用此选项", "Max prompt cost:": "最大提示词费用:", "Display the response bit by bit as it is generated.": "随着回复的生成,逐位显示结果。", "When this is off, responses will be displayed all at once when they are complete.": "当此选项关闭时,回复将在完成后一次性显示。", diff --git a/public/locales/zh-tw.json b/public/locales/zh-tw.json index 20abdeba8..c3b8869cc 100644 --- a/public/locales/zh-tw.json +++ b/public/locales/zh-tw.json @@ -32,7 +32,7 @@ "Streaming_desc": "生成時逐位顯示回應。當此功能關閉時,回應將在完成後一次顯示。", "context size(tokens)": "上下文大小(符記數)", "unlocked": "解鎖", - "Only enable this if your model supports context sizes greater than 4096 tokens": "僅在您的模型支援超過4096個符記的上下文大小時啟用此功能", + "Only enable this if your model supports context sizes greater than 8192 tokens": "僅在您的模型支援超過8192個符記的上下文大小時啟用此功能", "Max prompt cost:": "最多提示詞費用", "Display the response bit by bit as it is generated.": "生成時逐位顯示回應。", "When this is off, responses will be displayed all at once when they are complete.": "關閉時,回應將在完成後一次性顯示。", From 28837ff883256638142600cbab6ca06f055d7bf7 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sat, 14 Sep 2024 16:32:50 +0300 Subject: [PATCH 6/8] Hard code include_stop_str_in_output --- public/scripts/textgen-settings.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 90f3d1d11..80061a271 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -187,7 +187,6 @@ const settings = { openrouter_allow_fallbacks: true, xtc_threshold: 0.1, xtc_probability: 0, - include_stop_str_in_output: false, }; export let textgenerationwebui_banned_in_macros = []; @@ -260,7 +259,6 @@ export const setting_names = [ 'openrouter_allow_fallbacks', 'xtc_threshold', 'xtc_probability', - 'include_stop_str_in_output', ]; const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba'); @@ -1221,11 +1219,11 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'ignore_eos': settings.ignore_eos_token, 'min_tokens': settings.min_length, 'skip_special_tokens': settings.skip_special_tokens, - 'include_stop_str_in_output': settings.include_stop_str_in_output, 'spaces_between_special_tokens': settings.spaces_between_special_tokens, 'guided_grammar': settings.grammar_string, 'guided_json': settings.json_schema, - 'early_stopping': false, // hack + 'early_stopping': false, // hacks + 'include_stop_str_in_output': false, }; if (settings.type === OPENROUTER) { From f0d361bc7aa20e266d7353916734a8bec877995b Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sat, 14 Sep 2024 16:41:22 +0300 Subject: [PATCH 7/8] Remove unused beam search --- public/scripts/textgen-settings.js | 1 - 1 file changed, 1 deletion(-) diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index 80061a271..1fb5917e7 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -123,7 +123,6 @@ const settings = { rep_pen_slope: 1, no_repeat_ngram_size: 0, penalty_alpha: 0, - use_beam_search: false, num_beams: 1, length_penalty: 1, min_length: 0, From 50de67898020810e6d79fc62c1102be91f3d77ac Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sat, 14 Sep 2024 16:53:21 +0300 Subject: [PATCH 8/8] Hide beam search for vllm. It never worked. --- public/index.html | 2 +- public/scripts/textgen-settings.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index e08105239..dd4a111e8 100644 --- a/public/index.html +++ b/public/index.html @@ -1396,7 +1396,7 @@ -
+