From a95056db40aebef80a5258ca3df5833a39103e6e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Mon, 21 Apr 2025 21:10:40 +0300 Subject: [PATCH 01/15] Thinking Budget 2.5: Electric Googaloo --- public/css/toggle-dependent.css | 4 ++ public/index.html | 26 ++++++++++ public/scripts/openai.js | 58 ++++++++++++++++++++++ src/endpoints/backends/chat-completions.js | 9 ++++ 4 files changed, 97 insertions(+) diff --git a/public/css/toggle-dependent.css b/public/css/toggle-dependent.css index d582cbbd8..aa805b1f2 100644 --- a/public/css/toggle-dependent.css +++ b/public/css/toggle-dependent.css @@ -498,3 +498,7 @@ label[for="trim_spaces"]:not(:has(input:checked)) small { #banned_tokens_block_ooba:not(:has(#send_banned_tokens_textgenerationwebui:checked)) #banned_tokens_controls_ooba { filter: brightness(0.5); } + +#thinking_budget_controls:not(:has(#enable_thinking:checked)) .range-block:has(#thinking_budget) { + filter: brightness(0.5); +} diff --git a/public/index.html b/public/index.html index 7be6838e7..97529202b 100644 --- a/public/index.html +++ b/public/index.html @@ -2034,6 +2034,32 @@ +
+
+
+ +
+ Must be enabled for Thinking Budget to take effect. +
+
+
+
+ Thinking Budget (tokens) +
+
+
+ +
+
+ +
+
+
+
+
-
-
-
- -
- Must be enabled for Thinking Budget to take effect. -
-
-
-
- Thinking Budget (tokens) -
-
-
- -
-
- -
-
-
-
-
-
-
+
+
diff --git a/public/scripts/openai.js b/public/scripts/openai.js index b22a29fba..1f768b82f 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -165,11 +165,6 @@ const textCompletionModels = [ 'code-search-ada-code-001', ]; -// One more models list to maintain, yay -const thinkingBudgetModels = [ - 'gemini-2.5-flash-preview-04-17', -]; - let biasCache = undefined; export let model_list = []; @@ -221,6 +216,15 @@ const openrouter_middleout_types = { OFF: 'off', }; +export const reasoning_effort_types = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + const sensitiveFields = [ 'reverse_proxy', 'proxy_password', @@ -311,8 +315,6 @@ export const settingsToUpdate = { n: ['#n_openai', 'n', false], bypass_status_check: ['#openai_bypass_status_check', 'bypass_status_check', true], request_images: ['#openai_request_images', 'request_images', true], - enable_thinking: ['#enable_thinking', 'enable_thinking', true], - thinking_budget: ['#thinking_budget', 'thinking_budget', false], }; const default_settings = { @@ -389,11 +391,9 @@ const default_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.medium, enable_web_search: false, request_images: false, - enable_thinking: true, - thinking_budget: 1000, seed: -1, n: 1, }; @@ -472,11 +472,9 @@ const oai_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.medium, enable_web_search: false, request_images: false, - enable_thinking: true, - thinking_budget: 1000, seed: -1, n: 1, }; @@ -1948,6 +1946,35 @@ async function sendAltScaleRequest(messages, logit_bias, signal, type) { return data.output; } +function getReasoningEffort() { + // Do not set the field. Let the model decide. + if (oai_settings.reasoning_effort === reasoning_effort_types.auto) { + return undefined; + } + + // These sources require effort as a string + if (oai_settings.reasoning_effort === reasoning_effort_types.min) { + switch (oai_settings.chat_completion_source) { + case chat_completion_sources.OPENAI: + case chat_completion_sources.CUSTOM: + case chat_completion_sources.XAI: + return reasoning_effort_types.low; + } + } + + // Same here, but max effort + if (oai_settings.reasoning_effort === reasoning_effort_types.max) { + switch (oai_settings.chat_completion_source) { + case chat_completion_sources.OPENAI: + case chat_completion_sources.CUSTOM: + case chat_completion_sources.XAI: + return reasoning_effort_types.high; + } + } + + return oai_settings.reasoning_effort; +} + /** * Send a chat completion request to backend * @param {string} type (impersonate, quiet, continue, etc) @@ -2034,17 +2061,12 @@ async function sendOpenAIRequest(type, messages, signal) { 'char_name': name2, 'group_names': getGroupNames(), 'include_reasoning': Boolean(oai_settings.show_thoughts), - 'reasoning_effort': String(oai_settings.reasoning_effort), + 'reasoning_effort': getReasoningEffort(), 'enable_web_search': Boolean(oai_settings.enable_web_search), 'request_images': Boolean(oai_settings.request_images), 'custom_prompt_post_processing': oai_settings.custom_prompt_post_processing, }; - if (thinkingBudgetModels.includes(model)) { - generate_data['enable_thinking'] = oai_settings.enable_thinking; - generate_data['thinking_budget'] = oai_settings.thinking_budget; - } - if (!canMultiSwipe && ToolManager.canPerformToolCalls(type)) { await ToolManager.registerFunctionToolsOpenAI(generate_data); } @@ -3300,8 +3322,6 @@ function loadOpenAISettings(data, settings) { oai_settings.show_thoughts = settings.show_thoughts ?? default_settings.show_thoughts; oai_settings.reasoning_effort = settings.reasoning_effort ?? default_settings.reasoning_effort; oai_settings.enable_web_search = settings.enable_web_search ?? default_settings.enable_web_search; - oai_settings.enable_thinking = settings.enable_thinking ?? default_settings.enable_thinking; - oai_settings.thinking_budget = settings.thinking_budget ?? default_settings.thinking_budget; oai_settings.request_images = settings.request_images ?? default_settings.request_images; oai_settings.seed = settings.seed ?? default_settings.seed; oai_settings.n = settings.n ?? default_settings.n; @@ -3437,10 +3457,6 @@ function loadOpenAISettings(data, settings) { $('#openai_reasoning_effort').val(oai_settings.reasoning_effort); $(`#openai_reasoning_effort option[value="${oai_settings.reasoning_effort}"]`).prop('selected', true); - $('#enable_thinking').prop('checked', oai_settings.enable_thinking); - $('#thinking_budget').val(oai_settings.thinking_budget); - $('#thinking_budget_counter').val(oai_settings.thinking_budget); - if (settings.reverse_proxy !== undefined) oai_settings.reverse_proxy = settings.reverse_proxy; $('#openai_reverse_proxy').val(oai_settings.reverse_proxy); @@ -3471,7 +3487,6 @@ function loadOpenAISettings(data, settings) { setNamesBehaviorControls(); setContinuePostfixControls(); - updateThinkingBudgetUI(); if (oai_settings.custom_prompt_post_processing === custom_prompt_post_processing_types.CLAUDE) { oai_settings.custom_prompt_post_processing = custom_prompt_post_processing_types.MERGE; @@ -3529,14 +3544,6 @@ function setContinuePostfixControls() { $('#continue_postfix_display').text(checkedItemText); } -/** - * Updates the visibility and state of the Thinking Budget controls. - */ -function updateThinkingBudgetUI() { - const modelSupportsControl = thinkingBudgetModels.includes(getChatCompletionModel()); - $('#thinking_budget_controls').toggle(modelSupportsControl); -} - async function getStatusOpen() { if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) { let status; @@ -3717,8 +3724,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { reasoning_effort: settings.reasoning_effort, enable_web_search: settings.enable_web_search, request_images: settings.request_images, - enable_thinking: settings.enable_thinking, - thinking_budget: settings.thinking_budget, seed: settings.seed, n: settings.n, }; @@ -4749,7 +4754,6 @@ async function onModelChange() { $('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max'))); - updateThinkingBudgetUI(); saveSettingsDebounced(); eventSource.emit(event_types.CHATCOMPLETION_MODEL_CHANGED, value); } @@ -5568,7 +5572,6 @@ export function initOpenAI() { $('#chat_completion_source').on('change', function () { oai_settings.chat_completion_source = String($(this).find(':selected').val()); toggleChatCompletionForms(); - updateThinkingBudgetUI(); saveSettingsDebounced(); reconnectOpenAi(); forceCharacterEditorTokenize(); @@ -5754,29 +5757,6 @@ export function initOpenAI() { saveSettingsDebounced(); }); - $('#enable_thinking').on('input', function () { - oai_settings.enable_thinking = !!$(this).prop('checked'); - updateThinkingBudgetUI(); - saveSettingsDebounced(); - }); - - $('#thinking_budget').on('input', function () { - oai_settings.thinking_budget = Number($(this).val()); - $('#thinking_budget_counter').val(oai_settings.thinking_budget); - saveSettingsDebounced(); - }); - - $('#thinking_budget_counter').on('input', function () { - let value = Number($(this).val()); - const min = Number($('#thinking_budget').attr('min')); - const max = Number($('#thinking_budget').attr('max')); - value = Math.max(min, Math.min(max, value)); - $(this).val(value); - oai_settings.thinking_budget = value; - $('#thinking_budget').val(value); - saveSettingsDebounced(); - }); - $('#openai_enable_web_search').on('input', function () { oai_settings.enable_web_search = !!$(this).prop('checked'); calculateOpenRouterCost(); diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 167dfbcdb..3ecf45f36 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -28,7 +28,8 @@ import { cachingAtDepthForOpenRouterClaude, cachingAtDepthForClaude, getPromptNames, - calculateBudgetTokens, + calculateClaudeBudgetTokens, + calculateGoogleBudgetTokens, } from '../../prompt-converters.js'; import { readSecret, SECRET_KEYS } from '../secrets.js'; @@ -202,7 +203,7 @@ async function sendClaudeRequest(request, response) { // No prefill when thinking voidPrefill = true; const reasoningEffort = request.body.reasoning_effort; - const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); + const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); const minThinkTokens = 1024; if (requestBody.max_tokens <= minThinkTokens) { const newValue = requestBody.max_tokens + minThinkTokens; @@ -340,6 +341,7 @@ async function sendMakerSuiteRequest(request, response) { const stream = Boolean(request.body.stream); const enableWebSearch = Boolean(request.body.enable_web_search); const requestImages = Boolean(request.body.request_images); + const reasoningEffort = String(request.body.reasoning_effort); const isThinking = model.includes('thinking'); const isGemma = model.includes('gemma'); @@ -412,11 +414,15 @@ async function sendMakerSuiteRequest(request, response) { tools.push({ function_declarations: functionDeclarations }); } - if ('enable_thinking' in request.body && 'thinking_budget' in request.body) { - const thinkingEnabled = Boolean(request.body.enable_thinking); - const thinkingBudget = Number(request.body.thinking_budget); + // One more models list to maintain, yay + const thinkingBudgetModels = [ + 'gemini-2.5-flash-preview-04-17', + ]; - if (thinkingEnabled) { + if (thinkingBudgetModels.includes(model)) { + const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort); + + if (Number.isInteger(thinkingBudget)) { generationConfig.thinkingConfig = { thinkingBudget: thinkingBudget }; } } diff --git a/src/prompt-converters.js b/src/prompt-converters.js index 8ea3ab896..266047b8d 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -3,6 +3,15 @@ import { getConfigValue, tryParse } from './util.js'; const PROMPT_PLACEHOLDER = getConfigValue('promptPlaceholder', 'Let\'s get started.'); +const REASONING_EFFORT = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + /** * @typedef {object} PromptNames * @property {string} charName Character name @@ -944,25 +953,35 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) { } /** - * Calculate the budget tokens for a given reasoning effort. + * Calculate the Claude budget tokens for a given reasoning effort. * @param {number} maxTokens Maximum tokens * @param {string} reasoningEffort Reasoning effort * @param {boolean} stream If streaming is enabled * @returns {number} Budget tokens */ -export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { +export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) { let budgetTokens = 0; switch (reasoningEffort) { - case 'low': + // Claude doesn't have a default budget value. Use same as min. + case REASONING_EFFORT.auto: + budgetTokens = 1024; + break; + case REASONING_EFFORT.min: + budgetTokens = 1024; + break; + case REASONING_EFFORT.low: budgetTokens = Math.floor(maxTokens * 0.1); break; - case 'medium': + case REASONING_EFFORT.medium: budgetTokens = Math.floor(maxTokens * 0.25); break; - case 'high': + case REASONING_EFFORT.high: budgetTokens = Math.floor(maxTokens * 0.5); break; + case REASONING_EFFORT.max: + budgetTokens = maxTokens; + break; } budgetTokens = Math.max(budgetTokens, 1024); @@ -973,3 +992,37 @@ export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { return budgetTokens; } + +/** + * Calculate the Google budget tokens for a given reasoning effort. + * @param {number} maxTokens Maximum tokens + * @param {string} reasoningEffort Reasoning effort + * @returns {number?} Budget tokens + */ +export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort) { + let budgetTokens = 0; + + switch (reasoningEffort) { + case REASONING_EFFORT.auto: + return null; + case REASONING_EFFORT.min: + budgetTokens = 0; + break; + case REASONING_EFFORT.low: + budgetTokens = Math.floor(maxTokens * 0.1); + break; + case REASONING_EFFORT.medium: + budgetTokens = Math.floor(maxTokens * 0.25); + break; + case REASONING_EFFORT.high: + budgetTokens = Math.floor(maxTokens * 0.5); + break; + case REASONING_EFFORT.max: + budgetTokens = maxTokens; + break; + } + + budgetTokens = Math.min(budgetTokens, 24576); + + return budgetTokens; +} From 5c8b8f4b9876d227296ae65d5a9e55256528b90a Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 00:44:14 +0300 Subject: [PATCH 03/15] Refactor getReasoningEffort --- public/scripts/openai.js | 41 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 1f768b82f..6ffe84138 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1947,32 +1947,27 @@ async function sendAltScaleRequest(messages, logit_bias, signal, type) { } function getReasoningEffort() { - // Do not set the field. Let the model decide. - if (oai_settings.reasoning_effort === reasoning_effort_types.auto) { - return undefined; + // These sources expect the effort as string. + const reasoningEffortSources = [ + chat_completion_sources.OPENAI, + chat_completion_sources.CUSTOM, + chat_completion_sources.XAI, + ]; + + if (!reasoningEffortSources.includes(oai_settings.chat_completion_source)) { + return oai_settings.reasoning_effort; } - // These sources require effort as a string - if (oai_settings.reasoning_effort === reasoning_effort_types.min) { - switch (oai_settings.chat_completion_source) { - case chat_completion_sources.OPENAI: - case chat_completion_sources.CUSTOM: - case chat_completion_sources.XAI: - return reasoning_effort_types.low; - } + switch (oai_settings.reasoning_effort) { + case reasoning_effort_types.auto: + return undefined; + case reasoning_effort_types.min: + return reasoning_effort_types.low; + case reasoning_effort_types.max: + return reasoning_effort_types.high; + default: + return oai_settings.reasoning_effort; } - - // Same here, but max effort - if (oai_settings.reasoning_effort === reasoning_effort_types.max) { - switch (oai_settings.chat_completion_source) { - case chat_completion_sources.OPENAI: - case chat_completion_sources.CUSTOM: - case chat_completion_sources.XAI: - return reasoning_effort_types.high; - } - } - - return oai_settings.reasoning_effort; } /** From 266fa5cbf83c7bbcde47c733e5fb27c398f01404 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 00:45:49 +0300 Subject: [PATCH 04/15] Make auto (undefined) actually work --- src/endpoints/backends/chat-completions.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 3ecf45f36..934c9d52e 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -883,7 +883,7 @@ async function sendXaiRequest(request, response) { bodyParams['stop'] = request.body.stop; } - if (['grok-3-mini-beta', 'grok-3-mini-fast-beta'].includes(request.body.model)) { + if (request.body.reasoning_effort && ['grok-3-mini-beta', 'grok-3-mini-fast-beta'].includes(request.body.model)) { bodyParams['reasoning_effort'] = request.body.reasoning_effort === 'high' ? 'high' : 'low'; } @@ -1273,7 +1273,7 @@ router.post('/generate', function (request, response) { } // A few of OpenAIs reasoning models support reasoning effort - if ([CHAT_COMPLETION_SOURCES.CUSTOM, CHAT_COMPLETION_SOURCES.OPENAI].includes(request.body.chat_completion_source)) { + if (request.body.reasoning_effort && [CHAT_COMPLETION_SOURCES.CUSTOM, CHAT_COMPLETION_SOURCES.OPENAI].includes(request.body.chat_completion_source)) { if (['o1', 'o3-mini', 'o3-mini-2025-01-31', 'o4-mini', 'o4-mini-2025-04-16', 'o3', 'o3-2025-04-16'].includes(request.body.model)) { bodyParams['reasoning_effort'] = request.body.reasoning_effort; } From e43023fde78006ad7a235fed188a41cc0dc670aa Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 00:54:03 +0300 Subject: [PATCH 05/15] Cut option labels --- public/index.html | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index 58fefa004..bcb376473 100644 --- a/public/index.html +++ b/public/index.html @@ -2055,12 +2055,12 @@
From f61d600c05e5c562fe1cfe884432323f89550269 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 00:59:12 +0300 Subject: [PATCH 06/15] ok buddy claude --- src/prompt-converters.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prompt-converters.js b/src/prompt-converters.js index 266047b8d..69292b7a8 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -365,7 +365,7 @@ export function convertCohereMessages(messages, names) { * @param {string} model Model name * @param {boolean} useSysPrompt Use system prompt * @param {PromptNames} names Prompt names - * @returns {{contents: *[], system_instruction: {parts: {text: string}}}} Prompt for Google MakerSuite models + * @returns {{contents: *[], system_instruction: {parts: {text: string}[]}}} Prompt for Google MakerSuite models */ export function convertGooglePrompt(messages, model, useSysPrompt, names) { const visionSupportedModels = [ @@ -980,7 +980,7 @@ export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) budgetTokens = Math.floor(maxTokens * 0.5); break; case REASONING_EFFORT.max: - budgetTokens = maxTokens; + budgetTokens = Math.floor(maxTokens * 0.95); break; } From f81bbbea08aca994f758b116956ac51e573c03af Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 01:02:28 +0300 Subject: [PATCH 07/15] Fix effort blurb title --- public/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/index.html b/public/index.html index bcb376473..b90605a0d 100644 --- a/public/index.html +++ b/public/index.html @@ -2052,7 +2052,7 @@
From cf44ac8c1f2537df3cfb40a214b307c58455c6ab Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 20:04:00 +0300 Subject: [PATCH 12/15] Don't add sys instruction if empty --- src/endpoints/backends/chat-completions.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 934c9d52e..c582a099e 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -433,7 +433,7 @@ async function sendMakerSuiteRequest(request, response) { generationConfig: generationConfig, }; - if (useSystemPrompt) { + if (useSystemPrompt && Array.isArray(prompt.system_instruction) && prompt.system_instruction.length) { body.systemInstruction = prompt.system_instruction; } From 50cdaadba08c8b032f79f6908164fb31737c836e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 20:05:28 +0300 Subject: [PATCH 13/15] Only verify parts length --- src/endpoints/backends/chat-completions.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index c582a099e..ad9557e11 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -433,7 +433,7 @@ async function sendMakerSuiteRequest(request, response) { generationConfig: generationConfig, }; - if (useSystemPrompt && Array.isArray(prompt.system_instruction) && prompt.system_instruction.length) { + if (useSystemPrompt && Array.isArray(prompt.system_instruction.parts) && prompt.system_instruction.parts.length) { body.systemInstruction = prompt.system_instruction; } From 5241b22a7392dc7ff30c5646ff3d1be5cb8e52da Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 23 Apr 2025 21:38:31 +0300 Subject: [PATCH 14/15] Add reasoning effort control for CC OpenRouter Closes #3890 --- public/index.html | 4 ++-- public/scripts/openai.js | 1 + src/endpoints/backends/chat-completions.js | 4 ++++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/public/index.html b/public/index.html index 4dbcf243c..f708229d4 100644 --- a/public/index.html +++ b/public/index.html @@ -2048,11 +2048,11 @@
-
+