diff --git a/public/css/toggle-dependent.css b/public/css/toggle-dependent.css index aa805b1f2..d582cbbd8 100644 --- a/public/css/toggle-dependent.css +++ b/public/css/toggle-dependent.css @@ -498,7 +498,3 @@ label[for="trim_spaces"]:not(:has(input:checked)) small { #banned_tokens_block_ooba:not(:has(#send_banned_tokens_textgenerationwebui:checked)) #banned_tokens_controls_ooba { filter: brightness(0.5); } - -#thinking_budget_controls:not(:has(#enable_thinking:checked)) .range-block:has(#thinking_budget) { - filter: brightness(0.5); -} diff --git a/public/index.html b/public/index.html index 97529202b..58fefa004 100644 --- a/public/index.html +++ b/public/index.html @@ -2034,32 +2034,6 @@ -
-
-
- -
- Must be enabled for Thinking Budget to take effect. -
-
-
-
- Thinking Budget (tokens) -
-
-
- -
-
- -
-
-
-
-
-
-
+
+
diff --git a/public/scripts/openai.js b/public/scripts/openai.js index b22a29fba..1f768b82f 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -165,11 +165,6 @@ const textCompletionModels = [ 'code-search-ada-code-001', ]; -// One more models list to maintain, yay -const thinkingBudgetModels = [ - 'gemini-2.5-flash-preview-04-17', -]; - let biasCache = undefined; export let model_list = []; @@ -221,6 +216,15 @@ const openrouter_middleout_types = { OFF: 'off', }; +export const reasoning_effort_types = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + const sensitiveFields = [ 'reverse_proxy', 'proxy_password', @@ -311,8 +315,6 @@ export const settingsToUpdate = { n: ['#n_openai', 'n', false], bypass_status_check: ['#openai_bypass_status_check', 'bypass_status_check', true], request_images: ['#openai_request_images', 'request_images', true], - enable_thinking: ['#enable_thinking', 'enable_thinking', true], - thinking_budget: ['#thinking_budget', 'thinking_budget', false], }; const default_settings = { @@ -389,11 +391,9 @@ const default_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.medium, enable_web_search: false, request_images: false, - enable_thinking: true, - thinking_budget: 1000, seed: -1, n: 1, }; @@ -472,11 +472,9 @@ const oai_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.medium, enable_web_search: false, request_images: false, - enable_thinking: true, - thinking_budget: 1000, seed: -1, n: 1, }; @@ -1948,6 +1946,35 @@ async function sendAltScaleRequest(messages, logit_bias, signal, type) { return data.output; } +function getReasoningEffort() { + // Do not set the field. Let the model decide. + if (oai_settings.reasoning_effort === reasoning_effort_types.auto) { + return undefined; + } + + // These sources require effort as a string + if (oai_settings.reasoning_effort === reasoning_effort_types.min) { + switch (oai_settings.chat_completion_source) { + case chat_completion_sources.OPENAI: + case chat_completion_sources.CUSTOM: + case chat_completion_sources.XAI: + return reasoning_effort_types.low; + } + } + + // Same here, but max effort + if (oai_settings.reasoning_effort === reasoning_effort_types.max) { + switch (oai_settings.chat_completion_source) { + case chat_completion_sources.OPENAI: + case chat_completion_sources.CUSTOM: + case chat_completion_sources.XAI: + return reasoning_effort_types.high; + } + } + + return oai_settings.reasoning_effort; +} + /** * Send a chat completion request to backend * @param {string} type (impersonate, quiet, continue, etc) @@ -2034,17 +2061,12 @@ async function sendOpenAIRequest(type, messages, signal) { 'char_name': name2, 'group_names': getGroupNames(), 'include_reasoning': Boolean(oai_settings.show_thoughts), - 'reasoning_effort': String(oai_settings.reasoning_effort), + 'reasoning_effort': getReasoningEffort(), 'enable_web_search': Boolean(oai_settings.enable_web_search), 'request_images': Boolean(oai_settings.request_images), 'custom_prompt_post_processing': oai_settings.custom_prompt_post_processing, }; - if (thinkingBudgetModels.includes(model)) { - generate_data['enable_thinking'] = oai_settings.enable_thinking; - generate_data['thinking_budget'] = oai_settings.thinking_budget; - } - if (!canMultiSwipe && ToolManager.canPerformToolCalls(type)) { await ToolManager.registerFunctionToolsOpenAI(generate_data); } @@ -3300,8 +3322,6 @@ function loadOpenAISettings(data, settings) { oai_settings.show_thoughts = settings.show_thoughts ?? default_settings.show_thoughts; oai_settings.reasoning_effort = settings.reasoning_effort ?? default_settings.reasoning_effort; oai_settings.enable_web_search = settings.enable_web_search ?? default_settings.enable_web_search; - oai_settings.enable_thinking = settings.enable_thinking ?? default_settings.enable_thinking; - oai_settings.thinking_budget = settings.thinking_budget ?? default_settings.thinking_budget; oai_settings.request_images = settings.request_images ?? default_settings.request_images; oai_settings.seed = settings.seed ?? default_settings.seed; oai_settings.n = settings.n ?? default_settings.n; @@ -3437,10 +3457,6 @@ function loadOpenAISettings(data, settings) { $('#openai_reasoning_effort').val(oai_settings.reasoning_effort); $(`#openai_reasoning_effort option[value="${oai_settings.reasoning_effort}"]`).prop('selected', true); - $('#enable_thinking').prop('checked', oai_settings.enable_thinking); - $('#thinking_budget').val(oai_settings.thinking_budget); - $('#thinking_budget_counter').val(oai_settings.thinking_budget); - if (settings.reverse_proxy !== undefined) oai_settings.reverse_proxy = settings.reverse_proxy; $('#openai_reverse_proxy').val(oai_settings.reverse_proxy); @@ -3471,7 +3487,6 @@ function loadOpenAISettings(data, settings) { setNamesBehaviorControls(); setContinuePostfixControls(); - updateThinkingBudgetUI(); if (oai_settings.custom_prompt_post_processing === custom_prompt_post_processing_types.CLAUDE) { oai_settings.custom_prompt_post_processing = custom_prompt_post_processing_types.MERGE; @@ -3529,14 +3544,6 @@ function setContinuePostfixControls() { $('#continue_postfix_display').text(checkedItemText); } -/** - * Updates the visibility and state of the Thinking Budget controls. - */ -function updateThinkingBudgetUI() { - const modelSupportsControl = thinkingBudgetModels.includes(getChatCompletionModel()); - $('#thinking_budget_controls').toggle(modelSupportsControl); -} - async function getStatusOpen() { if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) { let status; @@ -3717,8 +3724,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) { reasoning_effort: settings.reasoning_effort, enable_web_search: settings.enable_web_search, request_images: settings.request_images, - enable_thinking: settings.enable_thinking, - thinking_budget: settings.thinking_budget, seed: settings.seed, n: settings.n, }; @@ -4749,7 +4754,6 @@ async function onModelChange() { $('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max'))); - updateThinkingBudgetUI(); saveSettingsDebounced(); eventSource.emit(event_types.CHATCOMPLETION_MODEL_CHANGED, value); } @@ -5568,7 +5572,6 @@ export function initOpenAI() { $('#chat_completion_source').on('change', function () { oai_settings.chat_completion_source = String($(this).find(':selected').val()); toggleChatCompletionForms(); - updateThinkingBudgetUI(); saveSettingsDebounced(); reconnectOpenAi(); forceCharacterEditorTokenize(); @@ -5754,29 +5757,6 @@ export function initOpenAI() { saveSettingsDebounced(); }); - $('#enable_thinking').on('input', function () { - oai_settings.enable_thinking = !!$(this).prop('checked'); - updateThinkingBudgetUI(); - saveSettingsDebounced(); - }); - - $('#thinking_budget').on('input', function () { - oai_settings.thinking_budget = Number($(this).val()); - $('#thinking_budget_counter').val(oai_settings.thinking_budget); - saveSettingsDebounced(); - }); - - $('#thinking_budget_counter').on('input', function () { - let value = Number($(this).val()); - const min = Number($('#thinking_budget').attr('min')); - const max = Number($('#thinking_budget').attr('max')); - value = Math.max(min, Math.min(max, value)); - $(this).val(value); - oai_settings.thinking_budget = value; - $('#thinking_budget').val(value); - saveSettingsDebounced(); - }); - $('#openai_enable_web_search').on('input', function () { oai_settings.enable_web_search = !!$(this).prop('checked'); calculateOpenRouterCost(); diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 167dfbcdb..3ecf45f36 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -28,7 +28,8 @@ import { cachingAtDepthForOpenRouterClaude, cachingAtDepthForClaude, getPromptNames, - calculateBudgetTokens, + calculateClaudeBudgetTokens, + calculateGoogleBudgetTokens, } from '../../prompt-converters.js'; import { readSecret, SECRET_KEYS } from '../secrets.js'; @@ -202,7 +203,7 @@ async function sendClaudeRequest(request, response) { // No prefill when thinking voidPrefill = true; const reasoningEffort = request.body.reasoning_effort; - const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); + const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); const minThinkTokens = 1024; if (requestBody.max_tokens <= minThinkTokens) { const newValue = requestBody.max_tokens + minThinkTokens; @@ -340,6 +341,7 @@ async function sendMakerSuiteRequest(request, response) { const stream = Boolean(request.body.stream); const enableWebSearch = Boolean(request.body.enable_web_search); const requestImages = Boolean(request.body.request_images); + const reasoningEffort = String(request.body.reasoning_effort); const isThinking = model.includes('thinking'); const isGemma = model.includes('gemma'); @@ -412,11 +414,15 @@ async function sendMakerSuiteRequest(request, response) { tools.push({ function_declarations: functionDeclarations }); } - if ('enable_thinking' in request.body && 'thinking_budget' in request.body) { - const thinkingEnabled = Boolean(request.body.enable_thinking); - const thinkingBudget = Number(request.body.thinking_budget); + // One more models list to maintain, yay + const thinkingBudgetModels = [ + 'gemini-2.5-flash-preview-04-17', + ]; - if (thinkingEnabled) { + if (thinkingBudgetModels.includes(model)) { + const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort); + + if (Number.isInteger(thinkingBudget)) { generationConfig.thinkingConfig = { thinkingBudget: thinkingBudget }; } } diff --git a/src/prompt-converters.js b/src/prompt-converters.js index 8ea3ab896..266047b8d 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -3,6 +3,15 @@ import { getConfigValue, tryParse } from './util.js'; const PROMPT_PLACEHOLDER = getConfigValue('promptPlaceholder', 'Let\'s get started.'); +const REASONING_EFFORT = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + /** * @typedef {object} PromptNames * @property {string} charName Character name @@ -944,25 +953,35 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) { } /** - * Calculate the budget tokens for a given reasoning effort. + * Calculate the Claude budget tokens for a given reasoning effort. * @param {number} maxTokens Maximum tokens * @param {string} reasoningEffort Reasoning effort * @param {boolean} stream If streaming is enabled * @returns {number} Budget tokens */ -export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { +export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) { let budgetTokens = 0; switch (reasoningEffort) { - case 'low': + // Claude doesn't have a default budget value. Use same as min. + case REASONING_EFFORT.auto: + budgetTokens = 1024; + break; + case REASONING_EFFORT.min: + budgetTokens = 1024; + break; + case REASONING_EFFORT.low: budgetTokens = Math.floor(maxTokens * 0.1); break; - case 'medium': + case REASONING_EFFORT.medium: budgetTokens = Math.floor(maxTokens * 0.25); break; - case 'high': + case REASONING_EFFORT.high: budgetTokens = Math.floor(maxTokens * 0.5); break; + case REASONING_EFFORT.max: + budgetTokens = maxTokens; + break; } budgetTokens = Math.max(budgetTokens, 1024); @@ -973,3 +992,37 @@ export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { return budgetTokens; } + +/** + * Calculate the Google budget tokens for a given reasoning effort. + * @param {number} maxTokens Maximum tokens + * @param {string} reasoningEffort Reasoning effort + * @returns {number?} Budget tokens + */ +export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort) { + let budgetTokens = 0; + + switch (reasoningEffort) { + case REASONING_EFFORT.auto: + return null; + case REASONING_EFFORT.min: + budgetTokens = 0; + break; + case REASONING_EFFORT.low: + budgetTokens = Math.floor(maxTokens * 0.1); + break; + case REASONING_EFFORT.medium: + budgetTokens = Math.floor(maxTokens * 0.25); + break; + case REASONING_EFFORT.high: + budgetTokens = Math.floor(maxTokens * 0.5); + break; + case REASONING_EFFORT.max: + budgetTokens = maxTokens; + break; + } + + budgetTokens = Math.min(budgetTokens, 24576); + + return budgetTokens; +}