diff --git a/public/scripts/openai.js b/public/scripts/openai.js
index b22a29fba..1f768b82f 100644
--- a/public/scripts/openai.js
+++ b/public/scripts/openai.js
@@ -165,11 +165,6 @@ const textCompletionModels = [
'code-search-ada-code-001',
];
-// One more models list to maintain, yay
-const thinkingBudgetModels = [
- 'gemini-2.5-flash-preview-04-17',
-];
-
let biasCache = undefined;
export let model_list = [];
@@ -221,6 +216,15 @@ const openrouter_middleout_types = {
OFF: 'off',
};
+export const reasoning_effort_types = {
+ auto: 'auto',
+ low: 'low',
+ medium: 'medium',
+ high: 'high',
+ min: 'min',
+ max: 'max',
+};
+
const sensitiveFields = [
'reverse_proxy',
'proxy_password',
@@ -311,8 +315,6 @@ export const settingsToUpdate = {
n: ['#n_openai', 'n', false],
bypass_status_check: ['#openai_bypass_status_check', 'bypass_status_check', true],
request_images: ['#openai_request_images', 'request_images', true],
- enable_thinking: ['#enable_thinking', 'enable_thinking', true],
- thinking_budget: ['#thinking_budget', 'thinking_budget', false],
};
const default_settings = {
@@ -389,11 +391,9 @@ const default_settings = {
continue_postfix: continue_postfix_types.SPACE,
custom_prompt_post_processing: custom_prompt_post_processing_types.NONE,
show_thoughts: true,
- reasoning_effort: 'medium',
+ reasoning_effort: reasoning_effort_types.medium,
enable_web_search: false,
request_images: false,
- enable_thinking: true,
- thinking_budget: 1000,
seed: -1,
n: 1,
};
@@ -472,11 +472,9 @@ const oai_settings = {
continue_postfix: continue_postfix_types.SPACE,
custom_prompt_post_processing: custom_prompt_post_processing_types.NONE,
show_thoughts: true,
- reasoning_effort: 'medium',
+ reasoning_effort: reasoning_effort_types.medium,
enable_web_search: false,
request_images: false,
- enable_thinking: true,
- thinking_budget: 1000,
seed: -1,
n: 1,
};
@@ -1948,6 +1946,35 @@ async function sendAltScaleRequest(messages, logit_bias, signal, type) {
return data.output;
}
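+/**
+ * Resolves the reasoning effort value to send with the request.
+ * "auto" omits the field so the backend/model decides; "min" and "max" are
+ * mapped to "low" and "high" for sources that only accept the standard values.
+ * @returns {string|undefined} Reasoning effort, or undefined when set to "auto"
+ */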
+function getReasoningEffort() {
+ // Do not set the field. Let the model decide.
+ if (oai_settings.reasoning_effort === reasoning_effort_types.auto) {
+ return undefined;
+ }
+
+    // These sources only accept the standard effort values, so clamp "min" to "low"
+ if (oai_settings.reasoning_effort === reasoning_effort_types.min) {
+ switch (oai_settings.chat_completion_source) {
+ case chat_completion_sources.OPENAI:
+ case chat_completion_sources.CUSTOM:
+ case chat_completion_sources.XAI:
+ return reasoning_effort_types.low;
+ }
+ }
+
+    // Likewise, clamp "max" to "high" for the same sources
+ if (oai_settings.reasoning_effort === reasoning_effort_types.max) {
+ switch (oai_settings.chat_completion_source) {
+ case chat_completion_sources.OPENAI:
+ case chat_completion_sources.CUSTOM:
+ case chat_completion_sources.XAI:
+ return reasoning_effort_types.high;
+ }
+ }
+
+ return oai_settings.reasoning_effort;
+}
+
/**
* Send a chat completion request to backend
* @param {string} type (impersonate, quiet, continue, etc)
@@ -2034,17 +2061,12 @@ async function sendOpenAIRequest(type, messages, signal) {
'char_name': name2,
'group_names': getGroupNames(),
'include_reasoning': Boolean(oai_settings.show_thoughts),
- 'reasoning_effort': String(oai_settings.reasoning_effort),
+ 'reasoning_effort': getReasoningEffort(),
'enable_web_search': Boolean(oai_settings.enable_web_search),
'request_images': Boolean(oai_settings.request_images),
'custom_prompt_post_processing': oai_settings.custom_prompt_post_processing,
};
- if (thinkingBudgetModels.includes(model)) {
- generate_data['enable_thinking'] = oai_settings.enable_thinking;
- generate_data['thinking_budget'] = oai_settings.thinking_budget;
- }
-
if (!canMultiSwipe && ToolManager.canPerformToolCalls(type)) {
await ToolManager.registerFunctionToolsOpenAI(generate_data);
}
@@ -3300,8 +3322,6 @@ function loadOpenAISettings(data, settings) {
oai_settings.show_thoughts = settings.show_thoughts ?? default_settings.show_thoughts;
oai_settings.reasoning_effort = settings.reasoning_effort ?? default_settings.reasoning_effort;
oai_settings.enable_web_search = settings.enable_web_search ?? default_settings.enable_web_search;
- oai_settings.enable_thinking = settings.enable_thinking ?? default_settings.enable_thinking;
- oai_settings.thinking_budget = settings.thinking_budget ?? default_settings.thinking_budget;
oai_settings.request_images = settings.request_images ?? default_settings.request_images;
oai_settings.seed = settings.seed ?? default_settings.seed;
oai_settings.n = settings.n ?? default_settings.n;
@@ -3437,10 +3457,6 @@ function loadOpenAISettings(data, settings) {
$('#openai_reasoning_effort').val(oai_settings.reasoning_effort);
$(`#openai_reasoning_effort option[value="${oai_settings.reasoning_effort}"]`).prop('selected', true);
- $('#enable_thinking').prop('checked', oai_settings.enable_thinking);
- $('#thinking_budget').val(oai_settings.thinking_budget);
- $('#thinking_budget_counter').val(oai_settings.thinking_budget);
-
if (settings.reverse_proxy !== undefined) oai_settings.reverse_proxy = settings.reverse_proxy;
$('#openai_reverse_proxy').val(oai_settings.reverse_proxy);
@@ -3471,7 +3487,6 @@ function loadOpenAISettings(data, settings) {
setNamesBehaviorControls();
setContinuePostfixControls();
- updateThinkingBudgetUI();
if (oai_settings.custom_prompt_post_processing === custom_prompt_post_processing_types.CLAUDE) {
oai_settings.custom_prompt_post_processing = custom_prompt_post_processing_types.MERGE;
@@ -3529,14 +3544,6 @@ function setContinuePostfixControls() {
$('#continue_postfix_display').text(checkedItemText);
}
-/**
- * Updates the visibility and state of the Thinking Budget controls.
- */
-function updateThinkingBudgetUI() {
- const modelSupportsControl = thinkingBudgetModels.includes(getChatCompletionModel());
- $('#thinking_budget_controls').toggle(modelSupportsControl);
-}
-
async function getStatusOpen() {
if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI) {
let status;
@@ -3717,8 +3724,6 @@ async function saveOpenAIPreset(name, settings, triggerUi = true) {
reasoning_effort: settings.reasoning_effort,
enable_web_search: settings.enable_web_search,
request_images: settings.request_images,
- enable_thinking: settings.enable_thinking,
- thinking_budget: settings.thinking_budget,
seed: settings.seed,
n: settings.n,
};
@@ -4749,7 +4754,6 @@ async function onModelChange() {
$('#openai_max_context_counter').attr('max', Number($('#openai_max_context').attr('max')));
- updateThinkingBudgetUI();
saveSettingsDebounced();
eventSource.emit(event_types.CHATCOMPLETION_MODEL_CHANGED, value);
}
@@ -5568,7 +5572,6 @@ export function initOpenAI() {
$('#chat_completion_source').on('change', function () {
oai_settings.chat_completion_source = String($(this).find(':selected').val());
toggleChatCompletionForms();
- updateThinkingBudgetUI();
saveSettingsDebounced();
reconnectOpenAi();
forceCharacterEditorTokenize();
@@ -5754,29 +5757,6 @@ export function initOpenAI() {
saveSettingsDebounced();
});
- $('#enable_thinking').on('input', function () {
- oai_settings.enable_thinking = !!$(this).prop('checked');
- updateThinkingBudgetUI();
- saveSettingsDebounced();
- });
-
- $('#thinking_budget').on('input', function () {
- oai_settings.thinking_budget = Number($(this).val());
- $('#thinking_budget_counter').val(oai_settings.thinking_budget);
- saveSettingsDebounced();
- });
-
- $('#thinking_budget_counter').on('input', function () {
- let value = Number($(this).val());
- const min = Number($('#thinking_budget').attr('min'));
- const max = Number($('#thinking_budget').attr('max'));
- value = Math.max(min, Math.min(max, value));
- $(this).val(value);
- oai_settings.thinking_budget = value;
- $('#thinking_budget').val(value);
- saveSettingsDebounced();
- });
-
$('#openai_enable_web_search').on('input', function () {
oai_settings.enable_web_search = !!$(this).prop('checked');
calculateOpenRouterCost();
diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index 167dfbcdb..3ecf45f36 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -28,7 +28,8 @@ import {
cachingAtDepthForOpenRouterClaude,
cachingAtDepthForClaude,
getPromptNames,
- calculateBudgetTokens,
+ calculateClaudeBudgetTokens,
+ calculateGoogleBudgetTokens,
} from '../../prompt-converters.js';
import { readSecret, SECRET_KEYS } from '../secrets.js';
@@ -202,7 +203,7 @@ async function sendClaudeRequest(request, response) {
// No prefill when thinking
voidPrefill = true;
const reasoningEffort = request.body.reasoning_effort;
- const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
+ const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) {
const newValue = requestBody.max_tokens + minThinkTokens;
@@ -340,6 +341,7 @@ async function sendMakerSuiteRequest(request, response) {
const stream = Boolean(request.body.stream);
const enableWebSearch = Boolean(request.body.enable_web_search);
const requestImages = Boolean(request.body.request_images);
+ const reasoningEffort = String(request.body.reasoning_effort);
const isThinking = model.includes('thinking');
const isGemma = model.includes('gemma');
@@ -412,11 +414,15 @@ async function sendMakerSuiteRequest(request, response) {
tools.push({ function_declarations: functionDeclarations });
}
- if ('enable_thinking' in request.body && 'thinking_budget' in request.body) {
- const thinkingEnabled = Boolean(request.body.enable_thinking);
- const thinkingBudget = Number(request.body.thinking_budget);
+ // One more models list to maintain, yay
+ const thinkingBudgetModels = [
+ 'gemini-2.5-flash-preview-04-17',
+ ];
- if (thinkingEnabled) {
+ if (thinkingBudgetModels.includes(model)) {
+ const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort);
+
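+        // "auto" effort yields null here, so thinkingConfig stays unset and the model decides.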
+ if (Number.isInteger(thinkingBudget)) {
generationConfig.thinkingConfig = { thinkingBudget: thinkingBudget };
}
}
diff --git a/src/prompt-converters.js b/src/prompt-converters.js
index 8ea3ab896..266047b8d 100644
--- a/src/prompt-converters.js
+++ b/src/prompt-converters.js
@@ -3,6 +3,15 @@ import { getConfigValue, tryParse } from './util.js';
const PROMPT_PLACEHOLDER = getConfigValue('promptPlaceholder', 'Let\'s get started.');
+const REASONING_EFFORT = {
+ auto: 'auto',
+ low: 'low',
+ medium: 'medium',
+ high: 'high',
+ min: 'min',
+ max: 'max',
+};
+
/**
* @typedef {object} PromptNames
* @property {string} charName Character name
@@ -944,25 +953,35 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
}
/**
- * Calculate the budget tokens for a given reasoning effort.
+ * Calculate the Claude budget tokens for a given reasoning effort.
* @param {number} maxTokens Maximum tokens
* @param {string} reasoningEffort Reasoning effort
* @param {boolean} stream If streaming is enabled
* @returns {number} Budget tokens
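+ * Example: maxTokens=4096 with "high" effort yields floor(4096 * 0.5) = 2048 budget tokens.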
*/
-export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
+export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) {
let budgetTokens = 0;
switch (reasoningEffort) {
- case 'low':
+ // Claude doesn't have a default budget value. Use same as min.
+ case REASONING_EFFORT.auto:
+ budgetTokens = 1024;
+ break;
+ case REASONING_EFFORT.min:
+ budgetTokens = 1024;
+ break;
+ case REASONING_EFFORT.low:
budgetTokens = Math.floor(maxTokens * 0.1);
break;
- case 'medium':
+ case REASONING_EFFORT.medium:
budgetTokens = Math.floor(maxTokens * 0.25);
break;
- case 'high':
+ case REASONING_EFFORT.high:
budgetTokens = Math.floor(maxTokens * 0.5);
break;
+ case REASONING_EFFORT.max:
+ budgetTokens = maxTokens;
+ break;
}
budgetTokens = Math.max(budgetTokens, 1024);
@@ -973,3 +992,37 @@ export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
return budgetTokens;
}
+
+/**
+ * Calculate the Google budget tokens for a given reasoning effort.
+ * @param {number} maxTokens Maximum tokens
+ * @param {string} reasoningEffort Reasoning effort
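+ * Example: maxTokens=8192 with "medium" effort yields floor(8192 * 0.25) = 2048.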
+ * @returns {number?} Budget tokens, or null when effort is "auto"
+ */
+export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort) {
+ let budgetTokens = 0;
+
+ switch (reasoningEffort) {
+ case REASONING_EFFORT.auto:
+ return null;
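+        // A budget of 0 asks the model to skip thinking where the API allows it.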
+ case REASONING_EFFORT.min:
+ budgetTokens = 0;
+ break;
+ case REASONING_EFFORT.low:
+ budgetTokens = Math.floor(maxTokens * 0.1);
+ break;
+ case REASONING_EFFORT.medium:
+ budgetTokens = Math.floor(maxTokens * 0.25);
+ break;
+ case REASONING_EFFORT.high:
+ budgetTokens = Math.floor(maxTokens * 0.5);
+ break;
+ case REASONING_EFFORT.max:
+ budgetTokens = maxTokens;
+ break;
+ }
+
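+    // Cap at 24576, the upper bound of the thinking budget range for Gemini 2.5 Flash.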
+ budgetTokens = Math.min(budgetTokens, 24576);
+
+ return budgetTokens;
+}