diff --git a/public/index.html b/public/index.html index 49809c9dc..f708229d4 100644 --- a/public/index.html +++ b/public/index.html @@ -2048,16 +2048,20 @@ -
-
+
+
diff --git a/public/script.js b/public/script.js index f2efda2b5..ca5f87850 100644 --- a/public/script.js +++ b/public/script.js @@ -6955,15 +6955,23 @@ export async function saveChat({ chatName, withMetadata, mesId, force = false } throw new Error(result.statusText); } - const forceSaveConfirmed = await Popup.show.confirm( - t`ERROR: Chat integrity check failed.`, - t`Continuing the operation may result in data loss. Would you like to overwrite the chat file anyway? Pressing "NO" will cancel the save operation.`, - { okButton: t`Yes, overwrite`, cancelButton: t`No, cancel` }, - ) === POPUP_RESULT.AFFIRMATIVE; + const popupResult = await Popup.show.input( + t`ERROR: Chat integrity check failed while saving the file.`, + t`

Unless you confirm the overwrite below, clicking OK will reload the page to prevent data corruption.

+

To confirm an overwrite (and potentially LOSE YOUR DATA), enter OVERWRITE (in all caps) in the box below before clicking OK.

`, + '', + { okButton: 'OK', cancelButton: false }, + ); - if (forceSaveConfirmed) { - await saveChat({ chatName, withMetadata, mesId, force: true }); + const forceSaveConfirmed = popupResult === 'OVERWRITE'; + + if (!forceSaveConfirmed) { + console.warn('Chat integrity check failed, and user did not confirm the overwrite. Reloading the page.'); + window.location.reload(); + return; } + + await saveChat({ chatName, withMetadata, mesId, force: true }); } catch (error) { console.error(error); toastr.error(t`Check the server connection and reload the page to prevent data loss.`, t`Chat could not be saved`); diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 02ab85b2b..2eb809a92 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -216,6 +216,15 @@ const openrouter_middleout_types = { OFF: 'off', }; +export const reasoning_effort_types = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + const sensitiveFields = [ 'reverse_proxy', 'proxy_password', @@ -382,7 +391,7 @@ const default_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.auto, enable_web_search: false, request_images: false, seed: -1, @@ -463,7 +472,7 @@ const oai_settings = { continue_postfix: continue_postfix_types.SPACE, custom_prompt_post_processing: custom_prompt_post_processing_types.NONE, show_thoughts: true, - reasoning_effort: 'medium', + reasoning_effort: reasoning_effort_types.auto, enable_web_search: false, request_images: false, seed: -1, @@ -1937,6 +1946,31 @@ async function sendAltScaleRequest(messages, logit_bias, signal, type) { return data.output; } +function getReasoningEffort() { + // These sources expect the effort as string. 
+ const reasoningEffortSources = [ + chat_completion_sources.OPENAI, + chat_completion_sources.CUSTOM, + chat_completion_sources.XAI, + chat_completion_sources.OPENROUTER, + ]; + + if (!reasoningEffortSources.includes(oai_settings.chat_completion_source)) { + return oai_settings.reasoning_effort; + } + + switch (oai_settings.reasoning_effort) { + case reasoning_effort_types.auto: + return undefined; + case reasoning_effort_types.min: + return reasoning_effort_types.low; + case reasoning_effort_types.max: + return reasoning_effort_types.high; + default: + return oai_settings.reasoning_effort; + } +} + /** * Send a chat completion request to backend * @param {string} type (impersonate, quiet, continue, etc) @@ -2023,7 +2057,7 @@ async function sendOpenAIRequest(type, messages, signal) { 'char_name': name2, 'group_names': getGroupNames(), 'include_reasoning': Boolean(oai_settings.show_thoughts), - 'reasoning_effort': String(oai_settings.reasoning_effort), + 'reasoning_effort': getReasoningEffort(), 'enable_web_search': Boolean(oai_settings.enable_web_search), 'request_images': Boolean(oai_settings.request_images), 'custom_prompt_post_processing': oai_settings.custom_prompt_post_processing, diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index 34a7c6bdd..e61dd16bf 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -28,7 +28,8 @@ import { cachingAtDepthForOpenRouterClaude, cachingAtDepthForClaude, getPromptNames, - calculateBudgetTokens, + calculateClaudeBudgetTokens, + calculateGoogleBudgetTokens, } from '../../prompt-converters.js'; import { readSecret, SECRET_KEYS } from '../secrets.js'; @@ -202,7 +203,7 @@ async function sendClaudeRequest(request, response) { // No prefill when thinking voidPrefill = true; const reasoningEffort = request.body.reasoning_effort; - const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, 
requestBody.stream); + const budgetTokens = calculateClaudeBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream); const minThinkTokens = 1024; if (requestBody.max_tokens <= minThinkTokens) { const newValue = requestBody.max_tokens + minThinkTokens; @@ -340,6 +341,7 @@ async function sendMakerSuiteRequest(request, response) { const stream = Boolean(request.body.stream); const enableWebSearch = Boolean(request.body.enable_web_search); const requestImages = Boolean(request.body.request_images); + const reasoningEffort = String(request.body.reasoning_effort); const isThinking = model.includes('thinking'); const isGemma = model.includes('gemma'); @@ -412,13 +414,26 @@ async function sendMakerSuiteRequest(request, response) { tools.push({ function_declarations: functionDeclarations }); } + // One more models list to maintain, yay + const thinkingBudgetModels = [ + 'gemini-2.5-flash-preview-04-17', + ]; + + if (thinkingBudgetModels.includes(model)) { + const thinkingBudget = calculateGoogleBudgetTokens(generationConfig.maxOutputTokens, reasoningEffort); + + if (Number.isInteger(thinkingBudget)) { + generationConfig.thinkingConfig = { thinkingBudget: thinkingBudget }; + } + } + let body = { contents: prompt.contents, safetySettings: safetySettings, generationConfig: generationConfig, }; - if (useSystemPrompt) { + if (useSystemPrompt && Array.isArray(prompt.system_instruction.parts) && prompt.system_instruction.parts.length) { body.systemInstruction = prompt.system_instruction; } @@ -868,7 +883,7 @@ async function sendXaiRequest(request, response) { bodyParams['stop'] = request.body.stop; } - if (['grok-3-mini-beta', 'grok-3-mini-fast-beta'].includes(request.body.model)) { + if (request.body.reasoning_effort && ['grok-3-mini-beta', 'grok-3-mini-fast-beta'].includes(request.body.model)) { bodyParams['reasoning_effort'] = request.body.reasoning_effort === 'high' ? 
'high' : 'low'; } @@ -1210,6 +1225,10 @@ router.post('/generate', function (request, response) { bodyParams['route'] = 'fallback'; } + if (request.body.reasoning_effort) { + bodyParams['reasoning'] = { effort: request.body.reasoning_effort }; + } + let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number'); if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model?.startsWith('anthropic/claude-3')) { cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth); @@ -1258,7 +1277,7 @@ router.post('/generate', function (request, response) { } // A few of OpenAIs reasoning models support reasoning effort - if ([CHAT_COMPLETION_SOURCES.CUSTOM, CHAT_COMPLETION_SOURCES.OPENAI].includes(request.body.chat_completion_source)) { + if (request.body.reasoning_effort && [CHAT_COMPLETION_SOURCES.CUSTOM, CHAT_COMPLETION_SOURCES.OPENAI].includes(request.body.chat_completion_source)) { if (['o1', 'o3-mini', 'o3-mini-2025-01-31', 'o4-mini', 'o4-mini-2025-04-16', 'o3', 'o3-2025-04-16'].includes(request.body.model)) { bodyParams['reasoning_effort'] = request.body.reasoning_effort; } diff --git a/src/prompt-converters.js b/src/prompt-converters.js index 54b0f4df1..75ee89cc8 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -3,6 +3,15 @@ import { getConfigValue, tryParse } from './util.js'; const PROMPT_PLACEHOLDER = getConfigValue('promptPlaceholder', 'Let\'s get started.'); +const REASONING_EFFORT = { + auto: 'auto', + low: 'low', + medium: 'medium', + high: 'high', + min: 'min', + max: 'max', +}; + /** * @typedef {object} PromptNames * @property {string} charName Character name @@ -356,7 +365,7 @@ export function convertCohereMessages(messages, names) { * @param {string} model Model name * @param {boolean} useSysPrompt Use system prompt * @param {PromptNames} names Prompt names - * @returns {{contents: *[], system_instruction: {parts: {text: string}}}} Prompt for Google MakerSuite models + * @returns {{contents: 
*[], system_instruction: {parts: {text: string}[]}}} Prompt for Google MakerSuite models */ export function convertGooglePrompt(messages, model, useSysPrompt, names) { const visionSupportedModelPrefix = [ @@ -369,8 +378,8 @@ export function convertGooglePrompt(messages, model, useSysPrompt, names) { ]; const isMultimodal = visionSupportedModelPrefix.some(prefix => model.startsWith(prefix)); + const sysPrompt = []; - let sys_prompt = ''; if (useSysPrompt) { while (messages.length > 1 && messages[0].role === 'system') { // Append example names if not already done by the frontend (e.g. for group chats). @@ -384,12 +393,12 @@ export function convertGooglePrompt(messages, model, useSysPrompt, names) { messages[0].content = `${names.charName}: ${messages[0].content}`; } } - sys_prompt += `${messages[0].content}\n\n`; + sysPrompt.push(messages[0].content); messages.shift(); } } - const system_instruction = { parts: [{ text: sys_prompt.trim() }] }; + const system_instruction = { parts: sysPrompt.map(text => ({ text })) }; const toolNameMap = {}; const contents = []; @@ -919,25 +928,32 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) { } /** - * Calculate the budget tokens for a given reasoning effort. + * Calculate the Claude budget tokens for a given reasoning effort. 
* @param {number} maxTokens Maximum tokens * @param {string} reasoningEffort Reasoning effort * @param {boolean} stream If streaming is enabled * @returns {number} Budget tokens */ -export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { +export function calculateClaudeBudgetTokens(maxTokens, reasoningEffort, stream) { let budgetTokens = 0; switch (reasoningEffort) { - case 'low': + case REASONING_EFFORT.min: + budgetTokens = 1024; + break; + case REASONING_EFFORT.low: budgetTokens = Math.floor(maxTokens * 0.1); break; - case 'medium': + case REASONING_EFFORT.auto: + case REASONING_EFFORT.medium: budgetTokens = Math.floor(maxTokens * 0.25); break; - case 'high': + case REASONING_EFFORT.high: budgetTokens = Math.floor(maxTokens * 0.5); break; + case REASONING_EFFORT.max: + budgetTokens = Math.floor(maxTokens * 0.95); + break; } budgetTokens = Math.max(budgetTokens, 1024); @@ -948,3 +964,37 @@ export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) { return budgetTokens; } + +/** + * Calculate the Google budget tokens for a given reasoning effort. + * @param {number} maxTokens Maximum tokens + * @param {string} reasoningEffort Reasoning effort + * @returns {number?} Budget tokens + */ +export function calculateGoogleBudgetTokens(maxTokens, reasoningEffort) { + let budgetTokens = 0; + + switch (reasoningEffort) { + case REASONING_EFFORT.auto: + return null; + case REASONING_EFFORT.min: + budgetTokens = 0; + break; + case REASONING_EFFORT.low: + budgetTokens = Math.floor(maxTokens * 0.1); + break; + case REASONING_EFFORT.medium: + budgetTokens = Math.floor(maxTokens * 0.25); + break; + case REASONING_EFFORT.high: + budgetTokens = Math.floor(maxTokens * 0.5); + break; + case REASONING_EFFORT.max: + budgetTokens = maxTokens; + break; + } + + budgetTokens = Math.min(budgetTokens, 24576); + + return budgetTokens; +}