mirror of https://github.com/SillyTavern/SillyTavern.git
synced 2025-03-13 10:30:14 +01:00

Claude 3.7 think mode

parent db148d5142
commit b8ebed0f4c
@@ -2000,7 +2000,7 @@
             </span>
         </div>
     </div>
-    <div class="range-block" data-source="deepseek,openrouter,custom">
+    <div class="range-block" data-source="deepseek,openrouter,custom,claude">
         <label for="openai_show_thoughts" class="checkbox_label widthFreeExpand">
             <input id="openai_show_thoughts" type="checkbox" />
             <span>
@@ -2014,10 +2014,11 @@
             </span>
         </div>
     </div>
-    <div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom">
+    <div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom,claude">
         <div class="flex-container oneline-dropdown" title="Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response." data-i18n="[title]Constrains effort on reasoning for reasoning models.">
-            <label for="openai_reasoning_effort" data-i18n="Reasoning Effort">
-                Reasoning Effort
+            <label for="openai_reasoning_effort">
+                <span data-i18n="Reasoning Effort">Reasoning Effort</span>
+                <i data-source="claude" class="opacity50p fa-solid fa-circle-info" title="Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%)."></i>
             </label>
             <select id="openai_reasoning_effort">
                 <option data-i18n="openai_reasoning_effort_low" value="low">Low</option>
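(For scale: the tooltip's percentages match the budget math introduced further down in calculateBudgetTokens, so with a 4096-token response length, medium effort reserves floor(4096 * 0.25) = 1024 tokens for thinking.)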
@@ -5725,7 +5725,7 @@ function extractMessageFromData(data) {
         case 'novel':
            return data.output;
        case 'openai':
-           return data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
+           return data?.content?.find(p => p.type === 'text')?.text ?? data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
        default:
            return '';
    }
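Why the extra data?.content?.find(p => p.type === 'text') branch: Claude's native Messages API returns an array of typed content blocks, and with extended thinking enabled the visible reply is no longer the first block. A minimal sketch of the shape being handled (payload values are illustrative, not taken from the commit):

    // Illustrative non-streaming Claude response with thinking enabled:
    // the 'thinking' block precedes the 'text' block, so content[0] would
    // yield the reasoning instead of the visible reply.
    const data = {
        content: [
            { type: 'thinking', thinking: 'Working through the problem...' },
            { type: 'text', text: 'Here is the answer.' },
        ],
    };
    const message = data?.content?.find(p => p.type === 'text')?.text ?? '';
    console.log(message); // 'Here is the answer.'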
@@ -2149,6 +2149,9 @@ async function sendOpenAIRequest(type, messages, signal) {
  */
 function getStreamingReply(data, state) {
     if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
+        if (oai_settings.show_thoughts) {
+            state.reasoning += data?.delta?.thinking || '';
+        }
         return data?.delta?.text || '';
     } else if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) {
         if (oai_settings.show_thoughts) {
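For context: in Anthropic's streaming API, content_block_delta events carry either a thinking_delta or a text_delta, which is why the two fields are read separately here. A sketch with made-up payloads:

    // Illustrative stream deltas (shapes follow Anthropic's streaming docs;
    // the text itself is invented).
    const events = [
        { delta: { type: 'thinking_delta', thinking: 'Hmm, ' } },
        { delta: { type: 'text_delta', text: 'Hello!' } },
    ];
    const state = { reasoning: '' };
    let text = '';
    for (const data of events) {
        state.reasoning += data?.delta?.thinking || ''; // thinking deltas only
        text += data?.delta?.text || '';                // text deltas only
    }
    console.log(state.reasoning); // 'Hmm, '
    console.log(text);            // 'Hello!'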
@@ -76,6 +76,8 @@ export function extractReasoningFromData(data) {
             return data?.choices?.[0]?.message?.reasoning ?? '';
         case chat_completion_sources.MAKERSUITE:
             return data?.responseContent?.parts?.filter(part => part.thought)?.map(part => part.text)?.join('\n\n') ?? '';
+        case chat_completion_sources.CLAUDE:
+            return data?.content?.find(part => part.type === 'thinking')?.thinking ?? '';
         case chat_completion_sources.CUSTOM: {
             return data?.choices?.[0]?.message?.reasoning_content
                 ?? data?.choices?.[0]?.message?.reasoning
@@ -28,6 +28,7 @@ import {
     cachingAtDepthForOpenRouterClaude,
     cachingAtDepthForClaude,
     getPromptNames,
+    calculateBudgetTokens,
 } from '../../prompt-converters.js';

 import { readSecret, SECRET_KEYS } from '../secrets.js';
@@ -129,6 +130,8 @@ async function sendClaudeRequest(request, response) {
     const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
     const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
     const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
+    const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
+    let voidPrefill = false;
     // Add custom stop sequences
     const stopSequences = [];
     if (Array.isArray(request.body.stop)) {
@@ -163,9 +166,9 @@ async function sendClaudeRequest(request, response) {
         .map(tool => tool.function)
         .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));

-    // Claude doesn't do prefills on function calls, and doesn't allow empty messages
-    if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
-        convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
+    if (requestBody.tools.length) {
+        // No prefill when using tools
+        voidPrefill = true;
     }
     if (enableSystemPromptCache && requestBody.tools.length) {
         requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
@@ -180,6 +183,33 @@ async function sendClaudeRequest(request, response) {
         betaHeaders.push('prompt-caching-2024-07-31');
     }

+    if (useThinking) {
+        // No prefill when thinking
+        voidPrefill = true;
+        const reasoningEffort = request.body.reasoning_effort;
+        const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
+        const minThinkTokens = 1024;
+        if (requestBody.max_tokens <= minThinkTokens) {
+            const newValue = requestBody.max_tokens + minThinkTokens;
+            console.warn(color.yellow(`Claude thinking requires a minimum of ${minThinkTokens} response tokens.`));
+            console.info(color.blue(`Increasing response length to ${newValue}.`));
+            requestBody.max_tokens = newValue;
+        }
+        requestBody.thinking = {
+            type: 'enabled',
+            budget_tokens: budgetTokens,
+        };
+
+        // NO I CAN'T SILENTLY IGNORE THE TEMPERATURE.
+        delete requestBody.temperature;
+        delete requestBody.top_p;
+        delete requestBody.top_k;
+    }
+
+    if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
+        convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
+    }
+
     if (betaHeaders.length) {
         additionalHeaders['anthropic-beta'] = betaHeaders.join(',');
     }
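Put together, the outgoing request when thinking is active looks roughly like the sketch below (model name and numbers are illustrative; Anthropic requires the thinking budget to stay below max_tokens and rejects temperature/top_p/top_k while thinking is enabled, hence the deletions above):

    // Illustrative final request body for a claude-3-7 model with
    // max_tokens = 4096 and medium reasoning effort.
    const requestBody = {
        model: 'claude-3-7-sonnet-20250219',
        max_tokens: 4096,
        stream: true,
        thinking: {
            type: 'enabled',
            budget_tokens: 1024, // calculateBudgetTokens(4096, 'medium', true)
        },
        // temperature, top_p, top_k: deleted above, not sent
        messages: [/* converted prompt, with any assistant prefill voided */],
    };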
@@ -862,3 +862,34 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
         }
     }
 }
+
+/**
+ * Calculate the budget tokens for a given reasoning effort.
+ * @param {number} maxTokens Maximum tokens
+ * @param {string} reasoningEffort Reasoning effort
+ * @param {boolean} stream If streaming is enabled
+ * @returns {number} Budget tokens
+ */
+export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
+    let budgetTokens = 0;
+
+    switch (reasoningEffort) {
+        case 'low':
+            budgetTokens = Math.floor(maxTokens * 0.1);
+            break;
+        case 'medium':
+            budgetTokens = Math.floor(maxTokens * 0.25);
+            break;
+        case 'high':
+            budgetTokens = Math.floor(maxTokens * 0.5);
+            break;
+    }
+
+    budgetTokens = Math.max(budgetTokens, 1024);
+
+    if (!stream) {
+        budgetTokens = Math.min(budgetTokens, 21333);
+    }
+
+    return budgetTokens;
+}
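A few worked values for the function above, straight from its arithmetic (the 21333 non-streaming cap appears to track Anthropic's requirement to stream requests with very large token counts):

    calculateBudgetTokens(4096, 'medium', true);  // floor(4096 * 0.25) = 1024
    calculateBudgetTokens(2048, 'low', true);     // floor(2048 * 0.1) = 204, raised to the 1024 floor
    calculateBudgetTokens(60000, 'high', false);  // floor(60000 * 0.5) = 30000, capped at 21333
    calculateBudgetTokens(60000, 'high', true);   // streaming keeps the full 30000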