Claude 3.7 think mode

This commit is contained in:
Cohee 2025-02-24 23:43:13 +02:00
parent db148d5142
commit b8ebed0f4c
6 changed files with 75 additions and 8 deletions

View File

@ -2000,7 +2000,7 @@
</span>
</div>
</div>
<div class="range-block" data-source="deepseek,openrouter,custom">
<div class="range-block" data-source="deepseek,openrouter,custom,claude">
<label for="openai_show_thoughts" class="checkbox_label widthFreeExpand">
<input id="openai_show_thoughts" type="checkbox" />
<span>
@ -2014,10 +2014,11 @@
</span>
</div>
</div>
<div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom">
<div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom,claude">
<div class="flex-container oneline-dropdown" title="Constrains effort on reasoning for reasoning models.&#10;Currently supported values are low, medium, and high.&#10;Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response." data-i18n="[title]Constrains effort on reasoning for reasoning models.">
<label for="openai_reasoning_effort" data-i18n="Reasoning Effort">
Reasoning Effort
<label for="openai_reasoning_effort">
<span data-i18n="Reasoning Effort">Reasoning Effort</span>
<i data-source="claude" class="opacity50p fa-solid fa-circle-info" title="Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%)."></i>
</label>
<select id="openai_reasoning_effort">
<option data-i18n="openai_reasoning_effort_low" value="low">Low</option>

View File

@ -5725,7 +5725,7 @@ function extractMessageFromData(data) {
case 'novel':
return data.output;
case 'openai':
return data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
return data?.content?.find(p => p.type === 'text')?.text ?? data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
default:
return '';
}

View File

@ -2149,6 +2149,9 @@ async function sendOpenAIRequest(type, messages, signal) {
*/
function getStreamingReply(data, state) {
if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
if (oai_settings.show_thoughts) {
state.reasoning += data?.delta?.thinking || '';
}
return data?.delta?.text || '';
} else if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) {
if (oai_settings.show_thoughts) {

View File

@ -76,6 +76,8 @@ export function extractReasoningFromData(data) {
return data?.choices?.[0]?.message?.reasoning ?? '';
case chat_completion_sources.MAKERSUITE:
return data?.responseContent?.parts?.filter(part => part.thought)?.map(part => part.text)?.join('\n\n') ?? '';
case chat_completion_sources.CLAUDE:
return data?.content?.find(part => part.type === 'thinking')?.thinking ?? '';
case chat_completion_sources.CUSTOM: {
return data?.choices?.[0]?.message?.reasoning_content
?? data?.choices?.[0]?.message?.reasoning

View File

@ -28,6 +28,7 @@ import {
cachingAtDepthForOpenRouterClaude,
cachingAtDepthForClaude,
getPromptNames,
calculateBudgetTokens,
} from '../../prompt-converters.js';
import { readSecret, SECRET_KEYS } from '../secrets.js';
@ -129,6 +130,8 @@ async function sendClaudeRequest(request, response) {
const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
let voidPrefill = false;
// Add custom stop sequences
const stopSequences = [];
if (Array.isArray(request.body.stop)) {
@ -163,9 +166,9 @@ async function sendClaudeRequest(request, response) {
.map(tool => tool.function)
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
if (requestBody.tools.length) {
// No prefill when using tools
voidPrefill = true;
}
if (enableSystemPromptCache && requestBody.tools.length) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
@ -180,6 +183,33 @@ async function sendClaudeRequest(request, response) {
betaHeaders.push('prompt-caching-2024-07-31');
}
if (useThinking) {
// No prefill when thinking
voidPrefill = true;
const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) {
const newValue = requestBody.max_tokens + minThinkTokens;
console.warn(color.yellow(`Claude thinking requires a minimum of ${minThinkTokens} response tokens.`));
console.info(color.blue(`Increasing response length to ${newValue}.`));
requestBody.max_tokens = newValue;
}
requestBody.thinking = {
type: 'enabled',
budget_tokens: budgetTokens,
};
// Thinking mode is incompatible with manual sampling controls, so drop them rather than silently ignoring the user's settings.
delete requestBody.temperature;
delete requestBody.top_p;
delete requestBody.top_k;
}
if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
}
if (betaHeaders.length) {
additionalHeaders['anthropic-beta'] = betaHeaders.join(',');
}

View File

@ -862,3 +862,34 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
}
}
}
/**
* Calculate the budget tokens for a given reasoning effort.
* @param {number} maxTokens Maximum tokens
* @param {string} reasoningEffort Reasoning effort
* @param {boolean} stream If streaming is enabled
* @returns {number} Budget tokens
*/
/**
 * Calculate the budget tokens for a given reasoning effort.
 * @param {number} maxTokens Maximum tokens
 * @param {string} reasoningEffort Reasoning effort
 * @param {boolean} stream If streaming is enabled
 * @returns {number} Budget tokens
 */
export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
    // Fraction of the response length allocated to thinking, per effort level.
    // Unrecognized efforts get a zero ratio and fall through to the floor below.
    const effortRatios = { low: 0.1, medium: 0.25, high: 0.5 };
    const ratio = effortRatios[reasoningEffort] ?? 0;
    // Never go below the 1024-token minimum thinking budget.
    const budgetTokens = Math.max(Math.floor(maxTokens * ratio), 1024);
    // Streaming requests are uncapped; non-streaming ones are limited to 21333.
    return stream ? budgetTokens : Math.min(budgetTokens, 21333);
}