mirror of https://github.com/SillyTavern/SillyTavern.git
synced 2025-03-13 10:30:14 +01:00

Claude 3.7 think mode

parent db148d5142
commit b8ebed0f4c
@@ -2000,7 +2000,7 @@
             </span>
         </div>
     </div>
-    <div class="range-block" data-source="deepseek,openrouter,custom">
+    <div class="range-block" data-source="deepseek,openrouter,custom,claude">
         <label for="openai_show_thoughts" class="checkbox_label widthFreeExpand">
             <input id="openai_show_thoughts" type="checkbox" />
             <span>
@@ -2014,10 +2014,11 @@
             </span>
         </div>
     </div>
-    <div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom">
+    <div class="flex-container flexFlowColumn wide100p textAlignCenter marginTop10" data-source="openai,custom,claude">
         <div class="flex-container oneline-dropdown" title="Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response." data-i18n="[title]Constrains effort on reasoning for reasoning models.">
-            <label for="openai_reasoning_effort" data-i18n="Reasoning Effort">
-                Reasoning Effort
+            <label for="openai_reasoning_effort">
+                <span data-i18n="Reasoning Effort">Reasoning Effort</span>
+                <i data-source="claude" class="opacity50p fa-solid fa-circle-info" title="Allocates a portion of the response length for thinking (low: 10%, medium: 25%, high: 50%)."></i>
             </label>
             <select id="openai_reasoning_effort">
                 <option data-i18n="openai_reasoning_effort_low" value="low">Low</option>
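(For scale: the tooltip's percentages match the budget math introduced further down in calculateBudgetTokens, so with a 4096-token response length, medium effort reserves floor(4096 * 0.25) = 1024 tokens for thinking.)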
@@ -5725,7 +5725,7 @@ function extractMessageFromData(data) {
         case 'novel':
            return data.output;
        case 'openai':
-           return data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
+           return data?.content?.find(p => p.type === 'text')?.text ?? data?.choices?.[0]?.message?.content ?? data?.choices?.[0]?.text ?? data?.text ?? data?.message?.content?.[0]?.text ?? data?.message?.tool_plan ?? '';
        default:
            return '';
    }
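Why the extra data?.content?.find(p => p.type === 'text') branch: Claude's native Messages API returns an array of typed content blocks, and with extended thinking enabled the visible reply is no longer the first block. A minimal sketch of the shape being handled (payload values are illustrative, not taken from the commit):

    // Illustrative non-streaming Claude response with thinking enabled:
    // the 'thinking' block precedes the 'text' block, so content[0] would
    // yield the reasoning instead of the visible reply.
    const data = {
        content: [
            { type: 'thinking', thinking: 'Working through the problem...' },
            { type: 'text', text: 'Here is the answer.' },
        ],
    };
    const message = data?.content?.find(p => p.type === 'text')?.text ?? '';
    console.log(message); // 'Here is the answer.'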
@@ -2149,6 +2149,9 @@ async function sendOpenAIRequest(type, messages, signal) {
  */
 function getStreamingReply(data, state) {
     if (oai_settings.chat_completion_source === chat_completion_sources.CLAUDE) {
+        if (oai_settings.show_thoughts) {
+            state.reasoning += data?.delta?.thinking || '';
+        }
         return data?.delta?.text || '';
     } else if (oai_settings.chat_completion_source === chat_completion_sources.MAKERSUITE) {
         if (oai_settings.show_thoughts) {
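For context: in Anthropic's streaming API, content_block_delta events carry either a thinking_delta or a text_delta, which is why the two fields are read separately here. A sketch with made-up payloads:

    // Illustrative stream deltas (shapes follow Anthropic's streaming docs;
    // the text itself is invented).
    const events = [
        { delta: { type: 'thinking_delta', thinking: 'Hmm, ' } },
        { delta: { type: 'text_delta', text: 'Hello!' } },
    ];
    const state = { reasoning: '' };
    let text = '';
    for (const data of events) {
        state.reasoning += data?.delta?.thinking || ''; // thinking deltas only
        text += data?.delta?.text || '';                // text deltas only
    }
    console.log(state.reasoning); // 'Hmm, '
    console.log(text);            // 'Hello!'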
@@ -76,6 +76,8 @@ export function extractReasoningFromData(data) {
             return data?.choices?.[0]?.message?.reasoning ?? '';
         case chat_completion_sources.MAKERSUITE:
             return data?.responseContent?.parts?.filter(part => part.thought)?.map(part => part.text)?.join('\n\n') ?? '';
+        case chat_completion_sources.CLAUDE:
+            return data?.content?.find(part => part.type === 'thinking')?.thinking ?? '';
         case chat_completion_sources.CUSTOM: {
             return data?.choices?.[0]?.message?.reasoning_content
                 ?? data?.choices?.[0]?.message?.reasoning
@@ -28,6 +28,7 @@ import {
     cachingAtDepthForOpenRouterClaude,
     cachingAtDepthForClaude,
     getPromptNames,
+    calculateBudgetTokens,
 } from '../../prompt-converters.js';

 import { readSecret, SECRET_KEYS } from '../secrets.js';
@@ -129,6 +130,8 @@ async function sendClaudeRequest(request, response) {
     const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
     const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
     const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
+    const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
+    let voidPrefill = false;
     // Add custom stop sequences
     const stopSequences = [];
     if (Array.isArray(request.body.stop)) {
@@ -163,9 +166,9 @@ async function sendClaudeRequest(request, response) {
         .map(tool => tool.function)
         .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));

-    // Claude doesn't do prefills on function calls, and doesn't allow empty messages
-    if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
-        convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
+    if (requestBody.tools.length) {
+        // No prefill when using tools
+        voidPrefill = true;
     }
     if (enableSystemPromptCache && requestBody.tools.length) {
         requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
@@ -180,6 +183,33 @@ async function sendClaudeRequest(request, response) {
         betaHeaders.push('prompt-caching-2024-07-31');
     }

+    if (useThinking) {
+        // No prefill when thinking
+        voidPrefill = true;
+        const reasoningEffort = request.body.reasoning_effort;
+        const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
+        const minThinkTokens = 1024;
+        if (requestBody.max_tokens <= minThinkTokens) {
+            const newValue = requestBody.max_tokens + minThinkTokens;
+            console.warn(color.yellow(`Claude thinking requires a minimum of ${minThinkTokens} response tokens.`));
+            console.info(color.blue(`Increasing response length to ${newValue}.`));
+            requestBody.max_tokens = newValue;
+        }
+        requestBody.thinking = {
+            type: 'enabled',
+            budget_tokens: budgetTokens,
+        };
+
+        // NO I CAN'T SILENTLY IGNORE THE TEMPERATURE.
+        delete requestBody.temperature;
+        delete requestBody.top_p;
+        delete requestBody.top_k;
+    }
+
+    if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
+        convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
+    }
+
     if (betaHeaders.length) {
         additionalHeaders['anthropic-beta'] = betaHeaders.join(',');
     }
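Put together, the outgoing request when thinking is active looks roughly like the sketch below (model name and numbers are illustrative; Anthropic requires the thinking budget to stay below max_tokens and rejects temperature/top_p/top_k while thinking is enabled, hence the deletions above):

    // Illustrative final request body for a claude-3-7 model with
    // max_tokens = 4096 and medium reasoning effort.
    const requestBody = {
        model: 'claude-3-7-sonnet-20250219',
        max_tokens: 4096,
        stream: true,
        thinking: {
            type: 'enabled',
            budget_tokens: 1024, // calculateBudgetTokens(4096, 'medium', true)
        },
        // temperature, top_p, top_k: deleted above, not sent
        messages: [/* converted prompt, with any assistant prefill voided */],
    };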
@@ -862,3 +862,34 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
         }
     }
 }
+
+/**
+ * Calculate the budget tokens for a given reasoning effort.
+ * @param {number} maxTokens Maximum tokens
+ * @param {string} reasoningEffort Reasoning effort
+ * @param {boolean} stream If streaming is enabled
+ * @returns {number} Budget tokens
+ */
+export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
+    let budgetTokens = 0;
+
+    switch (reasoningEffort) {
+        case 'low':
+            budgetTokens = Math.floor(maxTokens * 0.1);
+            break;
+        case 'medium':
+            budgetTokens = Math.floor(maxTokens * 0.25);
+            break;
+        case 'high':
+            budgetTokens = Math.floor(maxTokens * 0.5);
+            break;
+    }
+
+    budgetTokens = Math.max(budgetTokens, 1024);
+
+    if (!stream) {
+        budgetTokens = Math.min(budgetTokens, 21333);
+    }
+
+    return budgetTokens;
+}
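A few worked values for the function above, straight from its arithmetic (the 21333 non-streaming cap appears to track Anthropic's requirement to stream requests with very large token counts):

    calculateBudgetTokens(4096, 'medium', true);  // floor(4096 * 0.25) = 1024
    calculateBudgetTokens(2048, 'low', true);     // floor(2048 * 0.1) = 204, raised to the 1024 floor
    calculateBudgetTokens(60000, 'high', false);  // floor(60000 * 0.5) = 30000, capped at 21333
    calculateBudgetTokens(60000, 'high', true);   // streaming keeps the full 30000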