mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Merge branch 'staging' into immutable-config
This commit is contained in:
@@ -28,6 +28,7 @@ import {
|
||||
cachingAtDepthForOpenRouterClaude,
|
||||
cachingAtDepthForClaude,
|
||||
getPromptNames,
|
||||
calculateBudgetTokens,
|
||||
} from '../../prompt-converters.js';
|
||||
|
||||
import { readSecret, SECRET_KEYS } from '../secrets.js';
|
||||
@@ -125,9 +126,12 @@ async function sendClaudeRequest(request, response) {
|
||||
controller.abort();
|
||||
});
|
||||
const additionalHeaders = {};
|
||||
const betaHeaders = ['output-128k-2025-02-19'];
|
||||
const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
|
||||
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
|
||||
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
|
||||
const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
|
||||
let voidPrefill = false;
|
||||
// Add custom stop sequences
|
||||
const stopSequences = [];
|
||||
if (Array.isArray(request.body.stop)) {
|
||||
@@ -155,16 +159,16 @@ async function sendClaudeRequest(request, response) {
|
||||
delete requestBody.system;
|
||||
}
|
||||
if (useTools) {
|
||||
additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
|
||||
betaHeaders.push('tools-2024-05-16');
|
||||
requestBody.tool_choice = { type: request.body.tool_choice };
|
||||
requestBody.tools = request.body.tools
|
||||
.filter(tool => tool.type === 'function')
|
||||
.map(tool => tool.function)
|
||||
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
|
||||
|
||||
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
|
||||
if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
|
||||
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
|
||||
if (requestBody.tools.length) {
|
||||
// No prefill when using tools
|
||||
voidPrefill = true;
|
||||
}
|
||||
if (enableSystemPromptCache && requestBody.tools.length) {
|
||||
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
|
||||
@@ -176,7 +180,38 @@ async function sendClaudeRequest(request, response) {
|
||||
}
|
||||
|
||||
if (enableSystemPromptCache || cachingAtDepth !== -1) {
|
||||
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
||||
betaHeaders.push('prompt-caching-2024-07-31');
|
||||
}
|
||||
|
||||
if (useThinking) {
|
||||
// No prefill when thinking
|
||||
voidPrefill = true;
|
||||
const reasoningEffort = request.body.reasoning_effort;
|
||||
const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
|
||||
const minThinkTokens = 1024;
|
||||
if (requestBody.max_tokens <= minThinkTokens) {
|
||||
const newValue = requestBody.max_tokens + minThinkTokens;
|
||||
console.warn(color.yellow(`Claude thinking requires a minimum of ${minThinkTokens} response tokens.`));
|
||||
console.info(color.blue(`Increasing response length to ${newValue}.`));
|
||||
requestBody.max_tokens = newValue;
|
||||
}
|
||||
requestBody.thinking = {
|
||||
type: 'enabled',
|
||||
budget_tokens: budgetTokens,
|
||||
};
|
||||
|
||||
// NO I CAN'T SILENTLY IGNORE THE TEMPERATURE.
|
||||
delete requestBody.temperature;
|
||||
delete requestBody.top_p;
|
||||
delete requestBody.top_k;
|
||||
}
|
||||
|
||||
if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
|
||||
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
|
||||
}
|
||||
|
||||
if (betaHeaders.length) {
|
||||
additionalHeaders['anthropic-beta'] = betaHeaders.join(',');
|
||||
}
|
||||
|
||||
console.debug('Claude request:', requestBody);
|
||||
@@ -979,6 +1014,7 @@ router.post('/generate', jsonParser, function (request, response) {
|
||||
headers = { ...OPENROUTER_HEADERS };
|
||||
bodyParams = {
|
||||
'transforms': getOpenRouterTransforms(request),
|
||||
'include_reasoning': Boolean(request.body.include_reasoning),
|
||||
};
|
||||
|
||||
if (request.body.min_p !== undefined) {
|
||||
@@ -1004,10 +1040,6 @@ router.post('/generate', jsonParser, function (request, response) {
|
||||
bodyParams['route'] = 'fallback';
|
||||
}
|
||||
|
||||
if (request.body.include_reasoning) {
|
||||
bodyParams['include_reasoning'] = true;
|
||||
}
|
||||
|
||||
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
||||
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model?.startsWith('anthropic/claude-3')) {
|
||||
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
|
||||
|
@@ -862,3 +862,34 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Calculate the budget tokens for a given reasoning effort.
 *
 * The budget is a fixed share of `maxTokens` selected by `reasoningEffort`
 * ('low' = 10%, 'medium' = 25%, 'high' = 50%), rounded down. Any other effort
 * value contributes a share of zero. The result is then raised to at least
 * 1024 and, when streaming is disabled, capped at 21333.
 *
 * @param {number} maxTokens Maximum tokens
 * @param {string} reasoningEffort Reasoning effort
 * @param {boolean} stream If streaming is enabled
 * @returns {number} Budget tokens
 */
export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
    const minBudgetTokens = 1024;
    const maxNonStreamingBudgetTokens = 21333;

    // Coerce once and reject non-finite values: a missing or malformed limit
    // would otherwise turn every step below into NaN and yield a NaN budget.
    const numericMaxTokens = Number(maxTokens);
    const tokenLimit = Number.isFinite(numericMaxTokens) ? numericMaxTokens : 0;

    let budgetTokens = 0;

    switch (reasoningEffort) {
        case 'low':
            budgetTokens = Math.floor(tokenLimit * 0.1);
            break;
        case 'medium':
            budgetTokens = Math.floor(tokenLimit * 0.25);
            break;
        case 'high':
            budgetTokens = Math.floor(tokenLimit * 0.5);
            break;
    }

    // Never return less than the minimum budget, regardless of effort.
    budgetTokens = Math.max(budgetTokens, minBudgetTokens);

    // Without streaming the budget is additionally capped.
    if (!stream) {
        budgetTokens = Math.min(budgetTokens, maxNonStreamingBudgetTokens);
    }

    return budgetTokens;
}
|
||||
|
Reference in New Issue
Block a user