Merge branch 'staging' into immutable-config

This commit is contained in:
Cohee
2025-02-25 22:08:35 +02:00
30 changed files with 839 additions and 470 deletions

View File

@@ -28,6 +28,7 @@ import {
cachingAtDepthForOpenRouterClaude,
cachingAtDepthForClaude,
getPromptNames,
calculateBudgetTokens,
} from '../../prompt-converters.js';
import { readSecret, SECRET_KEYS } from '../secrets.js';
@@ -125,9 +126,12 @@ async function sendClaudeRequest(request, response) {
controller.abort();
});
const additionalHeaders = {};
const betaHeaders = ['output-128k-2025-02-19'];
const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
const useThinking = request.body.model.startsWith('claude-3-7') && Boolean(request.body.include_reasoning);
let voidPrefill = false;
// Add custom stop sequences
const stopSequences = [];
if (Array.isArray(request.body.stop)) {
@@ -155,16 +159,16 @@ async function sendClaudeRequest(request, response) {
delete requestBody.system;
}
if (useTools) {
additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
betaHeaders.push('tools-2024-05-16');
requestBody.tool_choice = { type: request.body.tool_choice };
requestBody.tools = request.body.tools
.filter(tool => tool.type === 'function')
.map(tool => tool.function)
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
if (requestBody.tools.length) {
// No prefill when using tools
voidPrefill = true;
}
if (enableSystemPromptCache && requestBody.tools.length) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
@@ -176,7 +180,38 @@ async function sendClaudeRequest(request, response) {
}
if (enableSystemPromptCache || cachingAtDepth !== -1) {
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
betaHeaders.push('prompt-caching-2024-07-31');
}
if (useThinking) {
// No prefill when thinking
voidPrefill = true;
const reasoningEffort = request.body.reasoning_effort;
const budgetTokens = calculateBudgetTokens(requestBody.max_tokens, reasoningEffort, requestBody.stream);
const minThinkTokens = 1024;
if (requestBody.max_tokens <= minThinkTokens) {
const newValue = requestBody.max_tokens + minThinkTokens;
console.warn(color.yellow(`Claude thinking requires a minimum of ${minThinkTokens} response tokens.`));
console.info(color.blue(`Increasing response length to ${newValue}.`));
requestBody.max_tokens = newValue;
}
requestBody.thinking = {
type: 'enabled',
budget_tokens: budgetTokens,
};
// NO I CAN'T SILENTLY IGNORE THE TEMPERATURE.
delete requestBody.temperature;
delete requestBody.top_p;
delete requestBody.top_k;
}
if (voidPrefill && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
}
if (betaHeaders.length) {
additionalHeaders['anthropic-beta'] = betaHeaders.join(',');
}
console.debug('Claude request:', requestBody);
@@ -979,6 +1014,7 @@ router.post('/generate', jsonParser, function (request, response) {
headers = { ...OPENROUTER_HEADERS };
bodyParams = {
'transforms': getOpenRouterTransforms(request),
'include_reasoning': Boolean(request.body.include_reasoning),
};
if (request.body.min_p !== undefined) {
@@ -1004,10 +1040,6 @@ router.post('/generate', jsonParser, function (request, response) {
bodyParams['route'] = 'fallback';
}
if (request.body.include_reasoning) {
bodyParams['include_reasoning'] = true;
}
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model?.startsWith('anthropic/claude-3')) {
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);

View File

@@ -862,3 +862,34 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
}
}
}
/**
 * Calculate the budget tokens for a given reasoning effort.
 * @param {number} maxTokens Maximum tokens
 * @param {string} reasoningEffort Reasoning effort
 * @param {boolean} stream If streaming is enabled
 * @returns {number} Budget tokens
 */
export function calculateBudgetTokens(maxTokens, reasoningEffort, stream) {
    // Fraction of the response budget devoted to thinking, per effort level.
    // A Map (not a plain object) so arbitrary effort strings never hit
    // inherited prototype keys; unknown efforts fall back to a zero ratio.
    const effortRatios = new Map([
        ['low', 0.1],
        ['medium', 0.25],
        ['high', 0.5],
    ]);
    const ratio = effortRatios.get(reasoningEffort) ?? 0;
    // Enforce the minimum thinking budget accepted by the API.
    let budget = Math.max(Math.floor(maxTokens * ratio), 1024);
    // Non-streaming requests are capped to keep the response within timeout limits.
    if (!stream) {
        budget = Math.min(budget, 21333);
    }
    return budget;
}