diff --git a/default/config.yaml b/default/config.yaml index e7fe7e196..00476a198 100644 --- a/default/config.yaml +++ b/default/config.yaml @@ -234,6 +234,10 @@ claude: # should be ideal for most use cases. # Any value other than a non-negative integer will be ignored and caching at depth will not be enabled. cachingAtDepth: -1 + # Use a 1h cache TTL instead of the default 5m. + ## 5m: cache writes cost base input price x 1.25 + ## 1h: cache writes cost base input price x 2 + extendedTTL: false # -- GOOGLE GEMINI API CONFIGURATION -- gemini: # API endpoint version ("v1beta" or "v1alpha") diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index b357d3935..50ad3548b 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -154,6 +154,7 @@ async function sendClaudeRequest(request, response) { const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request)); const useThinking = /^claude-(3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.include_reasoning); const useWebSearch = /^claude-(3-5|3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.enable_web_search); + const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? 
'1h' : '5m'; let fixThinkingPrefill = false; // Add custom stop sequences const stopSequences = []; @@ -174,7 +175,7 @@ async function sendClaudeRequest(request, response) { }; if (useSystemPrompt) { if (enableSystemPromptCache && Array.isArray(convertedPrompt.systemPrompt) && convertedPrompt.systemPrompt.length) { - convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' }; + convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL }; } requestBody.system = convertedPrompt.systemPrompt; @@ -190,7 +191,7 @@ async function sendClaudeRequest(request, response) { .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters })); if (enableSystemPromptCache && requestBody.tools.length) { - requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' }; + requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL }; } } @@ -203,7 +204,7 @@ async function sendClaudeRequest(request, response) { } if (cachingAtDepth !== -1) { - cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth); + cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth, cacheTTL); } if (enableSystemPromptCache || cachingAtDepth !== -1) { diff --git a/src/prompt-converters.js b/src/prompt-converters.js index d882f92e0..70a77101e 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -854,7 +854,13 @@ export function convertTextCompletionPrompt(messages) { return messageStrings.join('\n') + '\nassistant:'; } -export function cachingAtDepthForClaude(messages, cachingAtDepth) { +/** + * Appends a cache_control object to the Claude messages at the given depth. Directly modifies the messages array. 
+ * @param {any[]} messages Messages to modify + * @param {number} cachingAtDepth Depth at which caching is supposed to occur + * @param {string} ttl Cache TTL written to cache_control.ttl (e.g. '5m' or '1h') + */ +export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) { let passedThePrefill = false; let depth = 0; let previousRoleName = ''; @@ -869,7 +875,7 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth) { if (messages[i].role !== previousRoleName) { if (depth === cachingAtDepth || depth === cachingAtDepth + 2) { const content = messages[i].content; - content[content.length - 1].cache_control = { type: 'ephemeral', ttl: '1h' }; + content[content.length - 1].cache_control = { type: 'ephemeral', ttl: ttl }; } if (depth === cachingAtDepth + 2) {