Claude: control cache TTL with config

2025-06-05 21:59:27 +02:00 · 2025-05-23 21:40:40 +03:00
parent ed2e6fff6e
commit 560c6e8ff1
3 changed files with 16 additions and 5 deletions
--- a/default/config.yaml
+++ b/default/config.yaml
@@ -234,6 +234,10 @@ claude:
  # should be ideal for most use cases.
  # Any value other than a non-negative integer will be ignored and caching at depth will not be enabled.
  cachingAtDepth: -1
+  # Use a 1h cache TTL instead of the default 5m. Cache writes cost more with the longer TTL:
+  ## 5m: base input price x 1.25
+  ## 1h: base input price x 2
+  extendedTTL: false
 # -- GOOGLE GEMINI API CONFIGURATION --
 gemini:
  # API endpoint version ("v1beta" or "v1alpha")
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -154,6 +154,7 @@ async function sendClaudeRequest(request, response) {
        const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
        const useThinking = /^claude-(3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.include_reasoning);
        const useWebSearch = /^claude-(3-5|3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.enable_web_search);
+        const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
        let fixThinkingPrefill = false;
        // Add custom stop sequences
        const stopSequences = [];
@@ -174,7 +175,7 @@ async function sendClaudeRequest(request, response) {
        };
        if (useSystemPrompt) {
            if (enableSystemPromptCache && Array.isArray(convertedPrompt.systemPrompt) && convertedPrompt.systemPrompt.length) {
-                convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
+                convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
            }

            requestBody.system = convertedPrompt.systemPrompt;
@@ -190,7 +191,7 @@ async function sendClaudeRequest(request, response) {
                .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));

            if (enableSystemPromptCache && requestBody.tools.length) {
-                requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
+                requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
            }
        }

@@ -203,7 +204,7 @@ async function sendClaudeRequest(request, response) {
        }

        if (cachingAtDepth !== -1) {
-            cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
+            cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth, cacheTTL);
        }

        if (enableSystemPromptCache || cachingAtDepth !== -1) {
--- a/src/prompt-converters.js
+++ b/src/prompt-converters.js
@@ -854,7 +854,13 @@ export function convertTextCompletionPrompt(messages) {
    return messageStrings.join('\n') + '\nassistant:';
 }

-export function cachingAtDepthForClaude(messages, cachingAtDepth) {
+/**
+ * Append a cache_control object to Claude messages at the given depth. Directly modifies the messages array.
+ * @param {any[]} messages Messages to modify
+ * @param {number} cachingAtDepth Depth at which caching is supposed to occur
+ * @param {string} ttl TTL value
+ */
+export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) {
    let passedThePrefill = false;
    let depth = 0;
    let previousRoleName = '';
@@ -869,7 +875,7 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth) {
        if (messages[i].role !== previousRoleName) {
            if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
                const content = messages[i].content;
-                content[content.length - 1].cache_control = { type: 'ephemeral', ttl: '1h' };
+                content[content.length - 1].cache_control = { type: 'ephemeral', ttl: ttl };
            }

            if (depth === cachingAtDepth + 2) {