From 0eff634bd063ec084eb6a8deb3f9fecfe73ad51e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Wed, 4 Jun 2025 20:30:20 +0300 Subject: [PATCH] OpenRouter: add cache TTL control for Claude --- src/endpoints/backends/chat-completions.js | 5 +++-- src/prompt-converters.js | 6 ++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index c8a7c5f7e..ba2c28129 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -1342,10 +1342,11 @@ router.post('/generate', function (request, response) { bodyParams['reasoning'] = { effort: request.body.reasoning_effort }; } - let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number'); + const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number'); const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4)/.test(request.body.model); + const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m'; if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && isClaude3or4) { - cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth); + cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth, cacheTTL); } } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) { apiUrl = request.body.custom_url; diff --git a/src/prompt-converters.js b/src/prompt-converters.js index eab185c7d..0ce1cc3ed 100644 --- a/src/prompt-converters.js +++ b/src/prompt-converters.js @@ -906,8 +906,9 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) { * messages array. * @param {object[]} messages Array of messages * @param {number} cachingAtDepth Depth at which caching is supposed to occur + * @param {string} ttl TTL value */ -export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) { +export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth, ttl) { //caching the prefill is a terrible idea in general let passedThePrefill = false; //depth here is the number of message role switches @@ -927,12 +928,13 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) { messages[i].content = [{ type: 'text', text: content, - cache_control: { type: 'ephemeral' }, + cache_control: { type: 'ephemeral', ttl: ttl }, }]; } else { const contentPartCount = content.length; content[contentPartCount - 1].cache_control = { type: 'ephemeral', + ttl: ttl, }; } }