From befe5a7171e62517cb6cd985a37eafb9a58f1d11 Mon Sep 17 00:00:00 2001
From: Honey Tree
Date: Sun, 17 Nov 2024 15:53:02 -0300
Subject: [PATCH] Adding Claude caching support to OpenRouter as well

---
Review notes (this section is ignored when the patch is applied):
- The OpenRouter cache breakpoint is only set when the message content is a
  string or a non-empty array of parts; other content (e.g. null) is left
  untouched instead of raising a TypeError.
- Added lines use single quotes and explicit semicolons, matching the
  surrounding file.

 src/endpoints/backends/chat-completions.js | 50 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index bd5417464..acce5acf4 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -80,9 +80,9 @@ async function sendClaudeRequest(request, response) {
     const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
     const divider = '-'.repeat(process.stdout.columns);
     const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
-    let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1) && request.body.model.startsWith('claude-3');
-    // Disabled if not an integer or negative
-    if (!Number.isInteger(cachingAtDepth) || cachingAtDepth < 0) {
+    let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
+    // Disabled if not an integer or negative, or if the model doesn't support it
+    if (!Number.isInteger(cachingAtDepth) || cachingAtDepth < 0 || !request.body.model.startsWith('claude-3')) {
         cachingAtDepth = -1;
     }
 
@@ -899,6 +899,50 @@ router.post('/generate', jsonParser, function (request, response) {
         if (request.body.use_fallback) {
             bodyParams['route'] = 'fallback';
         }
+
+        // Prompt caching for Claude models requested through OpenRouter.
+        // Scanning from the last message backwards, a cache_control marker is put on the message
+        // reached after `cachingAtDepth` role switches and on the one two role switches further back.
+        const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
+        if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model.startsWith('anthropic/claude-3')) {
+            // Trailing assistant messages are treated as the prefill: they are skipped and never marked.
+            let passedThePrefill = false;
+            // Depth here is the number of message role switches seen so far.
+            let depth = 0;
+            let previousRoleName = '';
+            for (let i = request.body.messages.length - 1; i >= 0; i--) {
+                const message = request.body.messages[i];
+                if (!passedThePrefill && message.role === 'assistant') {
+                    continue;
+                }
+
+                passedThePrefill = true;
+
+                if (message.role !== previousRoleName) {
+                    if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
+                        const content = message.content;
+                        if (typeof content === 'string') {
+                            message.content = [{
+                                type: 'text',
+                                text: content,
+                                cache_control: { type: 'ephemeral' },
+                            }];
+                        } else if (Array.isArray(content) && content.length > 0) {
+                            // Multi-part content: the marker goes on the last part.
+                            content[content.length - 1].cache_control = { type: 'ephemeral' };
+                        }
+                        // Any other content (null, empty array) has no part to mark and is left untouched.
+                    }
+
+                    if (depth === cachingAtDepth + 2) {
+                        break;
+                    }
+
+                    depth += 1;
+                    previousRoleName = message.role;
+                }
+            }
+        }
     } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
         apiUrl = request.body.custom_url;
         apiKey = readSecret(request.user.directories, SECRET_KEYS.CUSTOM);