Claude: control cache TTL with config

This commit is contained in:
Cohee
2025-05-23 21:40:40 +03:00
parent ed2e6fff6e
commit 560c6e8ff1
3 changed files with 16 additions and 5 deletions

View File

@ -234,6 +234,10 @@ claude:
# should be ideal for most use cases.
# Any value other than a non-negative integer will be ignored and caching at depth will not be enabled.
cachingAtDepth: -1
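# Use the extended 1h cache TTL instead of the default 5m.
# A longer TTL makes cache writes more expensive (relative to the base input token price):
## 5m: base price x 1.25
## 1h: base price x 2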
extendedTTL: false
# -- GOOGLE GEMINI API CONFIGURATION --
gemini:
# API endpoint version ("v1beta" or "v1alpha")

View File

@ -154,6 +154,7 @@ async function sendClaudeRequest(request, response) {
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
const useThinking = /^claude-(3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.include_reasoning);
const useWebSearch = /^claude-(3-5|3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.enable_web_search);
const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
let fixThinkingPrefill = false;
// Add custom stop sequences
const stopSequences = [];
@ -174,7 +175,7 @@ async function sendClaudeRequest(request, response) {
};
if (useSystemPrompt) {
if (enableSystemPromptCache && Array.isArray(convertedPrompt.systemPrompt) && convertedPrompt.systemPrompt.length) {
convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
}
requestBody.system = convertedPrompt.systemPrompt;
@ -190,7 +191,7 @@ async function sendClaudeRequest(request, response) {
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
if (enableSystemPromptCache && requestBody.tools.length) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
}
}
@ -203,7 +204,7 @@ async function sendClaudeRequest(request, response) {
}
if (cachingAtDepth !== -1) {
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth, cacheTTL);
}
if (enableSystemPromptCache || cachingAtDepth !== -1) {

View File

@ -854,7 +854,13 @@ export function convertTextCompletionPrompt(messages) {
return messageStrings.join('\n') + '\nassistant:';
}
export function cachingAtDepthForClaude(messages, cachingAtDepth) {
/**
* Appends a cache_control object to Claude messages at the given depth. Directly modifies the messages array.
* @param {any[]} messages Messages to modify
* @param {number} cachingAtDepth Depth at which caching is supposed to occur
* @param {string} ttl Cache TTL to set in cache_control (e.g. '5m' or '1h')
*/
export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) {
let passedThePrefill = false;
let depth = 0;
let previousRoleName = '';
@ -869,7 +875,7 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth) {
if (messages[i].role !== previousRoleName) {
if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
const content = messages[i].content;
content[content.length - 1].cache_control = { type: 'ephemeral', ttl: '1h' };
content[content.length - 1].cache_control = { type: 'ephemeral', ttl: ttl };
}
if (depth === cachingAtDepth + 2) {