mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Claude: control cache TTL with config
This commit is contained in:
@ -234,6 +234,10 @@ claude:
|
||||
# should be ideal for most use cases.
|
||||
# Any value other than a non-negative integer will be ignored and caching at depth will not be enabled.
|
||||
cachingAtDepth: -1
|
||||
# Use 1h TTL instead of the default 5m.
|
||||
## 5m: cache writes cost base input price x 1.25
|
||||
## 1h: cache writes cost base input price x 2
|
||||
extendedTTL: false
|
||||
# -- GOOGLE GEMINI API CONFIGURATION --
|
||||
gemini:
|
||||
# API endpoint version ("v1beta" or "v1alpha")
|
||||
|
@ -154,6 +154,7 @@ async function sendClaudeRequest(request, response) {
|
||||
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));
|
||||
const useThinking = /^claude-(3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.include_reasoning);
|
||||
const useWebSearch = /^claude-(3-5|3-7|opus-4|sonnet-4)/.test(request.body.model) && Boolean(request.body.enable_web_search);
|
||||
const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
|
||||
let fixThinkingPrefill = false;
|
||||
// Add custom stop sequences
|
||||
const stopSequences = [];
|
||||
@ -174,7 +175,7 @@ async function sendClaudeRequest(request, response) {
|
||||
};
|
||||
if (useSystemPrompt) {
|
||||
if (enableSystemPromptCache && Array.isArray(convertedPrompt.systemPrompt) && convertedPrompt.systemPrompt.length) {
|
||||
convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
|
||||
convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
|
||||
}
|
||||
|
||||
requestBody.system = convertedPrompt.systemPrompt;
|
||||
@ -190,7 +191,7 @@ async function sendClaudeRequest(request, response) {
|
||||
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
|
||||
|
||||
if (enableSystemPromptCache && requestBody.tools.length) {
|
||||
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: '1h' };
|
||||
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral', ttl: cacheTTL };
|
||||
}
|
||||
}
|
||||
|
||||
@ -203,7 +204,7 @@ async function sendClaudeRequest(request, response) {
|
||||
}
|
||||
|
||||
if (cachingAtDepth !== -1) {
|
||||
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
|
||||
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth, cacheTTL);
|
||||
}
|
||||
|
||||
if (enableSystemPromptCache || cachingAtDepth !== -1) {
|
||||
|
@ -854,7 +854,13 @@ export function convertTextCompletionPrompt(messages) {
|
||||
return messageStrings.join('\n') + '\nassistant:';
|
||||
}
|
||||
|
||||
export function cachingAtDepthForClaude(messages, cachingAtDepth) {
|
||||
/**
|
||||
* Append a cache_control object to Claude messages at depth. Directly modifies the messages array.
|
||||
* @param {any[]} messages Messages to modify
|
||||
* @param {number} cachingAtDepth Depth at which caching is supposed to occur
|
||||
* @param {string} ttl TTL value
|
||||
*/
|
||||
export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) {
|
||||
let passedThePrefill = false;
|
||||
let depth = 0;
|
||||
let previousRoleName = '';
|
||||
@ -869,7 +875,7 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth) {
|
||||
if (messages[i].role !== previousRoleName) {
|
||||
if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
|
||||
const content = messages[i].content;
|
||||
content[content.length - 1].cache_control = { type: 'ephemeral', ttl: '1h' };
|
||||
content[content.length - 1].cache_control = { type: 'ephemeral', ttl: ttl };
|
||||
}
|
||||
|
||||
if (depth === cachingAtDepth + 2) {
|
||||
|
Reference in New Issue
Block a user