Simple implementation of caching at depth that should be useful for most use cases

This commit is contained in:
Honey Tree 2024-11-17 08:32:36 -03:00
parent 85d25a8e13
commit 73dabd8905
2 changed files with 25 additions and 1 deletion

View File

@ -168,5 +168,12 @@ claude:
# (e.g. {{random}} macro or lorebooks not as in-chat injections).
# Otherwise, you'll just waste money on cache misses.
enableSystemPromptCache: false
# Enables caching of the message history at depth (if supported).
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
# -- IMPORTANT! --
# Use with caution. Behavior may be unpredictable and no guarantees can or will be made.
# Set to false to disable, or to an integer to specify the desired depth.
# 0 (which does NOT include the prefill) should be ideal for most use cases.
cachingAtDepth: false
# -- SERVER PLUGIN CONFIGURATION --
enableServerPlugins: false

View File

@ -80,6 +80,7 @@ async function sendClaudeRequest(request, response) {
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
const divider = '-'.repeat(process.stdout.columns);
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', false) && request.body.model.startsWith('claude-3');
if (!apiKey) {
console.log(color.red(`Claude API key is missing.\n${divider}`));
@ -138,9 +139,25 @@ async function sendClaudeRequest(request, response) {
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
}
}
if (enableSystemPromptCache) {
if (cachingAtDepth !== false) {
// There are extremely few scenarios in which caching the prefill is a good idea, it mostly just breaks everything
const messageCount = convertedPrompt.messages.length;
cachingAtDepth += convertedPrompt.messages[messageCount - 1].role === 'assistant' ? 1 : 0;
if (messageCount - 1 - cachingAtDepth >= 0) {
convertedPrompt.messages[messageCount - 1 - cachingAtDepth]['cache_control'] = { type: 'ephemeral' };
}
if (messageCount - 1 - cachingAtDepth - 2 >= 0) {
convertedPrompt.messages[messageCount - 1 - cachingAtDepth - 2]['cache_control'] = { type: 'ephemeral' };
}
}
if (enableSystemPromptCache || cachingAtDepth !== false) {
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
}
console.log('Claude request:', requestBody);
const generateResponse = await fetch(apiUrl + '/messages', {