Simple implementation of caching at depth that should be useful for most use cases
parent 85d25a8e13
commit 73dabd8905
@@ -168,5 +168,12 @@ claude:
   # (e.g {{random}} macro or lorebooks not as in-chat injections).
   # Otherwise, you'll just waste money on cache misses.
   enableSystemPromptCache: false
+  # Enables caching of the message history at depth (if supported).
+  # https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+  # -- IMPORTANT! --
+  # Use with caution. Behavior may be unpredictable and no guarantees can or will be made.
+  # Set to an integer to specify the desired depth. 0 (which does NOT include the prefill)
+  # should be ideal for most use cases.
+  cachingAtDepth: false
 # -- SERVER PLUGIN CONFIGURATION --
 enableServerPlugins: false
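For context, a user opting in would change the new key from false to an integer depth. A minimal sketch of the resulting claude section, assuming the same layout as the hunk above:

claude:
  enableSystemPromptCache: false
  # Cache the newest messages; per the comment above, 0 does not include the prefill.
  cachingAtDepth: 0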
@@ -80,6 +80,7 @@ async function sendClaudeRequest(request, response) {
         const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
         const divider = '-'.repeat(process.stdout.columns);
         const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
+        let cachingAtDepth = getConfigValue('claude.cachingAtDepth', false) && request.body.model.startsWith('claude-3');

         if (!apiKey) {
             console.log(color.red(`Claude API key is missing.\n${divider}`));
@@ -138,9 +139,25 @@ async function sendClaudeRequest(request, response) {
                 requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
             }
         }
-        if (enableSystemPromptCache) {
+
+        if (cachingAtDepth !== false) {
+            // There are extremely few scenarios in which caching the prefill is a good idea, it mostly just breaks everything
+            const messageCount = convertedPrompt.messages.length;
+            cachingAtDepth += convertedPrompt.messages[messageCount - 1].role === 'assistant' ? 1 : 0;
+
+            if (messageCount - 1 - cachingAtDepth >= 0) {
+                convertedPrompt.messages[messageCount - 1 - cachingAtDepth]['cache_control'] = { type: 'ephemeral' };
+            }
+
+            if (messageCount - 1 - cachingAtDepth - 2 >= 0) {
+                convertedPrompt.messages[messageCount - 1 - cachingAtDepth - 2]['cache_control'] = { type: 'ephemeral' };
+            }
+        }
+
+        if (enableSystemPromptCache || cachingAtDepth !== false) {
             additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
         }

         console.log('Claude request:', requestBody);

         const generateResponse = await fetch(apiUrl + '/messages', {
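To make the index arithmetic above concrete, here is a small standalone sketch (not part of the commit) that mirrors the depth logic on a plain messages array. The function name and sample messages are illustrative only, not SillyTavern APIs.

// Mirrors the caching-at-depth logic from the hunk above, for illustration.
function applyCachingAtDepth(messages, cachingAtDepth) {
    const messageCount = messages.length;

    // If the chat ends in an assistant prefill, shift the depth by one so the
    // cache breakpoint never lands on the prefill itself.
    cachingAtDepth += messages[messageCount - 1].role === 'assistant' ? 1 : 0;

    // First breakpoint: the message at the requested depth from the end.
    if (messageCount - 1 - cachingAtDepth >= 0) {
        messages[messageCount - 1 - cachingAtDepth]['cache_control'] = { type: 'ephemeral' };
    }

    // Second breakpoint: two messages earlier (typically where the first breakpoint
    // sat on the previous request), so that prefix can still be read from cache.
    if (messageCount - 1 - cachingAtDepth - 2 >= 0) {
        messages[messageCount - 1 - cachingAtDepth - 2]['cache_control'] = { type: 'ephemeral' };
    }

    return messages;
}

// Example: a chat ending in an assistant prefill, with cachingAtDepth = 0.
// The prefill shifts the effective depth to 1, so breakpoints land on indices 2 and 0.
const sample = [
    { role: 'user', content: 'Hello' },               // index 0 <- second breakpoint
    { role: 'assistant', content: 'Hi there.' },      // index 1
    { role: 'user', content: 'Tell me a story.' },    // index 2 <- first breakpoint
    { role: 'assistant', content: 'Once upon a' },    // index 3: prefill, never cached
];
console.log(applyCachingAtDepth(sample, 0));

Setting two breakpoints this way means that, in an alternating chat where each exchange appends a user and an assistant message, the deeper breakpoint matches the position cached on the previous request, so the existing prefix can still be read while the new, longer prefix is written.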