Simple implementation of caching at depth that should be useful for most use cases
This commit is contained in:
parent
85d25a8e13
commit
73dabd8905
|
@ -168,5 +168,12 @@ claude:
|
||||||
# (e.g., {{random}} macro or lorebooks not as in-chat injections).
|
# (e.g., {{random}} macro or lorebooks not as in-chat injections).
|
||||||
# Otherwise, you'll just waste money on cache misses.
|
# Otherwise, you'll just waste money on cache misses.
|
||||||
enableSystemPromptCache: false
|
enableSystemPromptCache: false
|
||||||
|
# Enables caching of the message history at depth (if supported).
|
||||||
|
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
||||||
|
# -- IMPORTANT! --
|
||||||
|
# Use with caution. Behavior may be unpredictable and no guarantees can or will be made.
|
||||||
|
# Set to an integer to specify the desired depth, or to false to disable. A depth of 0 (which does NOT include the prefill)
|
||||||
|
# should be ideal for most use cases.
|
||||||
|
cachingAtDepth: false
|
||||||
# -- SERVER PLUGIN CONFIGURATION --
|
# -- SERVER PLUGIN CONFIGURATION --
|
||||||
enableServerPlugins: false
|
enableServerPlugins: false
|
||||||
|
|
|
@ -80,6 +80,7 @@ async function sendClaudeRequest(request, response) {
|
||||||
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
||||||
const divider = '-'.repeat(process.stdout.columns);
|
const divider = '-'.repeat(process.stdout.columns);
|
||||||
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
|
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
|
||||||
|
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', false) && request.body.model.startsWith('claude-3');
|
||||||
|
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
||||||
|
@ -138,9 +139,25 @@ async function sendClaudeRequest(request, response) {
|
||||||
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
|
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (enableSystemPromptCache) {
|
|
||||||
|
if (cachingAtDepth !== false) {
|
||||||
|
// There are extremely few scenarios in which caching the prefill is a good idea; it mostly just breaks everything
|
||||||
|
const messageCount = convertedPrompt.messages.length;
|
||||||
|
cachingAtDepth += convertedPrompt.messages[messageCount - 1].role === 'assistant' ? 1 : 0;
|
||||||
|
|
||||||
|
if (messageCount - 1 - cachingAtDepth >= 0) {
|
||||||
|
convertedPrompt.messages[messageCount - 1 - cachingAtDepth]['cache_control'] = { type: 'ephemeral' };
|
||||||
|
}
|
||||||
|
|
||||||
|
if (messageCount - 1 - cachingAtDepth - 2 >= 0) {
|
||||||
|
convertedPrompt.messages[messageCount - 1 - cachingAtDepth - 2]['cache_control'] = { type: 'ephemeral' };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enableSystemPromptCache || cachingAtDepth !== false) {
|
||||||
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Claude request:', requestBody);
|
console.log('Claude request:', requestBody);
|
||||||
|
|
||||||
const generateResponse = await fetch(apiUrl + '/messages', {
|
const generateResponse = await fetch(apiUrl + '/messages', {
|
||||||
|
|
Loading…
Reference in New Issue