Merge pull request #3085 from honey-tree/claude-caching-at-depth
Claude caching at depth
This commit is contained in:
commit
54db4983f4
|
@ -168,5 +168,13 @@ claude:
|
||||||
# (e.g {{random}} macro or lorebooks not as in-chat injections).
|
# (e.g {{random}} macro or lorebooks not as in-chat injections).
|
||||||
# Otherwise, you'll just waste money on cache misses.
|
# Otherwise, you'll just waste money on cache misses.
|
||||||
enableSystemPromptCache: false
|
enableSystemPromptCache: false
|
||||||
|
# Enables caching of the message history at depth (if supported).
|
||||||
|
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
||||||
|
# -- IMPORTANT! --
|
||||||
|
# Use with caution. Behavior may be unpredictable and no guarantees can or will be made.
|
||||||
|
# Set to an integer to specify the desired depth. 0 (which does NOT include the prefill)
|
||||||
|
# should be ideal for most use cases.
|
||||||
|
# Any value other than a non-negative integer will be ignored and caching at depth will not be enabled.
|
||||||
|
cachingAtDepth: -1
|
||||||
# -- SERVER PLUGIN CONFIGURATION --
|
# -- SERVER PLUGIN CONFIGURATION --
|
||||||
enableServerPlugins: false
|
enableServerPlugins: false
|
||||||
|
|
|
@ -26,6 +26,8 @@ import {
|
||||||
convertMistralMessages,
|
convertMistralMessages,
|
||||||
convertAI21Messages,
|
convertAI21Messages,
|
||||||
mergeMessages,
|
mergeMessages,
|
||||||
|
cachingAtDepthForOpenRouterClaude,
|
||||||
|
cachingAtDepthForClaude,
|
||||||
} from '../../prompt-converters.js';
|
} from '../../prompt-converters.js';
|
||||||
|
|
||||||
import { readSecret, SECRET_KEYS } from '../secrets.js';
|
import { readSecret, SECRET_KEYS } from '../secrets.js';
|
||||||
|
@ -80,6 +82,11 @@ async function sendClaudeRequest(request, response) {
|
||||||
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
||||||
const divider = '-'.repeat(process.stdout.columns);
|
const divider = '-'.repeat(process.stdout.columns);
|
||||||
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
|
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
|
||||||
|
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
|
||||||
|
// Disabled if not an integer or negative, or if the model doesn't support it
|
||||||
|
if (!Number.isInteger(cachingAtDepth) || cachingAtDepth < 0 || !request.body.model.startsWith('claude-3')) {
|
||||||
|
cachingAtDepth = -1;
|
||||||
|
}
|
||||||
|
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
||||||
|
@ -138,9 +145,15 @@ async function sendClaudeRequest(request, response) {
|
||||||
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
|
requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (enableSystemPromptCache) {
|
|
||||||
|
if (cachingAtDepth !== -1) {
|
||||||
|
cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (enableSystemPromptCache || cachingAtDepth !== -1) {
|
||||||
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Claude request:', requestBody);
|
console.log('Claude request:', requestBody);
|
||||||
|
|
||||||
const generateResponse = await fetch(apiUrl + '/messages', {
|
const generateResponse = await fetch(apiUrl + '/messages', {
|
||||||
|
@ -876,6 +889,11 @@ router.post('/generate', jsonParser, function (request, response) {
|
||||||
if (request.body.use_fallback) {
|
if (request.body.use_fallback) {
|
||||||
bodyParams['route'] = 'fallback';
|
bodyParams['route'] = 'fallback';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
|
||||||
|
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model.startsWith('anthropic/claude-3')) {
|
||||||
|
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
|
||||||
|
}
|
||||||
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
|
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
|
||||||
apiUrl = request.body.custom_url;
|
apiUrl = request.body.custom_url;
|
||||||
apiKey = readSecret(request.user.directories, SECRET_KEYS.CUSTOM);
|
apiKey = readSecret(request.user.directories, SECRET_KEYS.CUSTOM);
|
||||||
|
|
|
@ -718,3 +718,77 @@ export function convertTextCompletionPrompt(messages) {
|
||||||
});
|
});
|
||||||
return messageStrings.join('\n') + '\nassistant:';
|
return messageStrings.join('\n') + '\nassistant:';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Append cache_control markers to a Claude request at depth. Directly modifies the
 * messages array.
 *
 * The array is walked from the newest message to the oldest. Trailing assistant
 * messages are treated as the prefill and skipped. After that, "depth" counts role
 * switches: a run of consecutive messages with the same role counts once, and only
 * the newest message of that run is eligible for a marker.
 *
 * A marker is placed at `cachingAtDepth` and a second one two role switches further
 * back (`cachingAtDepth + 2`), after which the walk stops.
 * NOTE(review): the second marker presumably keeps the previous turn's cache entry
 * reachable - confirm against the prompt caching documentation.
 *
 * @param {object[]} messages Array of messages
 * @param {number} cachingAtDepth Depth at which caching is supposed to occur
 * @returns {void}
 */
export function cachingAtDepthForClaude(messages, cachingAtDepth) {
    // The prefill (trailing assistant messages) is never marked for caching.
    let passedThePrefill = false;
    // Depth here is the number of message role switches seen so far.
    let depth = 0;
    let previousRoleName = '';

    for (let i = messages.length - 1; i >= 0; i--) {
        if (!passedThePrefill && messages[i].role === 'assistant') {
            continue;
        }

        passedThePrefill = true;

        if (messages[i].role !== previousRoleName) {
            if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
                const content = messages[i].content;
                if (typeof content === 'string') {
                    // A plain string cannot carry cache_control; promote it to a
                    // single text part, as the OpenRouter variant does.
                    messages[i].content = [{
                        type: 'text',
                        text: content,
                        cache_control: { type: 'ephemeral' },
                    }];
                } else if (Array.isArray(content) && content.length > 0) {
                    content[content.length - 1].cache_control = { type: 'ephemeral' };
                }
                // Missing or empty content: nothing to mark, so the message is left as is
                // instead of throwing and aborting the whole request.
            }

            if (depth === cachingAtDepth + 2) {
                break;
            }

            depth += 1;
            previousRoleName = messages[i].role;
        }
    }
}
|
||||||
|
|
||||||
|
/**
 * Append cache_control headers to an OpenRouter request at depth. Directly modifies the
 * messages array.
 *
 * The array is walked from the newest message to the oldest. Trailing assistant
 * messages are treated as the prefill and skipped. A marker is placed on the message
 * found at `cachingAtDepth` role switches and on the one two role switches further
 * back, after which the walk stops.
 *
 * String content is converted into a single text part so it can carry the marker.
 * Messages whose content is neither a string nor a non-empty array (for example
 * `null`) are left untouched.
 *
 * @param {object[]} messages Array of messages
 * @param {number} cachingAtDepth Depth at which caching is supposed to occur
 * @returns {void}
 */
export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
    //caching the prefill is a terrible idea in general
    let passedThePrefill = false;
    //depth here is the number of message role switches
    let depth = 0;
    let previousRoleName = '';

    for (let i = messages.length - 1; i >= 0; i--) {
        if (!passedThePrefill && messages[i].role === 'assistant') {
            continue;
        }

        passedThePrefill = true;

        if (messages[i].role !== previousRoleName) {
            if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
                const content = messages[i].content;
                if (typeof content === 'string') {
                    messages[i].content = [{
                        type: 'text',
                        text: content,
                        cache_control: { type: 'ephemeral' },
                    }];
                } else if (Array.isArray(content) && content.length > 0) {
                    content[content.length - 1].cache_control = {
                        type: 'ephemeral',
                    };
                }
                // Otherwise there is no content part to attach the marker to
                // (null/undefined content or an empty parts array); skip it rather
                // than throw and fail the whole request.
            }

            if (depth === cachingAtDepth + 2) {
                break;
            }

            depth += 1;
            previousRoleName = messages[i].role;
        }
    }
}
|
||||||
|
|
Loading…
Reference in New Issue