Add optional Claude system prompt cache.

This commit is contained in:
Cohee 2024-08-15 21:25:08 +03:00
parent 0869270c26
commit 7322dd1954
2 changed files with 27 additions and 12 deletions

View File

@ -41,6 +41,11 @@ enableCorsProxy: false
enableUserAccounts: false
# Enable discreet login mode: hides user list on the login screen
enableDiscreetLogin: false
# User session timeout *in seconds* (defaults to 24 hours).
## Set to a positive number to expire session after a certain time of inactivity
## Set to 0 to expire session when the browser is closed
## Set to a negative number to disable session expiration
sessionTimeout: 86400
# Used to sign session cookies. Will be auto-generated if not set
cookieSecret: ''
# Disable CSRF protection - NOT RECOMMENDED
@ -123,10 +128,14 @@ ollama:
# * 0: Unload the model immediately after the request
# * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s)
keepAlive: -1
# -- ANTHROPIC CLAUDE API CONFIGURATION --
claude:
# Enables caching of the system prompt (if supported).
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
# -- IMPORTANT! --
# Use only when the prompt before the chat history is static and doesn't change between requests
# (e.g {{random}} macro or lorebooks not as in-chat injections).
# Otherwise, you'll just waste money on cache misses.
enableSystemPromptCache: false
# -- SERVER PLUGIN CONFIGURATION --
enableServerPlugins: false
# User session timeout *in seconds* (defaults to 24 hours).
## Set to a positive number to expire session after a certain time of inactivity
## Set to 0 to expire session when the browser is closed
## Set to a negative number to disable session expiration
sessionTimeout: 86400

View File

@ -105,6 +105,7 @@ async function sendClaudeRequest(request, response) {
const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
const divider = '-'.repeat(process.stdout.columns);
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false);
if (!apiKey) {
console.log(color.red(`Claude API key is missing.\n${divider}`));
@ -118,8 +119,8 @@ async function sendClaudeRequest(request, response) {
controller.abort();
});
const additionalHeaders = {};
let use_system_prompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message, request.body.char_name, request.body.user_name);
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, request.body.human_sysprompt_message, request.body.char_name, request.body.user_name);
// Add custom stop sequences
const stopSequences = [];
if (Array.isArray(request.body.stop)) {
@ -127,7 +128,7 @@ async function sendClaudeRequest(request, response) {
}
const requestBody = {
messages: converted_prompt.messages,
messages: convertedPrompt.messages,
model: request.body.model,
max_tokens: request.body.max_tokens,
stop_sequences: stopSequences,
@ -136,13 +137,15 @@ async function sendClaudeRequest(request, response) {
top_k: request.body.top_k,
stream: request.body.stream,
};
if (use_system_prompt) {
requestBody.system = converted_prompt.systemPrompt;
if (useSystemPrompt) {
requestBody.system = enableSystemPromptCache
? [{ type: 'text', text: convertedPrompt.systemPrompt, cache_control: { type: 'ephemeral' } }]
: convertedPrompt.systemPrompt;
}
if (Array.isArray(request.body.tools) && request.body.tools.length > 0) {
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
if (converted_prompt.messages.length && converted_prompt.messages[converted_prompt.messages.length - 1].role === 'assistant') {
converted_prompt.messages.push({ role: 'user', content: '.' });
if (convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
convertedPrompt.messages.push({ role: 'user', content: '.' });
}
additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
requestBody.tool_choice = { type: request.body.tool_choice === 'required' ? 'any' : 'auto' };
@ -151,6 +154,9 @@ async function sendClaudeRequest(request, response) {
.map(tool => tool.function)
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
}
if (enableSystemPromptCache) {
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
}
console.log('Claude request:', requestBody);
const generateResponse = await fetch(apiUrl + '/messages', {