mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add optional Claude system prompt cache.
This commit is contained in:
@ -41,6 +41,11 @@ enableCorsProxy: false
|
|||||||
enableUserAccounts: false
|
enableUserAccounts: false
|
||||||
# Enable discreet login mode: hides user list on the login screen
|
# Enable discreet login mode: hides user list on the login screen
|
||||||
enableDiscreetLogin: false
|
enableDiscreetLogin: false
|
||||||
|
# User session timeout *in seconds* (defaults to 24 hours).
|
||||||
|
## Set to a positive number to expire session after a certain time of inactivity
|
||||||
|
## Set to 0 to expire session when the browser is closed
|
||||||
|
## Set to a negative number to disable session expiration
|
||||||
|
sessionTimeout: 86400
|
||||||
# Used to sign session cookies. Will be auto-generated if not set
|
# Used to sign session cookies. Will be auto-generated if not set
|
||||||
cookieSecret: ''
|
cookieSecret: ''
|
||||||
# Disable CSRF protection - NOT RECOMMENDED
|
# Disable CSRF protection - NOT RECOMMENDED
|
||||||
@ -123,10 +128,14 @@ ollama:
|
|||||||
# * 0: Unload the model immediately after the request
|
# * 0: Unload the model immediately after the request
|
||||||
# * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s)
|
# * 5m: Keep the model loaded for 5 minutes after the request. Accepts duration strings (e.g. 5h30m40s)
|
||||||
keepAlive: -1
|
keepAlive: -1
|
||||||
|
# -- ANTHROPIC CLAUDE API CONFIGURATION --
|
||||||
|
claude:
|
||||||
|
# Enables caching of the system prompt (if supported).
|
||||||
|
# https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
|
||||||
|
# -- IMPORTANT! --
|
||||||
|
# Use only when the prompt before the chat history is static and doesn't change between requests
|
||||||
|
# (e.g. it contains no {{random}} macro, and lorebook entries are used only as in-chat injections).
|
||||||
|
# Otherwise, you'll just waste money on cache misses.
|
||||||
|
enableSystemPromptCache: false
|
||||||
# -- SERVER PLUGIN CONFIGURATION --
|
# -- SERVER PLUGIN CONFIGURATION --
|
||||||
enableServerPlugins: false
|
enableServerPlugins: false
|
||||||
# User session timeout *in seconds* (defaults to 24 hours).
|
|
||||||
## Set to a positive number to expire session after a certain time of inactivity
|
|
||||||
## Set to 0 to expire session when the browser is closed
|
|
||||||
## Set to a negative number to disable session expiration
|
|
||||||
sessionTimeout: 86400
|
|
||||||
|
@ -105,6 +105,7 @@ async function sendClaudeRequest(request, response) {
|
|||||||
const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
|
const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
|
||||||
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
|
||||||
const divider = '-'.repeat(process.stdout.columns);
|
const divider = '-'.repeat(process.stdout.columns);
|
||||||
|
const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false);
|
||||||
|
|
||||||
if (!apiKey) {
|
if (!apiKey) {
|
||||||
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
console.log(color.red(`Claude API key is missing.\n${divider}`));
|
||||||
@ -118,8 +119,8 @@ async function sendClaudeRequest(request, response) {
|
|||||||
controller.abort();
|
controller.abort();
|
||||||
});
|
});
|
||||||
const additionalHeaders = {};
|
const additionalHeaders = {};
|
||||||
let use_system_prompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
|
const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
|
||||||
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message, request.body.char_name, request.body.user_name);
|
const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, request.body.human_sysprompt_message, request.body.char_name, request.body.user_name);
|
||||||
// Add custom stop sequences
|
// Add custom stop sequences
|
||||||
const stopSequences = [];
|
const stopSequences = [];
|
||||||
if (Array.isArray(request.body.stop)) {
|
if (Array.isArray(request.body.stop)) {
|
||||||
@ -127,7 +128,7 @@ async function sendClaudeRequest(request, response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const requestBody = {
|
const requestBody = {
|
||||||
messages: converted_prompt.messages,
|
messages: convertedPrompt.messages,
|
||||||
model: request.body.model,
|
model: request.body.model,
|
||||||
max_tokens: request.body.max_tokens,
|
max_tokens: request.body.max_tokens,
|
||||||
stop_sequences: stopSequences,
|
stop_sequences: stopSequences,
|
||||||
@ -136,13 +137,15 @@ async function sendClaudeRequest(request, response) {
|
|||||||
top_k: request.body.top_k,
|
top_k: request.body.top_k,
|
||||||
stream: request.body.stream,
|
stream: request.body.stream,
|
||||||
};
|
};
|
||||||
if (use_system_prompt) {
|
if (useSystemPrompt) {
|
||||||
requestBody.system = converted_prompt.systemPrompt;
|
requestBody.system = enableSystemPromptCache
|
||||||
|
? [{ type: 'text', text: convertedPrompt.systemPrompt, cache_control: { type: 'ephemeral' } }]
|
||||||
|
: convertedPrompt.systemPrompt;
|
||||||
}
|
}
|
||||||
if (Array.isArray(request.body.tools) && request.body.tools.length > 0) {
|
if (Array.isArray(request.body.tools) && request.body.tools.length > 0) {
|
||||||
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
|
// Claude doesn't do prefills on function calls, and doesn't allow empty messages
|
||||||
if (converted_prompt.messages.length && converted_prompt.messages[converted_prompt.messages.length - 1].role === 'assistant') {
|
if (convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
|
||||||
converted_prompt.messages.push({ role: 'user', content: '.' });
|
convertedPrompt.messages.push({ role: 'user', content: '.' });
|
||||||
}
|
}
|
||||||
additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
|
additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
|
||||||
requestBody.tool_choice = { type: request.body.tool_choice === 'required' ? 'any' : 'auto' };
|
requestBody.tool_choice = { type: request.body.tool_choice === 'required' ? 'any' : 'auto' };
|
||||||
@ -151,6 +154,9 @@ async function sendClaudeRequest(request, response) {
|
|||||||
.map(tool => tool.function)
|
.map(tool => tool.function)
|
||||||
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
|
.map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));
|
||||||
}
|
}
|
||||||
|
if (enableSystemPromptCache) {
|
||||||
|
additionalHeaders['anthropic-beta'] = 'prompt-caching-2024-07-31';
|
||||||
|
}
|
||||||
console.log('Claude request:', requestBody);
|
console.log('Claude request:', requestBody);
|
||||||
|
|
||||||
const generateResponse = await fetch(apiUrl + '/messages', {
|
const generateResponse = await fetch(apiUrl + '/messages', {
|
||||||
|
Reference in New Issue
Block a user