mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
OpenRouter: add cache TTL control for Claude
This commit is contained in:
@@ -1342,10 +1342,11 @@ router.post('/generate', function (request, response) {
|
||||
bodyParams['reasoning'] = { effort: request.body.reasoning_effort };
|
||||
}
|
||||
|
||||
-let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
||||
+const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
||||
const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4)/.test(request.body.model);
|
||||
+const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
|
||||
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && isClaude3or4) {
|
||||
-cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
|
||||
+cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth, cacheTTL);
|
||||
}
|
||||
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
|
||||
apiUrl = request.body.custom_url;
|
||||
|
@@ -906,8 +906,9 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) {
|
||||
* messages array.
|
||||
* @param {object[]} messages Array of messages
|
||||
* @param {number} cachingAtDepth Depth at which caching is supposed to occur
|
||||
+* @param {string} ttl TTL value
|
||||
*/
|
||||
-export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
|
||||
+export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth, ttl) {
|
||||
//caching the prefill is a terrible idea in general
|
||||
let passedThePrefill = false;
|
||||
//depth here is the number of message role switches
|
||||
@@ -927,12 +928,13 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
|
||||
messages[i].content = [{
|
||||
type: 'text',
|
||||
text: content,
|
||||
-cache_control: { type: 'ephemeral' },
|
||||
+cache_control: { type: 'ephemeral', ttl: ttl },
|
||||
}];
|
||||
} else {
|
||||
const contentPartCount = content.length;
|
||||
content[contentPartCount - 1].cache_control = {
|
||||
type: 'ephemeral',
|
||||
+ttl: ttl,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user