mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
OpenRouter: add cache TTL control for Claude
This commit is contained in:
@@ -1342,10 +1342,11 @@ router.post('/generate', function (request, response) {
|
|||||||
bodyParams['reasoning'] = { effort: request.body.reasoning_effort };
|
bodyParams['reasoning'] = { effort: request.body.reasoning_effort };
|
||||||
}
|
}
|
||||||
|
|
||||||
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1, 'number');
|
||||||
const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4)/.test(request.body.model);
|
const isClaude3or4 = /anthropic\/claude-(3|opus-4|sonnet-4)/.test(request.body.model);
|
||||||
|
const cacheTTL = getConfigValue('claude.extendedTTL', false, 'boolean') ? '1h' : '5m';
|
||||||
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && isClaude3or4) {
|
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && isClaude3or4) {
|
||||||
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
|
cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth, cacheTTL);
|
||||||
}
|
}
|
||||||
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
|
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
|
||||||
apiUrl = request.body.custom_url;
|
apiUrl = request.body.custom_url;
|
||||||
|
@@ -906,8 +906,9 @@ export function cachingAtDepthForClaude(messages, cachingAtDepth, ttl) {
|
|||||||
* messages array.
|
* messages array.
|
||||||
* @param {object[]} messages Array of messages
|
* @param {object[]} messages Array of messages
|
||||||
* @param {number} cachingAtDepth Depth at which caching is supposed to occur
|
* @param {number} cachingAtDepth Depth at which caching is supposed to occur
|
||||||
|
* @param {string} ttl TTL value
|
||||||
*/
|
*/
|
||||||
export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
|
export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth, ttl) {
|
||||||
//caching the prefill is a terrible idea in general
|
//caching the prefill is a terrible idea in general
|
||||||
let passedThePrefill = false;
|
let passedThePrefill = false;
|
||||||
//depth here is the number of message role switches
|
//depth here is the number of message role switches
|
||||||
@@ -927,12 +928,13 @@ export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
|
|||||||
messages[i].content = [{
|
messages[i].content = [{
|
||||||
type: 'text',
|
type: 'text',
|
||||||
text: content,
|
text: content,
|
||||||
cache_control: { type: 'ephemeral' },
|
cache_control: { type: 'ephemeral', ttl: ttl },
|
||||||
}];
|
}];
|
||||||
} else {
|
} else {
|
||||||
const contentPartCount = content.length;
|
const contentPartCount = content.length;
|
||||||
content[contentPartCount - 1].cache_control = {
|
content[contentPartCount - 1].cache_control = {
|
||||||
type: 'ephemeral',
|
type: 'ephemeral',
|
||||||
|
ttl: ttl,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user