Moved most of the code to prompt converters

This commit is contained in:
Honey Tree
2024-11-18 08:06:27 -03:00
parent befe5a7171
commit c3caa1699e
2 changed files with 78 additions and 50 deletions

View File

@ -26,6 +26,8 @@ import {
convertMistralMessages, convertMistralMessages,
convertAI21Messages, convertAI21Messages,
mergeMessages, mergeMessages,
cachingAtDepthForOpenRouterClaude,
cachingAtDepthForClaude,
} from '../../prompt-converters.js'; } from '../../prompt-converters.js';
import { readSecret, SECRET_KEYS } from '../secrets.js'; import { readSecret, SECRET_KEYS } from '../secrets.js';
@ -145,19 +147,7 @@ async function sendClaudeRequest(request, response) {
} }
if (cachingAtDepth !== -1) { if (cachingAtDepth !== -1) {
// There are extremely few scenarios in which caching the prefill is a good idea, it mostly just breaks everything cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
const messageCount = convertedPrompt.messages.length;
cachingAtDepth += convertedPrompt.messages[messageCount - 1].role === 'assistant' ? 1 : 0;
if (messageCount - 1 - cachingAtDepth >= 0) {
const contentCount = convertedPrompt.messages[messageCount - 1 - cachingAtDepth].content.length;
convertedPrompt.messages[messageCount - 1 - cachingAtDepth].content[contentCount - 1]['cache_control'] = { type: 'ephemeral' };
}
if (messageCount - 1 - cachingAtDepth - 2 >= 0) {
const contentCount = convertedPrompt.messages[messageCount - 1 - cachingAtDepth].content.length;
convertedPrompt.messages[messageCount - 1 - cachingAtDepth - 2].content[contentCount - 1]['cache_control'] = { type: 'ephemeral' };
}
} }
if (enableSystemPromptCache || cachingAtDepth !== -1) { if (enableSystemPromptCache || cachingAtDepth !== -1) {
@ -902,43 +892,7 @@ router.post('/generate', jsonParser, function (request, response) {
let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1); let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model.startsWith('anthropic/claude-3')) { if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model.startsWith('anthropic/claude-3')) {
//caching the prefill is a terrible idea in general cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
let passedThePrefill = false;
//depth here is the number of message role switches
let depth = 0;
let previousRoleName = "";
for (let i = request.body.messages.length - 1; i >= 0; i--) {
if (!passedThePrefill && request.body.messages[i].role === 'assistant') {
continue;
}
passedThePrefill = true;
if (request.body.messages[i].role !== previousRoleName) {
if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
const content = request.body.messages[i].content;
if (typeof content === 'string') {
request.body.messages[i].content = [{
type: 'text',
text: content,
cache_control: { type: "ephemeral"},
}];
} else {
const contentPartCount = content.length;
content[contentPartCount - 1].cache_control = {
type: "ephemeral"
}
}
}
if (depth === cachingAtDepth + 2) {
break
}
depth += 1;
previousRoleName = request.body.messages[i].role;
}
}
} }
} else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) { } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
apiUrl = request.body.custom_url; apiUrl = request.body.custom_url;

View File

@ -718,3 +718,77 @@ export function convertTextCompletionPrompt(messages) {
}); });
return messageStrings.join('\n') + '\nassistant:'; return messageStrings.join('\n') + '\nassistant:';
} }
/**
 * Appends cache_control objects to a Claude prompt at the given depth.
 * Directly modifies the messages array.
 * @param {object[]} messages Array of Claude API messages
 * @param {number} cachingAtDepth Depth at which caching is supposed to occur,
 *     counted in role switches from the newest non-prefill message
 */
export function cachingAtDepthForClaude(messages, cachingAtDepth) {
    // Caching the prefill is almost never useful, so trailing assistant
    // messages are skipped before depth counting begins.
    let passedThePrefill = false;
    // Depth counts role switches while walking from the newest message backwards.
    let depth = 0;
    let previousRoleName = "";
    for (let i = messages.length - 1; i >= 0; i--) {
        if (!passedThePrefill && messages[i].role === 'assistant') {
            continue;
        }
        passedThePrefill = true;
        if (messages[i].role !== previousRoleName) {
            // Marks are placed at the requested depth and two switches deeper,
            // so the cache breakpoint survives one more user/assistant exchange.
            if (depth === cachingAtDepth || depth === cachingAtDepth + 2) {
                const content = messages[i].content;
                if (typeof content === 'string') {
                    // Robustness: wrap plain string content into a content-part
                    // array so a cache_control marker can be attached, mirroring
                    // cachingAtDepthForOpenRouterClaude. Assigning a property on a
                    // string primitive would throw in strict (module) mode.
                    messages[i].content = [{
                        type: 'text',
                        text: content,
                        cache_control: { type: "ephemeral" },
                    }];
                } else {
                    content[content.length - 1].cache_control = { type: "ephemeral" };
                }
            }
            if (depth === cachingAtDepth + 2) {
                break;
            }
            depth += 1;
            previousRoleName = messages[i].role;
        }
    }
}
/**
 * Append cache_control headers to an OpenRouter request at depth. Directly modifies the
 * messages array.
 * @param {object[]} messages Array of messages
 * @param {number} cachingAtDepth Depth at which caching is supposed to occur
 */
export function cachingAtDepthForOpenRouterClaude(messages, cachingAtDepth) {
    // Caching the prefill is a terrible idea in general, so any trailing
    // assistant messages are skipped before depth counting starts.
    let prefillDone = false;
    // Depth here is the number of message role switches, walking newest-first.
    let roleSwitches = 0;
    let lastRole = "";
    for (let index = messages.length - 1; index >= 0; index--) {
        const message = messages[index];
        if (!prefillDone && message.role === 'assistant') {
            continue;
        }
        prefillDone = true;
        if (message.role === lastRole) {
            continue;
        }
        // Breakpoints go at the requested depth and two role switches deeper.
        if (roleSwitches === cachingAtDepth || roleSwitches === cachingAtDepth + 2) {
            if (typeof message.content === 'string') {
                // Plain string content must become a content-part array before
                // a cache_control marker can be attached to it.
                message.content = [{
                    type: 'text',
                    text: message.content,
                    cache_control: { type: "ephemeral" },
                }];
            } else {
                const parts = message.content;
                parts[parts.length - 1].cache_control = { type: "ephemeral" };
            }
        }
        if (roleSwitches === cachingAtDepth + 2) {
            break;
        }
        roleSwitches += 1;
        lastRole = message.role;
    }
}