mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
implement messages api and selector for nuclaude models
This commit is contained in:
@ -2348,6 +2348,8 @@
|
|||||||
<h4 data-i18n="Claude Model">Claude Model</h4>
|
<h4 data-i18n="Claude Model">Claude Model</h4>
|
||||||
<select id="model_claude_select">
|
<select id="model_claude_select">
|
||||||
<optgroup label="Latest">
|
<optgroup label="Latest">
|
||||||
|
<option value="claude-3-opus">claude-3-opus</option>
|
||||||
|
<option value="claude-3-sonnet">claude-3-sonnet</option>
|
||||||
<option value="claude-2">claude-2</option>
|
<option value="claude-2">claude-2</option>
|
||||||
<option value="claude-v1">claude-v1</option>
|
<option value="claude-v1">claude-v1</option>
|
||||||
<option value="claude-v1-100k">claude-v1-100k</option>
|
<option value="claude-v1-100k">claude-v1-100k</option>
|
||||||
@ -2355,6 +2357,8 @@
|
|||||||
<option value="claude-instant-v1-100k">claude-instant-v1-100k</option>
|
<option value="claude-instant-v1-100k">claude-instant-v1-100k</option>
|
||||||
</optgroup>
|
</optgroup>
|
||||||
<optgroup label="Sub-versions">
|
<optgroup label="Sub-versions">
|
||||||
|
<option value="claude-3-opus-20240229">claude-3-opus-20240229</option>
|
||||||
|
<option value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
|
||||||
<option value="claude-2.1">claude-2.1</option>
|
<option value="claude-2.1">claude-2.1</option>
|
||||||
<option value="claude-2.0">claude-2.0</option>
|
<option value="claude-2.0">claude-2.0</option>
|
||||||
<option value="claude-v1.3">claude-v1.3</option>
|
<option value="claude-v1.3">claude-v1.3</option>
|
||||||
|
@ -1751,7 +1751,7 @@ async function sendOpenAIRequest(type, messages, signal) {
|
|||||||
|
|
||||||
function getStreamingReply(data) {
|
function getStreamingReply(data) {
|
||||||
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
|
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
|
||||||
return data?.completion || '';
|
return data?.delta?.text || '';
|
||||||
} else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
|
} else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
|
||||||
return data?.candidates?.[0]?.content?.parts?.[0]?.text || '';
|
return data?.candidates?.[0]?.content?.parts?.[0]?.text || '';
|
||||||
} else {
|
} else {
|
||||||
@ -3439,7 +3439,7 @@ async function onModelChange() {
|
|||||||
if (oai_settings.max_context_unlocked) {
|
if (oai_settings.max_context_unlocked) {
|
||||||
$('#openai_max_context').attr('max', max_200k);
|
$('#openai_max_context').attr('max', max_200k);
|
||||||
}
|
}
|
||||||
else if (value == 'claude-2.1' || value == 'claude-2') {
|
else if (value == 'claude-2.1' || value == 'claude-2' || value.startsWith('claude-3')) {
|
||||||
$('#openai_max_context').attr('max', max_200k);
|
$('#openai_max_context').attr('max', max_200k);
|
||||||
}
|
}
|
||||||
else if (value.endsWith('100k') || value.startsWith('claude-2') || value === 'claude-instant-1.2') {
|
else if (value.endsWith('100k') || value.startsWith('claude-2') || value === 'claude-instant-1.2') {
|
||||||
|
@ -5,7 +5,7 @@ const { Readable } = require('stream');
|
|||||||
const { jsonParser } = require('../../express-common');
|
const { jsonParser } = require('../../express-common');
|
||||||
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
|
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
|
||||||
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
|
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
|
||||||
const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
|
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
|
||||||
|
|
||||||
const { readSecret, SECRET_KEYS } = require('../secrets');
|
const { readSecret, SECRET_KEYS } = require('../secrets');
|
||||||
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
|
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
|
||||||
@ -34,45 +34,8 @@ async function sendClaudeRequest(request, response) {
|
|||||||
request.socket.on('close', function () {
|
request.socket.on('close', function () {
|
||||||
controller.abort();
|
controller.abort();
|
||||||
});
|
});
|
||||||
|
let use_system_prompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
|
||||||
const isSysPromptSupported = request.body.model === 'claude-2' || request.body.model === 'claude-2.1';
|
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message);
|
||||||
const requestPrompt = convertClaudePrompt(request.body.messages, !request.body.exclude_assistant, request.body.assistant_prefill, isSysPromptSupported, request.body.claude_use_sysprompt, request.body.human_sysprompt_message, request.body.claude_exclude_prefixes);
|
|
||||||
|
|
||||||
// Check Claude messages sequence and prefixes presence.
|
|
||||||
let sequenceError = [];
|
|
||||||
const sequence = requestPrompt.split('\n').filter(x => x.startsWith('Human:') || x.startsWith('Assistant:'));
|
|
||||||
const humanFound = sequence.some(line => line.startsWith('Human:'));
|
|
||||||
const assistantFound = sequence.some(line => line.startsWith('Assistant:'));
|
|
||||||
let humanErrorCount = 0;
|
|
||||||
let assistantErrorCount = 0;
|
|
||||||
|
|
||||||
for (let i = 0; i < sequence.length - 1; i++) {
|
|
||||||
if (sequence[i].startsWith(sequence[i + 1].split(':')[0])) {
|
|
||||||
if (sequence[i].startsWith('Human:')) {
|
|
||||||
humanErrorCount++;
|
|
||||||
} else if (sequence[i].startsWith('Assistant:')) {
|
|
||||||
assistantErrorCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!humanFound) {
|
|
||||||
sequenceError.push(`${divider}\nWarning: No 'Human:' prefix found in the prompt.\n${divider}`);
|
|
||||||
}
|
|
||||||
if (!assistantFound) {
|
|
||||||
sequenceError.push(`${divider}\nWarning: No 'Assistant: ' prefix found in the prompt.\n${divider}`);
|
|
||||||
}
|
|
||||||
if (sequence[0] && !sequence[0].startsWith('Human:')) {
|
|
||||||
sequenceError.push(`${divider}\nWarning: The messages sequence should start with 'Human:' prefix.\nMake sure you have '\\n\\nHuman:' prefix at the very beggining of the prompt, or after the system prompt.\n${divider}`);
|
|
||||||
}
|
|
||||||
if (humanErrorCount > 0 || assistantErrorCount > 0) {
|
|
||||||
sequenceError.push(`${divider}\nWarning: Detected incorrect Prefix sequence(s).`);
|
|
||||||
sequenceError.push(`Incorrect "Human:" prefix(es): ${humanErrorCount}.\nIncorrect "Assistant: " prefix(es): ${assistantErrorCount}.`);
|
|
||||||
sequenceError.push('Check the prompt above and fix it in the SillyTavern.');
|
|
||||||
sequenceError.push('\nThe correct sequence in the console should look like this:\n(System prompt msg) <-(for the sysprompt format only, else have \\n\\n above the first human\'s message.)');
|
|
||||||
sequenceError.push(`\\n + <-----(Each message beginning with the "Assistant:/Human:" prefix must have \\n\\n before it.)\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \\n +\n...\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \n${divider}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add custom stop sequences
|
// Add custom stop sequences
|
||||||
const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
|
const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
|
||||||
if (Array.isArray(request.body.stop)) {
|
if (Array.isArray(request.body.stop)) {
|
||||||
@ -80,23 +43,21 @@ async function sendClaudeRequest(request, response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const requestBody = {
|
const requestBody = {
|
||||||
prompt: requestPrompt,
|
messages: converted_prompt.messages,
|
||||||
model: request.body.model,
|
model: request.body.model,
|
||||||
max_tokens_to_sample: request.body.max_tokens,
|
max_tokens: request.body.max_tokens,
|
||||||
stop_sequences: stopSequences,
|
stop_sequences: stopSequences,
|
||||||
temperature: request.body.temperature,
|
temperature: request.body.temperature,
|
||||||
top_p: request.body.top_p,
|
top_p: request.body.top_p,
|
||||||
top_k: request.body.top_k,
|
top_k: request.body.top_k,
|
||||||
stream: request.body.stream,
|
stream: request.body.stream,
|
||||||
};
|
};
|
||||||
|
if (use_system_prompt) {
|
||||||
|
requestBody.system = converted_prompt.systemPrompt;
|
||||||
|
}
|
||||||
console.log('Claude request:', requestBody);
|
console.log('Claude request:', requestBody);
|
||||||
|
|
||||||
sequenceError.forEach(sequenceError => {
|
const generateResponse = await fetch(apiUrl + '/messages', {
|
||||||
console.log(color.red(sequenceError));
|
|
||||||
});
|
|
||||||
|
|
||||||
const generateResponse = await fetch(apiUrl + '/complete', {
|
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
signal: controller.signal,
|
signal: controller.signal,
|
||||||
body: JSON.stringify(requestBody),
|
body: JSON.stringify(requestBody),
|
||||||
@ -118,7 +79,7 @@ async function sendClaudeRequest(request, response) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const generateResponseJson = await generateResponse.json();
|
const generateResponseJson = await generateResponse.json();
|
||||||
const responseText = generateResponseJson.completion;
|
const responseText = generateResponseJson.content[0].text;
|
||||||
console.log('Claude response:', generateResponseJson);
|
console.log('Claude response:', generateResponseJson);
|
||||||
|
|
||||||
// Wrap it back to OAI format
|
// Wrap it back to OAI format
|
||||||
|
@ -74,48 +74,58 @@ function convertClaudePrompt(messages, addAssistantPostfix, addAssistantPrefill,
|
|||||||
/**
 * Convert ChatML objects into working with Anthropic's new Messaging API.
 *
 * Processing order:
 *   1. Optionally lift the leading run of system messages into a separate system prompt.
 *   2. Rewrite every remaining 'system' role to 'user'.
 *   3. Optionally append the assistant prefill.
 *   4. Merge neighbouring messages that share a role.
 *
 * Merging is done last on purpose: steps 2 and 3 can themselves create neighbouring
 * messages with the same role (a mid-chat system note next to a user turn, or a prefill
 * after a trailing assistant turn), and the endpoint only accepts alternating turns.
 *
 * The input array and its message objects are not modified; shallow copies are used.
 *
 * @param {object[]} messages Array of messages
 * @param {string} prefillString User determined prefill string
 * @param {boolean} useSysPrompt See if we want to use a system prompt
 * @param {string} humanMsgFix Add Human message between system prompt and assistant.
 * @returns {{messages: object[], systemPrompt: string}} Converted messages and the trimmed system prompt (empty string when unused).
 */
function convertClaudeMessages(messages, prefillString, useSysPrompt, humanMsgFix) {
    // Shallow-copy each message so role rewriting and content merging never leak back to the caller.
    let pending = messages.map((message) => ({ ...message }));

    let systemPrompt = '';
    if (useSysPrompt) {
        // Collect all the system messages up until the first instance of a non-system message,
        // and then remove them from the working list.
        const firstNonSystem = pending.findIndex((message) => message.role !== 'system');
        const leadingCount = firstNonSystem === -1 ? pending.length : firstNonSystem;
        systemPrompt = pending
            .slice(0, leadingCount)
            .map((message) => `${message.content}\n\n`)
            .join('');
        pending = pending.slice(leadingCount);

        // Check if the first message is of type user, if not, interject with humanMsgFix or a blank message.
        if (pending.length > 0 && pending[0].role !== 'user') {
            pending.unshift({
                role: 'user',
                content: humanMsgFix || '',
            });
        }
    }

    // Now replace all further messages that have the role 'system' with the role 'user'. (or all if we're not using one)
    for (const message of pending) {
        if (message.role === 'system') {
            message.role = 'user';
        }
    }

    // The last role is expected to be user unless we're explicitly prefilling.
    if (prefillString) {
        pending.push({
            role: 'assistant',
            content: prefillString,
        });
    }

    // Since the messaging endpoint only supports user/assistant roles in turns,
    // merge messages with the same role if they follow each other.
    const mergedMessages = [];
    for (const message of pending) {
        const previous = mergedMessages[mergedMessages.length - 1];
        if (previous && previous.role === message.role) {
            previous.content += '\n\n' + message.content;
        } else {
            mergedMessages.push(message);
        }
    }

    return { messages: mergedMessages, systemPrompt: systemPrompt.trim() };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -249,6 +249,7 @@ async function loadClaudeTokenizer(modelPath) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function countClaudeTokens(tokenizer, messages) {
|
function countClaudeTokens(tokenizer, messages) {
|
||||||
|
// It should be fine to keep using the old prompt conversion here instead of the Messages API one (TODO: confirm).
|
||||||
const convertedPrompt = convertClaudePrompt(messages, false, false, false);
|
const convertedPrompt = convertClaudePrompt(messages, false, false, false);
|
||||||
|
|
||||||
// Fallback to strlen estimation
|
// Fallback to strlen estimation
|
||||||
|
Reference in New Issue
Block a user