implement messages api and selector for nuclaude models

This commit is contained in:
based 2024-03-05 04:40:19 +10:00
parent adba450752
commit 04bb882e90
5 changed files with 58 additions and 82 deletions

View File

@ -2348,6 +2348,8 @@
<h4 data-i18n="Claude Model">Claude Model</h4>
<select id="model_claude_select">
<optgroup label="Latest">
<option value="claude-3-opus">claude-3-opus</option>
<option value="claude-3-sonnet">claude-3-sonnet</option>
<option value="claude-2">claude-2</option>
<option value="claude-v1">claude-v1</option>
<option value="claude-v1-100k">claude-v1-100k</option>
@ -2355,6 +2357,8 @@
<option value="claude-instant-v1-100k">claude-instant-v1-100k</option>
</optgroup>
<optgroup label="Sub-versions">
<option value="claude-3-opus-20240229">claude-3-opus-20240229</option>
<option value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
<option value="claude-2.1">claude-2.1</option>
<option value="claude-2.0">claude-2.0</option>
<option value="claude-v1.3">claude-v1.3</option>

View File

@ -1751,7 +1751,7 @@ async function sendOpenAIRequest(type, messages, signal) {
function getStreamingReply(data) {
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
return data?.completion || '';
return data?.delta?.text || '';
} else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
return data?.candidates?.[0]?.content?.parts?.[0]?.text || '';
} else {
@ -3439,7 +3439,7 @@ async function onModelChange() {
if (oai_settings.max_context_unlocked) {
$('#openai_max_context').attr('max', max_200k);
}
else if (value == 'claude-2.1' || value == 'claude-2') {
else if (value == 'claude-2.1' || value == 'claude-2' || value.startsWith('claude-3')) {
$('#openai_max_context').attr('max', max_200k);
}
else if (value.endsWith('100k') || value.startsWith('claude-2') || value === 'claude-instant-1.2') {

View File

@ -5,7 +5,7 @@ const { Readable } = require('stream');
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
@ -34,45 +34,8 @@ async function sendClaudeRequest(request, response) {
request.socket.on('close', function () {
controller.abort();
});
const isSysPromptSupported = request.body.model === 'claude-2' || request.body.model === 'claude-2.1';
const requestPrompt = convertClaudePrompt(request.body.messages, !request.body.exclude_assistant, request.body.assistant_prefill, isSysPromptSupported, request.body.claude_use_sysprompt, request.body.human_sysprompt_message, request.body.claude_exclude_prefixes);
// Check Claude messages sequence and prefixes presence.
let sequenceError = [];
const sequence = requestPrompt.split('\n').filter(x => x.startsWith('Human:') || x.startsWith('Assistant:'));
const humanFound = sequence.some(line => line.startsWith('Human:'));
const assistantFound = sequence.some(line => line.startsWith('Assistant:'));
let humanErrorCount = 0;
let assistantErrorCount = 0;
for (let i = 0; i < sequence.length - 1; i++) {
if (sequence[i].startsWith(sequence[i + 1].split(':')[0])) {
if (sequence[i].startsWith('Human:')) {
humanErrorCount++;
} else if (sequence[i].startsWith('Assistant:')) {
assistantErrorCount++;
}
}
}
if (!humanFound) {
sequenceError.push(`${divider}\nWarning: No 'Human:' prefix found in the prompt.\n${divider}`);
}
if (!assistantFound) {
sequenceError.push(`${divider}\nWarning: No 'Assistant: ' prefix found in the prompt.\n${divider}`);
}
if (sequence[0] && !sequence[0].startsWith('Human:')) {
sequenceError.push(`${divider}\nWarning: The messages sequence should start with 'Human:' prefix.\nMake sure you have '\\n\\nHuman:' prefix at the very beggining of the prompt, or after the system prompt.\n${divider}`);
}
if (humanErrorCount > 0 || assistantErrorCount > 0) {
sequenceError.push(`${divider}\nWarning: Detected incorrect Prefix sequence(s).`);
sequenceError.push(`Incorrect "Human:" prefix(es): ${humanErrorCount}.\nIncorrect "Assistant: " prefix(es): ${assistantErrorCount}.`);
sequenceError.push('Check the prompt above and fix it in the SillyTavern.');
sequenceError.push('\nThe correct sequence in the console should look like this:\n(System prompt msg) <-(for the sysprompt format only, else have \\n\\n above the first human\'s message.)');
sequenceError.push(`\\n + <-----(Each message beginning with the "Assistant:/Human:" prefix must have \\n\\n before it.)\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \\n +\n...\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \n${divider}`);
}
let use_system_prompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message);
// Add custom stop sequences
const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
if (Array.isArray(request.body.stop)) {
@ -80,23 +43,21 @@ async function sendClaudeRequest(request, response) {
}
const requestBody = {
prompt: requestPrompt,
messages: converted_prompt.messages,
model: request.body.model,
max_tokens_to_sample: request.body.max_tokens,
max_tokens: request.body.max_tokens,
stop_sequences: stopSequences,
temperature: request.body.temperature,
top_p: request.body.top_p,
top_k: request.body.top_k,
stream: request.body.stream,
};
if (use_system_prompt) {
requestBody.system = converted_prompt.systemPrompt;
}
console.log('Claude request:', requestBody);
sequenceError.forEach(sequenceError => {
console.log(color.red(sequenceError));
});
const generateResponse = await fetch(apiUrl + '/complete', {
const generateResponse = await fetch(apiUrl + '/messages', {
method: 'POST',
signal: controller.signal,
body: JSON.stringify(requestBody),
@ -118,7 +79,7 @@ async function sendClaudeRequest(request, response) {
}
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson.completion;
const responseText = generateResponseJson.content[0].text;
console.log('Claude response:', generateResponseJson);
// Wrap it back to OAI format

View File

@ -74,48 +74,58 @@ function convertClaudePrompt(messages, addAssistantPostfix, addAssistantPrefill,
/**
* Convert ChatML objects into working with Anthropic's new Messaging API.
* @param {object[]} messages Array of messages
* @param {boolean} addAssistantPostfix Add Assistant postfix.
* @param {string} addAssistantPrefill Add Assistant prefill after the assistant postfix.
* @param {boolean} withSysPromptSupport Indicates if the Claude model supports the system prompt format.
* @param {string} addSysHumanMsg Add Human message between system prompt and assistant.
* @param {string} prefillString User determined prefill string
* @param {boolean} useSysPrompt See if we want to use a system prompt
* @param {string} humanMsgFix Add Human message between system prompt and assistant.
*/
function convertClaudeMessages(messages, addAssistantPostfix, addAssistantPrefill, addSysHumanMsg) {
// Collect all the system messages up until the first instance of a non-system message, and then remove them from the messages array.
let systemPrompt = '';
let i;
for (i = 0; i < messages.length; i++) {
if (messages[i].role !== 'system') {
break;
}
systemPrompt += `${messages[i].content}\n\n`;
}
messages.splice(0, i);
// Check if the first message in the array is of type user, if not, interject with addSysHumanMsg or a blank message.
if (messages.length > 0 && messages[0].role !== 'user') {
messages.unshift({
role: 'user',
content: addSysHumanMsg || '',
});
}
// Now replace all further messages that have the role 'system' with the role 'user'.
function convertClaudeMessages(messages, prefillString, useSysPrompt, humanMsgFix) {
// Since the messaging endpoint only supports user/assistant roles in alternating turns, we have to merge messages with the same role if they follow each other
let mergedMessages = [];
messages.forEach((message) => {
if (mergedMessages.length > 0 && mergedMessages[mergedMessages.length - 1].role === message.role) {
mergedMessages[mergedMessages.length - 1].content += '\n\n' + message.content;
} else {
mergedMessages.push(message);
}
});
let systemPrompt = '';
if (useSysPrompt) {
// Collect all the system messages up until the first instance of a non-system message, and then remove them from the messages array.
let i;
for (i = 0; i < mergedMessages.length; i++) {
if (mergedMessages[i].role !== 'system') {
break;
}
systemPrompt += `${mergedMessages[i].content}\n\n`;
}
mergedMessages.splice(0, i);
// Check if the first message in the array is of type user, if not, interject with humanMsgFix or a blank message.
if (mergedMessages.length > 0 && mergedMessages[0].role !== 'user') {
mergedMessages.unshift({
role: 'user',
content: humanMsgFix || '',
});
}
}
// Now replace all further messages that have the role 'system' with the role 'user'. (or all if we're not using one)
mergedMessages.forEach((message) => {
if (message.role === 'system') {
message.role = 'user';
}
});
// Postfix and prefill
if (addAssistantPostfix) {
messages.push({
// Shouldn't be conditional anymore; the Messages API expects the last role to be 'user' unless we're explicitly prefilling
if (prefillString) {
mergedMessages.push({
role: 'assistant',
content: addAssistantPrefill || '',
content: prefillString,
});
}
return { messages: messages, systemPrompt: systemPrompt.trim() };
return { messages: mergedMessages, systemPrompt: systemPrompt.trim() };
}
/**

View File

@ -249,6 +249,7 @@ async function loadClaudeTokenizer(modelPath) {
}
function countClaudeTokens(tokenizer, messages) {
// Should be fine if we use the old conversion method instead of the Messages API one, I think.
const convertedPrompt = convertClaudePrompt(messages, false, false, false);
// Fallback to strlen estimation