mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-12 10:00:36 +01:00
implement messages api and selector for nuclaude models
This commit is contained in:
parent
adba450752
commit
04bb882e90
@ -2348,6 +2348,8 @@
|
||||
<h4 data-i18n="Claude Model">Claude Model</h4>
|
||||
<select id="model_claude_select">
|
||||
<optgroup label="Latest">
|
||||
<option value="claude-3-opus">claude-3-opus</option>
|
||||
<option value="claude-3-sonnet">claude-3-sonnet</option>
|
||||
<option value="claude-2">claude-2</option>
|
||||
<option value="claude-v1">claude-v1</option>
|
||||
<option value="claude-v1-100k">claude-v1-100k</option>
|
||||
@ -2355,6 +2357,8 @@
|
||||
<option value="claude-instant-v1-100k">claude-instant-v1-100k</option>
|
||||
</optgroup>
|
||||
<optgroup label="Sub-versions">
|
||||
<option value="claude-3-opus-20240229">claude-3-opus-20240229</option>
|
||||
<option value="claude-3-sonnet-20240229">claude-3-sonnet-20240229</option>
|
||||
<option value="claude-2.1">claude-2.1</option>
|
||||
<option value="claude-2.0">claude-2.0</option>
|
||||
<option value="claude-v1.3">claude-v1.3</option>
|
||||
|
@ -1751,7 +1751,7 @@ async function sendOpenAIRequest(type, messages, signal) {
|
||||
|
||||
function getStreamingReply(data) {
|
||||
if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
|
||||
return data?.completion || '';
|
||||
return data?.delta?.text || '';
|
||||
} else if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
|
||||
return data?.candidates?.[0]?.content?.parts?.[0]?.text || '';
|
||||
} else {
|
||||
@ -3439,7 +3439,7 @@ async function onModelChange() {
|
||||
if (oai_settings.max_context_unlocked) {
|
||||
$('#openai_max_context').attr('max', max_200k);
|
||||
}
|
||||
else if (value == 'claude-2.1' || value == 'claude-2') {
|
||||
else if (value == 'claude-2.1' || value == 'claude-2' || value.startsWith('claude-3')) {
|
||||
$('#openai_max_context').attr('max', max_200k);
|
||||
}
|
||||
else if (value.endsWith('100k') || value.startsWith('claude-2') || value === 'claude-instant-1.2') {
|
||||
|
@ -5,7 +5,7 @@ const { Readable } = require('stream');
|
||||
const { jsonParser } = require('../../express-common');
|
||||
const { CHAT_COMPLETION_SOURCES, GEMINI_SAFETY, BISON_SAFETY, OPENROUTER_HEADERS } = require('../../constants');
|
||||
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4, mergeObjectWithYaml, excludeKeysByYaml, color } = require('../../util');
|
||||
const { convertClaudePrompt, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
|
||||
const { convertClaudeMessages, convertGooglePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
|
||||
|
||||
const { readSecret, SECRET_KEYS } = require('../secrets');
|
||||
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
|
||||
@ -34,45 +34,8 @@ async function sendClaudeRequest(request, response) {
|
||||
request.socket.on('close', function () {
|
||||
controller.abort();
|
||||
});
|
||||
|
||||
const isSysPromptSupported = request.body.model === 'claude-2' || request.body.model === 'claude-2.1';
|
||||
const requestPrompt = convertClaudePrompt(request.body.messages, !request.body.exclude_assistant, request.body.assistant_prefill, isSysPromptSupported, request.body.claude_use_sysprompt, request.body.human_sysprompt_message, request.body.claude_exclude_prefixes);
|
||||
|
||||
// Check Claude messages sequence and prefixes presence.
|
||||
let sequenceError = [];
|
||||
const sequence = requestPrompt.split('\n').filter(x => x.startsWith('Human:') || x.startsWith('Assistant:'));
|
||||
const humanFound = sequence.some(line => line.startsWith('Human:'));
|
||||
const assistantFound = sequence.some(line => line.startsWith('Assistant:'));
|
||||
let humanErrorCount = 0;
|
||||
let assistantErrorCount = 0;
|
||||
|
||||
for (let i = 0; i < sequence.length - 1; i++) {
|
||||
if (sequence[i].startsWith(sequence[i + 1].split(':')[0])) {
|
||||
if (sequence[i].startsWith('Human:')) {
|
||||
humanErrorCount++;
|
||||
} else if (sequence[i].startsWith('Assistant:')) {
|
||||
assistantErrorCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!humanFound) {
|
||||
sequenceError.push(`${divider}\nWarning: No 'Human:' prefix found in the prompt.\n${divider}`);
|
||||
}
|
||||
if (!assistantFound) {
|
||||
sequenceError.push(`${divider}\nWarning: No 'Assistant: ' prefix found in the prompt.\n${divider}`);
|
||||
}
|
||||
if (sequence[0] && !sequence[0].startsWith('Human:')) {
|
||||
sequenceError.push(`${divider}\nWarning: The messages sequence should start with 'Human:' prefix.\nMake sure you have '\\n\\nHuman:' prefix at the very beggining of the prompt, or after the system prompt.\n${divider}`);
|
||||
}
|
||||
if (humanErrorCount > 0 || assistantErrorCount > 0) {
|
||||
sequenceError.push(`${divider}\nWarning: Detected incorrect Prefix sequence(s).`);
|
||||
sequenceError.push(`Incorrect "Human:" prefix(es): ${humanErrorCount}.\nIncorrect "Assistant: " prefix(es): ${assistantErrorCount}.`);
|
||||
sequenceError.push('Check the prompt above and fix it in the SillyTavern.');
|
||||
sequenceError.push('\nThe correct sequence in the console should look like this:\n(System prompt msg) <-(for the sysprompt format only, else have \\n\\n above the first human\'s message.)');
|
||||
sequenceError.push(`\\n + <-----(Each message beginning with the "Assistant:/Human:" prefix must have \\n\\n before it.)\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \\n +\n...\n\\n +\nHuman: \\n +\n\\n +\nAssistant: \n${divider}`);
|
||||
}
|
||||
|
||||
let use_system_prompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
|
||||
let converted_prompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, use_system_prompt, request.body.human_sysprompt_message);
|
||||
// Add custom stop sequences
|
||||
const stopSequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
|
||||
if (Array.isArray(request.body.stop)) {
|
||||
@ -80,23 +43,21 @@ async function sendClaudeRequest(request, response) {
|
||||
}
|
||||
|
||||
const requestBody = {
|
||||
prompt: requestPrompt,
|
||||
messages: converted_prompt.messages,
|
||||
model: request.body.model,
|
||||
max_tokens_to_sample: request.body.max_tokens,
|
||||
max_tokens: request.body.max_tokens,
|
||||
stop_sequences: stopSequences,
|
||||
temperature: request.body.temperature,
|
||||
top_p: request.body.top_p,
|
||||
top_k: request.body.top_k,
|
||||
stream: request.body.stream,
|
||||
};
|
||||
|
||||
if (use_system_prompt) {
|
||||
requestBody.system = converted_prompt.systemPrompt;
|
||||
}
|
||||
console.log('Claude request:', requestBody);
|
||||
|
||||
sequenceError.forEach(sequenceError => {
|
||||
console.log(color.red(sequenceError));
|
||||
});
|
||||
|
||||
const generateResponse = await fetch(apiUrl + '/complete', {
|
||||
const generateResponse = await fetch(apiUrl + '/messages', {
|
||||
method: 'POST',
|
||||
signal: controller.signal,
|
||||
body: JSON.stringify(requestBody),
|
||||
@ -118,7 +79,7 @@ async function sendClaudeRequest(request, response) {
|
||||
}
|
||||
|
||||
const generateResponseJson = await generateResponse.json();
|
||||
const responseText = generateResponseJson.completion;
|
||||
const responseText = generateResponseJson.content[0].text;
|
||||
console.log('Claude response:', generateResponseJson);
|
||||
|
||||
// Wrap it back to OAI format
|
||||
|
@ -74,48 +74,58 @@ function convertClaudePrompt(messages, addAssistantPostfix, addAssistantPrefill,
|
||||
/**
 * Convert ChatML-style messages into the shape used by Anthropic's Messages API.
 *
 * The messages endpoint only accepts `user` / `assistant` roles taking turns, so this
 * function:
 *   1. optionally lifts the leading run of `system` messages into a separate system prompt,
 *   2. rewrites every remaining `system` message to `user`,
 *   3. appends the assistant prefill (if any),
 *   4. merges consecutive messages that share a role (joined with a blank line).
 *
 * Merging is done last so that roles rewritten in step 2 and the prefill added in step 3
 * cannot leave two adjacent messages with the same role.
 *
 * The input array and its message objects are never modified; shallow copies are used.
 *
 * @param {object[]} messages Array of messages (`{ role, content }`)
 * @param {string} prefillString User determined prefill string; appended as an assistant turn when non-empty
 * @param {boolean} useSysPrompt See if we want to use a system prompt
 * @param {string} humanMsgFix Human message inserted after the system prompt when the chat would otherwise not start with a user turn
 * @returns {{messages: object[], systemPrompt: string}} Converted messages and the extracted system prompt (empty string when unused)
 */
function convertClaudeMessages(messages, prefillString, useSysPrompt, humanMsgFix) {
    // Work on shallow copies so the caller's message objects are left untouched.
    const workingMessages = Array.isArray(messages)
        ? messages.map((message) => ({ ...message }))
        : [];

    let systemPrompt = '';
    if (useSysPrompt) {
        // Collect all the system messages up until the first non-system message and remove them from the list.
        const firstNonSystem = workingMessages.findIndex((message) => message.role !== 'system');
        const leadingCount = firstNonSystem === -1 ? workingMessages.length : firstNonSystem;
        systemPrompt = workingMessages
            .splice(0, leadingCount)
            .map((message) => message.content)
            .join('\n\n');

        // If the first remaining message is not from the user, interject humanMsgFix (or a blank message).
        if (workingMessages.length > 0 && workingMessages[0].role !== 'user') {
            workingMessages.unshift({
                role: 'user',
                content: humanMsgFix || '',
            });
        }
    }

    // Replace every remaining 'system' role with 'user' (all of them if no system prompt is used).
    for (const message of workingMessages) {
        if (message.role === 'system') {
            message.role = 'user';
        }
    }

    // The last role is expected to be user unless we're explicitly prefilling.
    if (prefillString) {
        workingMessages.push({
            role: 'assistant',
            content: prefillString,
        });
    }

    // Merge neighbours with the same role now that all roles are final.
    const mergedMessages = [];
    for (const message of workingMessages) {
        const previous = mergedMessages[mergedMessages.length - 1];
        if (previous && previous.role === message.role) {
            previous.content += `\n\n${message.content}`;
        } else {
            mergedMessages.push(message);
        }
    }

    return { messages: mergedMessages, systemPrompt: systemPrompt.trim() };
}
|
||||
|
||||
/**
|
||||
|
@ -249,6 +249,7 @@ async function loadClaudeTokenizer(modelPath) {
|
||||
}
|
||||
|
||||
function countClaudeTokens(tokenizer, messages) {
|
||||
// Should be fine if we use the old conversion method instead of the messages API one i think?
|
||||
const convertedPrompt = convertClaudePrompt(messages, false, false, false);
|
||||
|
||||
// Fallback to strlen estimation
|
||||
|
Loading…
x
Reference in New Issue
Block a user