From 92bd766bcb0de3342f44c6ef20165b4580bdb740 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:33:52 -0500 Subject: [PATCH 01/10] Rename chat completions endpoints OpenAI calls this the "Chat Completions API", in contrast to their previous "Text Completions API", so that's what I'm naming it; both because other services besides OpenAI implement it, and to avoid confusion with the existing /api/openai route used for OpenAI extras. --- public/scripts/openai.js | 6 +++--- server.js | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index a9bc5e304..3e8447cb4 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1556,7 +1556,7 @@ async function sendOpenAIRequest(type, messages, signal) { generate_data['seed'] = oai_settings.seed; } - const generate_url = '/generate_openai'; + const generate_url = '/api/backends/chat-completions/generate'; const response = await fetch(generate_url, { method: 'POST', body: JSON.stringify(generate_data), @@ -1646,7 +1646,7 @@ async function calculateLogitBias() { let result = {}; try { - const reply = await fetch(`/openai_bias?model=${getTokenizerModel()}`, { + const reply = await fetch(`/api/backends/chat-completions/bias?model=${getTokenizerModel()}`, { method: 'POST', headers: getRequestHeaders(), body, @@ -2439,7 +2439,7 @@ async function getStatusOpen() { } try { - const response = await fetch('/getstatus_openai', { + const response = await fetch('/api/backends/chat-completions/status', { method: 'POST', headers: getRequestHeaders(), body: JSON.stringify(data), diff --git a/server.js b/server.js index 2374df5a2..6f1c7c8c3 100644 --- a/server.js +++ b/server.js @@ -626,7 +626,7 @@ function cleanUploads() { } /* OpenAI */ -app.post('/getstatus_openai', jsonParser, async function (request, response_getstatus_openai) { +app.post('/api/backends/chat-completions/status', jsonParser, async function (request, response_getstatus_openai) { if (!request.body) return response_getstatus_openai.sendStatus(400); let api_url; @@ -702,7 +702,7 @@ app.post('/getstatus_openai', jsonParser, async function (request, response_gets } }); -app.post('/openai_bias', jsonParser, async function (request, response) { +app.post('/api/backends/chat-completions/bias', jsonParser, async function (request, response) { if (!request.body || !Array.isArray(request.body)) return response.sendStatus(400); @@ -1067,7 +1067,7 @@ async function sendPalmRequest(request, response) { } } -app.post('/generate_openai', jsonParser, function (request, response_generate_openai) { +app.post('/api/backends/chat-completions/generate', jsonParser, function (request, response_generate_openai) { if (!request.body) return response_generate_openai.status(400).send({ error: true }); switch (request.body.chat_completion_source) { From dba66e756a5436cd8dbd3976a271bedd63e491ca Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:50:43 -0500 Subject: [PATCH 02/10] Move chat completions API endpoints to module --- server.js | 619 +----------------- src/chat-completion.js | 77 --- src/endpoints/backends/chat-completions.js | 700 +++++++++++++++++++++ src/endpoints/tokenizers.js | 2 +- 4 files changed, 706 insertions(+), 692 deletions(-) delete mode 100644 src/chat-completion.js create mode 100644 src/endpoints/backends/chat-completions.js diff --git a/server.js b/server.js index 6f1c7c8c3..af0f89a5e 100644 --- a/server.js +++ b/server.js @@ -48,7 +48,6 @@ const { getVersion, getConfigValue, 
color, - uuidv4, tryParse, clientRelativePath, removeFileExtension, @@ -58,8 +57,7 @@ const { forwardFetchResponse, } = require('./src/util'); const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); -const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); -const { convertClaudePrompt } = require('./src/chat-completion'); +const { loadTokenizers } = require('./src/endpoints/tokenizers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. // https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -127,11 +125,8 @@ const autorun = (getConfigValue('autorun', false) || cliArguments.autorun) && !c const enableExtensions = getConfigValue('enableExtensions', true); const listen = getConfigValue('listen', false); -const API_OPENAI = 'https://api.openai.com/v1'; -const API_CLAUDE = 'https://api.anthropic.com/v1'; - const SETTINGS_FILE = './public/settings.json'; -const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); +const { DIRECTORIES, UPLOADS_PATH, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); // CORS Settings // const CORS = cors({ @@ -625,223 +620,6 @@ function cleanUploads() { } } -/* OpenAI */ -app.post('/api/backends/chat-completions/status', jsonParser, async function (request, response_getstatus_openai) { - if (!request.body) return response_getstatus_openai.sendStatus(400); - - let api_url; - let api_key_openai; - let headers; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_getstatus_openai.status(400).send({ error: true }); - } - - try { - const response = await fetch(api_url + '/models', { - method: 'GET', - headers: { - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - }); - - if (response.ok) { - const data = await response.json(); - response_getstatus_openai.send(data); - - if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { - let models = []; - - data.data.forEach(model => { - const context_length = model.context_length; - const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt)); - const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); - models[model.id] = { - tokens_per_dollar: tokens_rounded + 'k', - context_length: context_length, - }; - }); - - console.log('Available OpenRouter models:', models); - } else { - const models = data?.data; - - if (Array.isArray(models)) { - const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); - console.log('Available OpenAI models:', modelIds); - } else { - console.log('OpenAI endpoint did not return a list of models.'); - } - } - } - else { - console.log('OpenAI status check failed. 
Either Access Token is incorrect or API endpoint is down.'); - response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); - } - } catch (e) { - console.error(e); - - if (!response_getstatus_openai.headersSent) { - response_getstatus_openai.send({ error: true }); - } else { - response_getstatus_openai.end(); - } - } -}); - -app.post('/api/backends/chat-completions/bias', jsonParser, async function (request, response) { - if (!request.body || !Array.isArray(request.body)) - return response.sendStatus(400); - - try { - const result = {}; - const model = getTokenizerModel(String(request.query.model || '')); - - // no bias for claude - if (model == 'claude') { - return response.send(result); - } - - let encodeFunction; - - if (sentencepieceTokenizers.includes(model)) { - const tokenizer = getSentencepiceTokenizer(model); - const instance = await tokenizer?.get(); - encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); - } else { - const tokenizer = getTiktokenTokenizer(model); - encodeFunction = (tokenizer.encode.bind(tokenizer)); - } - - for (const entry of request.body) { - if (!entry || !entry.text) { - continue; - } - - try { - const tokens = getEntryTokens(entry.text, encodeFunction); - - for (const token of tokens) { - result[token] = entry.value; - } - } catch { - console.warn('Tokenizer failed to encode:', entry.text); - } - } - - // not needed for cached tokenizers - //tokenizer.free(); - return response.send(result); - - /** - * Gets tokenids for a given entry - * @param {string} text Entry text - * @param {(string) => Uint32Array} encode Function to encode text to token ids - * @returns {Uint32Array} Array of token ids - */ - function getEntryTokens(text, encode) { - // Get raw token ids from JSON array - if (text.trim().startsWith('[') && text.trim().endsWith(']')) { - try { - const json = JSON.parse(text); - if (Array.isArray(json) && json.every(x => typeof x === 'number')) { - return new Uint32Array(json); - } - } catch { - // ignore - } - } - - // Otherwise, get token ids from tokenizer - return encode(text); - } - } catch (error) { - console.error(error); - return response.send({}); - } -}); - -function convertChatMLPrompt(messages) { - if (typeof messages === 'string') { - return messages; - } - - const messageStrings = []; - messages.forEach(m => { - if (m.role === 'system' && m.name === undefined) { - messageStrings.push('System: ' + m.content); - } - else if (m.role === 'system' && m.name !== undefined) { - messageStrings.push(m.name + ': ' + m.content); - } - else { - messageStrings.push(m.role + ': ' + m.content); - } - }); - return messageStrings.join('\n') + '\nassistant:'; -} - -async function sendScaleRequest(request, response) { - - const api_url = new URL(request.body.api_url_scale).toString(); - const api_key_scale = readSecret(SECRET_KEYS.SCALE); - - if (!api_key_scale) { - console.log('Scale API key is missing.'); - return response.status(400).send({ error: true }); - } - - const requestPrompt = convertChatMLPrompt(request.body.messages); - console.log('Scale request:', requestPrompt); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await fetch(api_url, { - method: 'POST', - body: JSON.stringify({ input: { input: requestPrompt } }), - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Basic ${api_key_scale}`, - }, - timeout: 0, - }); - - if 
(!generateResponse.ok) { - console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - console.log('Scale response:', generateResponseJson); - - const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; - return response.send(reply); - } catch (error) { - console.log(error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) { if (!request.body) return response_generate_scale.sendStatus(400); @@ -908,396 +686,6 @@ app.post('/generate_altscale', jsonParser, function (request, response_generate_ }); -/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendClaudeRequest(request, response) { - - const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); - const api_key_claude = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); - - if (!api_key_claude) { - console.log('Claude API key is missing.'); - return response.status(400).send({ error: true }); - } - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; - let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); - - if (request.body.assistant_prefill && !request.body.exclude_assistant) { - requestPrompt += request.body.assistant_prefill; - } - - console.log('Claude request:', requestPrompt); - const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; - - // Add custom stop sequences - if (Array.isArray(request.body.stop)) { - stop_sequences.push(...request.body.stop); - } - - const generateResponse = await fetch(api_url + '/complete', { - method: 'POST', - signal: controller.signal, - body: JSON.stringify({ - prompt: requestPrompt, - model: request.body.model, - max_tokens_to_sample: request.body.max_tokens, - stop_sequences: stop_sequences, - temperature: request.body.temperature, - top_p: request.body.top_p, - top_k: request.body.top_k, - stream: request.body.stream, - }), - headers: { - 'Content-Type': 'application/json', - 'anthropic-version': '2023-06-01', - 'x-api-key': api_key_claude, - }, - timeout: 0, - }); - - if (request.body.stream) { - // Pipe remote SSE stream to Express response - forwardFetchResponse(generateResponse, response); - } else { - if (!generateResponse.ok) { - console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson.completion; - console.log('Claude response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } - } catch (error) { - console.log('Error communicating with Claude: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - 
-/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendPalmRequest(request, response) { - const api_key_palm = readSecret(SECRET_KEYS.PALM); - - if (!api_key_palm) { - console.log('Palm API key is missing.'); - return response.status(400).send({ error: true }); - } - - const body = { - prompt: { - text: request.body.messages, - }, - stopSequences: request.body.stop, - safetySettings: PALM_SAFETY, - temperature: request.body.temperature, - topP: request.body.top_p, - topK: request.body.top_k || undefined, - maxOutputTokens: request.body.max_tokens, - candidate_count: 1, - }; - - console.log('Palm request:', body); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { - body: JSON.stringify(body), - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - signal: controller.signal, - timeout: 0, - }); - - if (!generateResponse.ok) { - console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates[0]?.output; - - if (!responseText) { - console.log('Palm API returned no response', generateResponseJson); - let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; - - // Check for filters - if (generateResponseJson?.filters[0]?.message) { - message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; - } - - return response.send({ error: { message } }); - } - - console.log('Palm response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } catch (error) { - console.log('Error communicating with Palm API: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -app.post('/api/backends/chat-completions/generate', jsonParser, function (request, response_generate_openai) { - if (!request.body) return response_generate_openai.status(400).send({ error: true }); - - switch (request.body.chat_completion_source) { - case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); - } - - let api_url; - let api_key_openai; - let headers; - let bodyParams; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - bodyParams = {}; - - if (getConfigValue('openai.randomizeUserId', false)) { - bodyParams['user'] = uuidv4(); - } - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - bodyParams = { 'transforms': ['middle-out'] }; - - if (request.body.use_fallback) { - bodyParams['route'] = 'fallback'; - } - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_generate_openai.status(400).send({ error: true }); - } - - // Add custom stop sequences - if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { - bodyParams['stop'] = request.body.stop; - } - - const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; - const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; - const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? - `${api_url}/completions` : - `${api_url}/chat/completions`; - - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - /** @type {import('node-fetch').RequestInit} */ - const config = { - method: 'post', - headers: { - 'Content-Type': 'application/json', - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - body: JSON.stringify({ - 'messages': isTextCompletion === false ? request.body.messages : undefined, - 'prompt': isTextCompletion === true ? textPrompt : undefined, - 'model': request.body.model, - 'temperature': request.body.temperature, - 'max_tokens': request.body.max_tokens, - 'stream': request.body.stream, - 'presence_penalty': request.body.presence_penalty, - 'frequency_penalty': request.body.frequency_penalty, - 'top_p': request.body.top_p, - 'top_k': request.body.top_k, - 'stop': isTextCompletion === false ? 
request.body.stop : undefined, - 'logit_bias': request.body.logit_bias, - 'seed': request.body.seed, - ...bodyParams, - }), - signal: controller.signal, - timeout: 0, - }; - - console.log(JSON.parse(String(config.body))); - - makeRequest(config, response_generate_openai, request); - - /** - * - * @param {*} config - * @param {express.Response} response_generate_openai - * @param {express.Request} request - * @param {Number} retries - * @param {Number} timeout - */ - async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { - try { - const fetchResponse = await fetch(endpointUrl, config); - - if (request.body.stream) { - console.log('Streaming request in progress'); - forwardFetchResponse(fetchResponse, response_generate_openai); - return; - } - - if (fetchResponse.ok) { - let json = await fetchResponse.json(); - response_generate_openai.send(json); - console.log(json); - console.log(json?.choices[0]?.message); - } else if (fetchResponse.status === 429 && retries > 0) { - console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); - setTimeout(() => { - timeout *= 2; - makeRequest(config, response_generate_openai, request, retries - 1, timeout); - }, timeout); - } else { - await handleErrorResponse(fetchResponse); - } - } catch (error) { - console.log('Generation failed', error); - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: true }); - } else { - response_generate_openai.end(); - } - } - } - - async function handleErrorResponse(response) { - const responseText = await response.text(); - const errorData = tryParse(responseText); - - const statusMessages = { - 400: 'Bad request', - 401: 'Unauthorized', - 402: 'Credit limit reached', - 403: 'Forbidden', - 404: 'Not found', - 429: 'Too many requests', - 451: 'Unavailable for legal reasons', - 502: 'Bad gateway', - }; - - const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; - const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; - console.log(message); - - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: { message }, quota_error: quota_error }); - } else if (!response_generate_openai.writableEnded) { - response_generate_openai.write(response); - } else { - response_generate_openai.end(); - } - } -}); - -async function sendAI21Request(request, response) { - if (!request.body) return response.sendStatus(400); - const controller = new AbortController(); - console.log(request.body.messages); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - const options = { - method: 'POST', - headers: { - accept: 'application/json', - 'content-type': 'application/json', - Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, - }, - body: JSON.stringify({ - numResults: 1, - maxTokens: request.body.max_tokens, - minTokens: 0, - temperature: request.body.temperature, - topP: request.body.top_p, - stopSequences: request.body.stop_tokens, - topKReturn: request.body.top_k, - frequencyPenalty: { - scale: request.body.frequency_penalty * 100, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - presencePenalty: { - scale: request.body.presence_penalty, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - 
}, - countPenalty: { - scale: request.body.count_pen, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - prompt: request.body.messages, - }), - signal: controller.signal, - }; - - fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) - .then(r => r.json()) - .then(r => { - if (r.completions === undefined) { - console.log(r); - } else { - console.log(r.completions[0].data.text); - } - const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; - return response.send(reply); - }) - .catch(err => { - console.error(err); - return response.send({ error: true }); - }); - -} - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. @@ -1443,6 +831,9 @@ app.use('/api/backends/text-completions', require('./src/endpoints/backends/text // KoboldAI app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').router); +// OpenAI chat completions +app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router); + const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + (listen ? '0.0.0.0' : '127.0.0.1') + diff --git a/src/chat-completion.js b/src/chat-completion.js deleted file mode 100644 index 4fc21a550..000000000 --- a/src/chat-completion.js +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Convert a prompt from the ChatML objects to the format used by Claude. - * @param {object[]} messages Array of messages - * @param {boolean} addHumanPrefix Add Human prefix - * @param {boolean} addAssistantPostfix Add Assistant postfix - * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " - * @returns {string} Prompt for Claude - * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). - */ -function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { - // Claude doesn't support message names, so we'll just add them to the message content. - for (const message of messages) { - if (message.name && message.role !== 'system') { - message.content = message.name + ': ' + message.content; - delete message.name; - } - } - - let systemPrompt = ''; - if (withSystemPrompt) { - let lastSystemIdx = -1; - - for (let i = 0; i < messages.length - 1; i++) { - const message = messages[i]; - if (message.role === 'system' && !message.name) { - systemPrompt += message.content + '\n\n'; - } else { - lastSystemIdx = i - 1; - break; - } - } - if (lastSystemIdx >= 0) { - messages.splice(0, lastSystemIdx + 1); - } - } - - let requestPrompt = messages.map((v) => { - let prefix = ''; - switch (v.role) { - case 'assistant': - prefix = '\n\nAssistant: '; - break; - case 'user': - prefix = '\n\nHuman: '; - break; - case 'system': - // According to the Claude docs, H: and A: should be used for example conversations. 
- if (v.name === 'example_assistant') { - prefix = '\n\nA: '; - } else if (v.name === 'example_user') { - prefix = '\n\nH: '; - } else { - prefix = '\n\n'; - } - break; - } - return prefix + v.content; - }).join(''); - - if (addHumanPrefix) { - requestPrompt = '\n\nHuman: ' + requestPrompt; - } - - if (addAssistantPostfix) { - requestPrompt = requestPrompt + '\n\nAssistant: '; - } - - if (withSystemPrompt) { - requestPrompt = systemPrompt + requestPrompt; - } - - return requestPrompt; -} - -module.exports = { - convertClaudePrompt, -}; diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js new file mode 100644 index 000000000..ec7dafe5c --- /dev/null +++ b/src/endpoints/backends/chat-completions.js @@ -0,0 +1,700 @@ +const express = require('express'); +const fetch = require('node-fetch').default; + +const { jsonParser } = require('../../express-common'); +const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); +const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); + +const { readSecret, SECRET_KEYS } = require('../secrets'); +const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); + +const API_OPENAI = 'https://api.openai.com/v1'; +const API_CLAUDE = 'https://api.anthropic.com/v1'; + +/** + * Convert a prompt from the ChatML objects to the format used by Claude. + * @param {object[]} messages Array of messages + * @param {boolean} addHumanPrefix Add Human prefix + * @param {boolean} addAssistantPostfix Add Assistant postfix + * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " + * @returns {string} Prompt for Claude + * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). + */ +function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) { + // Claude doesn't support message names, so we'll just add them to the message content. + for (const message of messages) { + if (message.name && message.role !== 'system') { + message.content = message.name + ': ' + message.content; + delete message.name; + } + } + + let systemPrompt = ''; + if (withSystemPrompt) { + let lastSystemIdx = -1; + + for (let i = 0; i < messages.length - 1; i++) { + const message = messages[i]; + if (message.role === 'system' && !message.name) { + systemPrompt += message.content + '\n\n'; + } else { + lastSystemIdx = i - 1; + break; + } + } + if (lastSystemIdx >= 0) { + messages.splice(0, lastSystemIdx + 1); + } + } + + let requestPrompt = messages.map((v) => { + let prefix = ''; + switch (v.role) { + case 'assistant': + prefix = '\n\nAssistant: '; + break; + case 'user': + prefix = '\n\nHuman: '; + break; + case 'system': + // According to the Claude docs, H: and A: should be used for example conversations. 
+ if (v.name === 'example_assistant') { + prefix = '\n\nA: '; + } else if (v.name === 'example_user') { + prefix = '\n\nH: '; + } else { + prefix = '\n\n'; + } + break; + } + return prefix + v.content; + }).join(''); + + if (addHumanPrefix) { + requestPrompt = '\n\nHuman: ' + requestPrompt; + } + + if (addAssistantPostfix) { + requestPrompt = requestPrompt + '\n\nAssistant: '; + } + + if (withSystemPrompt) { + requestPrompt = systemPrompt + requestPrompt; + } + + return requestPrompt; +} + +/** + * @param {express.Request} request + * @param {express.Response} response + */ +async function sendClaudeRequest(request, response) { + + const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); + const api_key_claude = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); + + if (!api_key_claude) { + console.log('Claude API key is missing.'); + return response.status(400).send({ error: true }); + } + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; + let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); + + if (request.body.assistant_prefill && !request.body.exclude_assistant) { + requestPrompt += request.body.assistant_prefill; + } + + console.log('Claude request:', requestPrompt); + const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; + + // Add custom stop sequences + if (Array.isArray(request.body.stop)) { + stop_sequences.push(...request.body.stop); + } + + const generateResponse = await fetch(api_url + '/complete', { + method: 'POST', + signal: controller.signal, + body: JSON.stringify({ + prompt: requestPrompt, + model: request.body.model, + max_tokens_to_sample: request.body.max_tokens, + stop_sequences: stop_sequences, + temperature: request.body.temperature, + top_p: request.body.top_p, + top_k: request.body.top_k, + stream: request.body.stream, + }), + headers: { + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01', + 'x-api-key': api_key_claude, + }, + timeout: 0, + }); + + if (request.body.stream) { + // Pipe remote SSE stream to Express response + forwardFetchResponse(generateResponse, response); + } else { + if (!generateResponse.ok) { + console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = generateResponseJson.completion; + console.log('Claude response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } + } catch (error) { + console.log('Error communicating with Claude: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +function convertChatMLPrompt(messages) { + if (typeof messages === 'string') { + return messages; + } + + const messageStrings = []; + messages.forEach(m => { + if (m.role === 'system' && m.name === undefined) { + messageStrings.push('System: ' + m.content); + } + else if (m.role === 'system' && m.name !== undefined) { + messageStrings.push(m.name + ': ' + m.content); + } + else { + 
messageStrings.push(m.role + ': ' + m.content); + } + }); + return messageStrings.join('\n') + '\nassistant:'; +} + +async function sendScaleRequest(request, response) { + + const api_url = new URL(request.body.api_url_scale).toString(); + const api_key_scale = readSecret(SECRET_KEYS.SCALE); + + if (!api_key_scale) { + console.log('Scale API key is missing.'); + return response.status(400).send({ error: true }); + } + + const requestPrompt = convertChatMLPrompt(request.body.messages); + console.log('Scale request:', requestPrompt); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(api_url, { + method: 'POST', + body: JSON.stringify({ input: { input: requestPrompt } }), + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Basic ${api_key_scale}`, + }, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + console.log('Scale response:', generateResponseJson); + + const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; + return response.send(reply); + } catch (error) { + console.log(error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +/** + * @param {express.Request} request + * @param {express.Response} response + */ +async function sendPalmRequest(request, response) { + const api_key_palm = readSecret(SECRET_KEYS.PALM); + + if (!api_key_palm) { + console.log('Palm API key is missing.'); + return response.status(400).send({ error: true }); + } + + const body = { + prompt: { + text: request.body.messages, + }, + stopSequences: request.body.stop, + safetySettings: PALM_SAFETY, + temperature: request.body.temperature, + topP: request.body.top_p, + topK: request.body.top_k || undefined, + maxOutputTokens: request.body.max_tokens, + candidate_count: 1, + }; + + console.log('Palm request:', body); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { + body: JSON.stringify(body), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + signal: controller.signal, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = generateResponseJson?.candidates[0]?.output; + + if (!responseText) { + console.log('Palm API returned no response', generateResponseJson); + let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; + + // Check for filters + if (generateResponseJson?.filters[0]?.message) { + message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; + } + + return response.send({ error: { message } }); + } + + console.log('Palm response:', 
responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } catch (error) { + console.log('Error communicating with Palm API: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +async function sendAI21Request(request, response) { + if (!request.body) return response.sendStatus(400); + const controller = new AbortController(); + console.log(request.body.messages); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + const options = { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, + }, + body: JSON.stringify({ + numResults: 1, + maxTokens: request.body.max_tokens, + minTokens: 0, + temperature: request.body.temperature, + topP: request.body.top_p, + stopSequences: request.body.stop_tokens, + topKReturn: request.body.top_k, + frequencyPenalty: { + scale: request.body.frequency_penalty * 100, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + presencePenalty: { + scale: request.body.presence_penalty, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + countPenalty: { + scale: request.body.count_pen, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + prompt: request.body.messages, + }), + signal: controller.signal, + }; + + fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) + .then(r => r.json()) + .then(r => { + if (r.completions === undefined) { + console.log(r); + } else { + console.log(r.completions[0].data.text); + } + const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; + return response.send(reply); + }) + .catch(err => { + console.error(err); + return response.send({ error: true }); + }); + +} + +const router = express.Router(); + +router.post('/status', jsonParser, async function (request, response_getstatus_openai) { + if (!request.body) return response_getstatus_openai.sendStatus(400); + + let api_url; + let api_key_openai; + let headers; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + api_key_openai = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + } else { + api_url = 'https://openrouter.ai/api/v1'; + api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + } + + if (!api_key_openai && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response_getstatus_openai.status(400).send({ error: true }); + } + + try { + const response = await fetch(api_url + '/models', { + method: 'GET', + headers: { + 'Authorization': 'Bearer ' + api_key_openai, + ...headers, + }, + }); + + if (response.ok) { + const data = await response.json(); + response_getstatus_openai.send(data); + + if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { + let models = []; + + data.data.forEach(model => { + const context_length = model.context_length; + const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt)); + const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); + models[model.id] = { + tokens_per_dollar: tokens_rounded + 'k', + context_length: context_length, + }; + }); + + console.log('Available OpenRouter models:', models); + } else { + const models = data?.data; + + if (Array.isArray(models)) { + const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); + console.log('Available OpenAI models:', modelIds); + } else { + console.log('OpenAI endpoint did not return a list of models.'); + } + } + } + else { + console.log('OpenAI status check failed. Either Access Token is incorrect or API endpoint is down.'); + response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); + } + } catch (e) { + console.error(e); + + if (!response_getstatus_openai.headersSent) { + response_getstatus_openai.send({ error: true }); + } else { + response_getstatus_openai.end(); + } + } +}); + +router.post('/bias', jsonParser, async function (request, response) { + if (!request.body || !Array.isArray(request.body)) + return response.sendStatus(400); + + try { + const result = {}; + const model = getTokenizerModel(String(request.query.model || '')); + + // no bias for claude + if (model == 'claude') { + return response.send(result); + } + + let encodeFunction; + + if (sentencepieceTokenizers.includes(model)) { + const tokenizer = getSentencepiceTokenizer(model); + const instance = await tokenizer?.get(); + encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); + } else { + const tokenizer = getTiktokenTokenizer(model); + encodeFunction = (tokenizer.encode.bind(tokenizer)); + } + + for (const entry of request.body) { + if (!entry || !entry.text) { + continue; + } + + try { + const tokens = getEntryTokens(entry.text, encodeFunction); + + for (const token of tokens) { + result[token] = entry.value; + } + } catch { + console.warn('Tokenizer failed to encode:', entry.text); + } + } + + // not needed for cached tokenizers + //tokenizer.free(); + return response.send(result); + + /** + * Gets tokenids for a given entry + * @param {string} text Entry text + * @param {(string) => Uint32Array} encode Function to encode text to token ids + * @returns {Uint32Array} Array of token ids + */ + function getEntryTokens(text, encode) { + // Get raw token ids from JSON array + if (text.trim().startsWith('[') && text.trim().endsWith(']')) { + try { + const json = JSON.parse(text); + if (Array.isArray(json) && json.every(x 
=> typeof x === 'number')) { + return new Uint32Array(json); + } + } catch { + // ignore + } + } + + // Otherwise, get token ids from tokenizer + return encode(text); + } + } catch (error) { + console.error(error); + return response.send({}); + } +}); + + +router.post('/generate', jsonParser, function (request, response_generate_openai) { + if (!request.body) return response_generate_openai.status(400).send({ error: true }); + + switch (request.body.chat_completion_source) { + case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); + case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); + } + + let api_url; + let api_key_openai; + let headers; + let bodyParams; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + bodyParams = {}; + + if (getConfigValue('openai.randomizeUserId', false)) { + bodyParams['user'] = uuidv4(); + } + } else { + api_url = 'https://openrouter.ai/api/v1'; + api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + bodyParams = { 'transforms': ['middle-out'] }; + + if (request.body.use_fallback) { + bodyParams['route'] = 'fallback'; + } + } + + if (!api_key_openai && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response_generate_openai.status(400).send({ error: true }); + } + + // Add custom stop sequences + if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { + bodyParams['stop'] = request.body.stop; + } + + const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; + const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; + const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? + `${api_url}/completions` : + `${api_url}/chat/completions`; + + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + /** @type {import('node-fetch').RequestInit} */ + const config = { + method: 'post', + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + api_key_openai, + ...headers, + }, + body: JSON.stringify({ + 'messages': isTextCompletion === false ? request.body.messages : undefined, + 'prompt': isTextCompletion === true ? textPrompt : undefined, + 'model': request.body.model, + 'temperature': request.body.temperature, + 'max_tokens': request.body.max_tokens, + 'stream': request.body.stream, + 'presence_penalty': request.body.presence_penalty, + 'frequency_penalty': request.body.frequency_penalty, + 'top_p': request.body.top_p, + 'top_k': request.body.top_k, + 'stop': isTextCompletion === false ? 
request.body.stop : undefined, + 'logit_bias': request.body.logit_bias, + 'seed': request.body.seed, + ...bodyParams, + }), + signal: controller.signal, + timeout: 0, + }; + + console.log(JSON.parse(String(config.body))); + + makeRequest(config, response_generate_openai, request); + + /** + * + * @param {*} config + * @param {express.Response} response_generate_openai + * @param {express.Request} request + * @param {Number} retries + * @param {Number} timeout + */ + async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { + try { + const fetchResponse = await fetch(endpointUrl, config); + + if (request.body.stream) { + console.log('Streaming request in progress'); + forwardFetchResponse(fetchResponse, response_generate_openai); + return; + } + + if (fetchResponse.ok) { + let json = await fetchResponse.json(); + response_generate_openai.send(json); + console.log(json); + console.log(json?.choices[0]?.message); + } else if (fetchResponse.status === 429 && retries > 0) { + console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); + setTimeout(() => { + timeout *= 2; + makeRequest(config, response_generate_openai, request, retries - 1, timeout); + }, timeout); + } else { + await handleErrorResponse(fetchResponse); + } + } catch (error) { + console.log('Generation failed', error); + if (!response_generate_openai.headersSent) { + response_generate_openai.send({ error: true }); + } else { + response_generate_openai.end(); + } + } + } + + async function handleErrorResponse(response) { + const responseText = await response.text(); + const errorData = tryParse(responseText); + + const statusMessages = { + 400: 'Bad request', + 401: 'Unauthorized', + 402: 'Credit limit reached', + 403: 'Forbidden', + 404: 'Not found', + 429: 'Too many requests', + 451: 'Unavailable for legal reasons', + 502: 'Bad gateway', + }; + + const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; + const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; + console.log(message); + + if (!response_generate_openai.headersSent) { + response_generate_openai.send({ error: { message }, quota_error: quota_error }); + } else if (!response_generate_openai.writableEnded) { + response_generate_openai.write(response); + } else { + response_generate_openai.end(); + } + } +}); + +module.exports = { + router, + convertClaudePrompt, +}; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index a81779d97..bf43ef343 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('../chat-completion'); +const { convertClaudePrompt } = require('./textgen/chat-completions'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); From 22e048b5af6fc84aeffbf5fc38814448407d03d4 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:54:47 -0500 Subject: [PATCH 03/10] Rename generate_altscale endpoint --- public/scripts/openai.js | 2 +- server.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/openai.js b/public/scripts/openai.js index 3e8447cb4..b8cdc818f 100644 --- 
a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1382,7 +1382,7 @@ function openRouterGroupByVendor(array) { } async function sendAltScaleRequest(messages, logit_bias, signal, type) { - const generate_url = '/generate_altscale'; + const generate_url = '/api/backends/scale-alt/generate'; let firstSysMsgs = []; for (let msg of messages) { diff --git a/server.js b/server.js index af0f89a5e..e5d98bcb7 100644 --- a/server.js +++ b/server.js @@ -620,7 +620,7 @@ function cleanUploads() { } } -app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) { +app.post('/api/backends/scale-alt/generate', jsonParser, function (request, response_generate_scale) { if (!request.body) return response_generate_scale.sendStatus(400); fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { From b55ea8df04e50c54e2223b567ad07a5445b7301c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Mon, 11 Dec 2023 23:56:55 -0500 Subject: [PATCH 04/10] Move alt Scale generation to its own module --- server.js | 71 ++------------------------- src/endpoints/backends/scale-alt.js | 76 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 67 deletions(-) create mode 100644 src/endpoints/backends/scale-alt.js diff --git a/server.js b/server.js index e5d98bcb7..dfc013265 100644 --- a/server.js +++ b/server.js @@ -43,7 +43,7 @@ util.inspect.defaultOptions.maxStringLength = null; const basicAuthMiddleware = require('./src/middleware/basicAuthMiddleware'); const { jsonParser, urlencodedParser } = require('./src/express-common.js'); const contentManager = require('./src/endpoints/content-manager'); -const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets'); +const { migrateSecrets } = require('./src/endpoints/secrets'); const { getVersion, getConfigValue, @@ -620,72 +620,6 @@ function cleanUploads() { } } -app.post('/api/backends/scale-alt/generate', jsonParser, function (request, response_generate_scale) { - if (!request.body) return response_generate_scale.sendStatus(400); - - fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`, - }, - body: JSON.stringify({ - json: { - variant: { - name: 'New Variant', - appId: '', - taxonomy: null, - }, - prompt: { - id: '', - template: '{{input}}\n', - exampleVariables: {}, - variablesSourceDataId: null, - systemMessage: request.body.sysprompt, - }, - modelParameters: { - id: '', - modelId: 'GPT4', - modelType: 'OpenAi', - maxTokens: request.body.max_tokens, - temperature: request.body.temp, - stop: 'user:', - suffix: null, - topP: request.body.top_p, - logprobs: null, - logitBias: request.body.logit_bias, - }, - inputs: [ - { - index: '-1', - valueByName: { - input: request.body.prompt, - }, - }, - ], - }, - meta: { - values: { - 'variant.taxonomy': ['undefined'], - 'prompt.variablesSourceDataId': ['undefined'], - 'modelParameters.suffix': ['undefined'], - 'modelParameters.logprobs': ['undefined'], - }, - }, - }), - }) - .then(response => response.json()) - .then(data => { - console.log(data.result.data.json.outputs[0]); - return response_generate_scale.send({ output: data.result.data.json.outputs[0] }); - }) - .catch((error) => { - console.error('Error:', error); - return response_generate_scale.send({ error: true }); - }); - -}); - /** * Redirect a deprecated API endpoint URL to its replacement. 
Because fetch, form submissions, and $.ajax follow
 * redirects, this is transparent to client-side code.
@@ -834,6 +768,9 @@ app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').route
 // OpenAI chat completions
 app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router);
 
+// Scale (alt method)
+app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt').router);
+
 const tavernUrl = new URL(
     (cliArguments.ssl ? 'https://' : 'http://') +
     (listen ? '0.0.0.0' : '127.0.0.1') +
diff --git a/src/endpoints/backends/scale-alt.js b/src/endpoints/backends/scale-alt.js
new file mode 100644
index 000000000..240e169b3
--- /dev/null
+++ b/src/endpoints/backends/scale-alt.js
@@ -0,0 +1,76 @@
+const express = require('express');
+const fetch = require('node-fetch').default;
+
+const { jsonParser } = require('../../express-common');
+
+const { readSecret, SECRET_KEYS } = require('../secrets');
+
+const router = express.Router();
+
+router.post('/generate', jsonParser, function (request, response_generate_scale) {
+    if (!request.body) return response_generate_scale.sendStatus(400);
+
+    fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+            'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`,
+        },
+        body: JSON.stringify({
+            json: {
+                variant: {
+                    name: 'New Variant',
+                    appId: '',
+                    taxonomy: null,
+                },
+                prompt: {
+                    id: '',
+                    template: '{{input}}\n',
+                    exampleVariables: {},
+                    variablesSourceDataId: null,
+                    systemMessage: request.body.sysprompt,
+                },
+                modelParameters: {
+                    id: '',
+                    modelId: 'GPT4',
+                    modelType: 'OpenAi',
+                    maxTokens: request.body.max_tokens,
+                    temperature: request.body.temp,
+                    stop: 'user:',
+                    suffix: null,
+                    topP: request.body.top_p,
+                    logprobs: null,
+                    logitBias: request.body.logit_bias,
+                },
+                inputs: [
+                    {
+                        index: '-1',
+                        valueByName: {
+                            input: request.body.prompt,
+                        },
+                    },
+                ],
+            },
+            meta: {
+                values: {
+                    'variant.taxonomy': ['undefined'],
+                    'prompt.variablesSourceDataId': ['undefined'],
+                    'modelParameters.suffix': ['undefined'],
+                    'modelParameters.logprobs': ['undefined'],
+                },
+            },
+        }),
+    })
+        .then(response => response.json())
+        .then(data => {
+            console.log(data.result.data.json.outputs[0]);
+            return response_generate_scale.send({ output: data.result.data.json.outputs[0] });
+        })
+        .catch((error) => {
+            console.error('Error:', error);
+            return response_generate_scale.send({ error: true });
+        });
+
+});
+
+module.exports = { router };
From 3d8160cf25281ce04b7f2ca87b348feb4dda9d50 Mon Sep 17 00:00:00 2001
From: kingbri
Date: Wed, 13 Dec 2023 21:39:07 -0500
Subject: [PATCH 05/10] Server: Update CORS proxy body limit

The body-parser middleware only accepted 50mb of data; bump this value to 200mb.
Signed-off-by: kingbri --- server.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server.js b/server.js index 2374df5a2..4f734ba78 100644 --- a/server.js +++ b/server.js @@ -222,7 +222,9 @@ if (!cliArguments.disableCsrf) { if (getConfigValue('enableCorsProxy', false) || cliArguments.corsProxy) { const bodyParser = require('body-parser'); - app.use(bodyParser.json()); + app.use(bodyParser.json({ + limit: '200mb', + })); console.log('Enabling CORS proxy'); app.use('/proxy/:url(*)', async (req, res) => { From 348253fd9830d97bc0c193361b159769c7ccd772 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 15:36:44 +0200 Subject: [PATCH 06/10] Fix import path --- src/endpoints/tokenizers.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index bf43ef343..b3ee6a0b8 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('./textgen/chat-completions'); +const { convertClaudePrompt } = require('./backends/chat-completions'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); From 2a5340232d4652a42e7fbfe1868bcd2c0b48ecdd Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 14 Dec 2023 16:00:17 +0200 Subject: [PATCH 07/10] Move prompt converters to a separate module. Camelcase local variables and add missing JSDocs. --- src/endpoints/backends/chat-completions.js | 225 +++++++-------------- src/endpoints/prompt-converters.js | 103 ++++++++++ src/endpoints/tokenizers.js | 2 +- 3 files changed, 176 insertions(+), 154 deletions(-) create mode 100644 src/endpoints/prompt-converters.js diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js index ec7dafe5c..0b08d2700 100644 --- a/src/endpoints/backends/chat-completions.js +++ b/src/endpoints/backends/chat-completions.js @@ -4,6 +4,7 @@ const fetch = require('node-fetch').default; const { jsonParser } = require('../../express-common'); const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); +const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters'); const { readSecret, SECRET_KEYS } = require('../secrets'); const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); @@ -12,89 +13,15 @@ const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; /** - * Convert a prompt from the ChatML objects to the format used by Claude. - * @param {object[]} messages Array of messages - * @param {boolean} addHumanPrefix Add Human prefix - * @param {boolean} addAssistantPostfix Add Assistant postfix - * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: " - * @returns {string} Prompt for Claude - * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3). 
---
 src/endpoints/backends/chat-completions.js | 225 +++++++--------------
 src/endpoints/prompt-converters.js         | 103 ++++++++++
 src/endpoints/tokenizers.js                |   2 +-
 3 files changed, 176 insertions(+), 154 deletions(-)
 create mode 100644 src/endpoints/prompt-converters.js

diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index ec7dafe5c..0b08d2700 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -4,6 +4,7 @@ const fetch = require('node-fetch').default;
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util');
+const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');

@@ -12,89 +13,15 @@ const API_OPENAI = 'https://api.openai.com/v1';
const API_CLAUDE = 'https://api.anthropic.com/v1';

/**
- * Convert a prompt from the ChatML objects to the format used by Claude.
- * @param {object[]} messages Array of messages
- * @param {boolean} addHumanPrefix Add Human prefix
- * @param {boolean} addAssistantPostfix Add Assistant postfix
- * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: "
- * @returns {string} Prompt for Claude
- * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3).
- */
-function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) {
-    // Claude doesn't support message names, so we'll just add them to the message content.
-    for (const message of messages) {
-        if (message.name && message.role !== 'system') {
-            message.content = message.name + ': ' + message.content;
-            delete message.name;
-        }
-    }
-
-    let systemPrompt = '';
-    if (withSystemPrompt) {
-        let lastSystemIdx = -1;
-
-        for (let i = 0; i < messages.length - 1; i++) {
-            const message = messages[i];
-            if (message.role === 'system' && !message.name) {
-                systemPrompt += message.content + '\n\n';
-            } else {
-                lastSystemIdx = i - 1;
-                break;
-            }
-        }
-        if (lastSystemIdx >= 0) {
-            messages.splice(0, lastSystemIdx + 1);
-        }
-    }
-
-    let requestPrompt = messages.map((v) => {
-        let prefix = '';
-        switch (v.role) {
-            case 'assistant':
-                prefix = '\n\nAssistant: ';
-                break;
-            case 'user':
-                prefix = '\n\nHuman: ';
-                break;
-            case 'system':
-                // According to the Claude docs, H: and A: should be used for example conversations.
-                if (v.name === 'example_assistant') {
-                    prefix = '\n\nA: ';
-                } else if (v.name === 'example_user') {
-                    prefix = '\n\nH: ';
-                } else {
-                    prefix = '\n\n';
-                }
-                break;
-        }
-        return prefix + v.content;
-    }).join('');
-
-    if (addHumanPrefix) {
-        requestPrompt = '\n\nHuman: ' + requestPrompt;
-    }
-
-    if (addAssistantPostfix) {
-        requestPrompt = requestPrompt + '\n\nAssistant: ';
-    }
-
-    if (withSystemPrompt) {
-        requestPrompt = systemPrompt + requestPrompt;
-    }
-
-    return requestPrompt;
-}
-
-/**
- * @param {express.Request} request
- * @param {express.Response} response
+ * Sends a request to Claude API.
+ * @param {express.Request} request Express request
+ * @param {express.Response} response Express response
 */
async function sendClaudeRequest(request, response) {
+    const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
+    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE);

-    const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
-    const api_key_claude = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE);
-
-    if (!api_key_claude) {
+    if (!apiKey) {
        console.log('Claude API key is missing.');
        return response.status(400).send({ error: true });
    }
@@ -121,7 +48,7 @@ async function sendClaudeRequest(request, response) {
        stop_sequences.push(...request.body.stop);
    }

-    const generateResponse = await fetch(api_url + '/complete', {
+    const generateResponse = await fetch(apiUrl + '/complete', {
        method: 'POST',
        signal: controller.signal,
        body: JSON.stringify({
@@ -137,7 +64,7 @@ async function sendClaudeRequest(request, response) {
        headers: {
            'Content-Type': 'application/json',
            'anthropic-version': '2023-06-01',
-            'x-api-key': api_key_claude,
+            'x-api-key': apiKey,
        },
        timeout: 0,
    });
@@ -167,37 +94,21 @@ async function sendClaudeRequest(request, response) {
    }
}

-function convertChatMLPrompt(messages) {
-    if (typeof messages === 'string') {
-        return messages;
-    }
-
-    const messageStrings = [];
-    messages.forEach(m => {
-        if (m.role === 'system' && m.name === undefined) {
-            messageStrings.push('System: ' + m.content);
-        }
-        else if (m.role === 'system' && m.name !== undefined) {
-            messageStrings.push(m.name + ': ' + m.content);
-        }
-        else {
-            messageStrings.push(m.role + ': ' + m.content);
-        }
-    });
-    return messageStrings.join('\n') + '\nassistant:';
-}
-
+/**
+ * Sends a request to Scale Spellbook API.
+ * @param {import("express").Request} request Express request
+ * @param {import("express").Response} response Express response
+ */
async function sendScaleRequest(request, response) {
+    const apiUrl = new URL(request.body.api_url_scale).toString();
+    const apiKey = readSecret(SECRET_KEYS.SCALE);

-    const api_url = new URL(request.body.api_url_scale).toString();
-    const api_key_scale = readSecret(SECRET_KEYS.SCALE);
-
-    if (!api_key_scale) {
+    if (!apiKey) {
        console.log('Scale API key is missing.');
        return response.status(400).send({ error: true });
    }

-    const requestPrompt = convertChatMLPrompt(request.body.messages);
+    const requestPrompt = convertTextCompletionPrompt(request.body.messages);
    console.log('Scale request:', requestPrompt);

    try {
@@ -207,12 +118,12 @@ async function sendScaleRequest(request, response) {
        controller.abort();
    });

-    const generateResponse = await fetch(api_url, {
+    const generateResponse = await fetch(apiUrl, {
        method: 'POST',
        body: JSON.stringify({ input: { input: requestPrompt } }),
        headers: {
            'Content-Type': 'application/json',
-            'Authorization': `Basic ${api_key_scale}`,
+            'Authorization': `Basic ${apiKey}`,
        },
        timeout: 0,
    });
@@ -236,8 +147,9 @@ async function sendScaleRequest(request, response) {
}

/**
- * @param {express.Request} request
- * @param {express.Response} response
+ * Sends a request to Google AI API.
+ * @param {express.Request} request Express request
+ * @param {express.Response} response Express response
 */
async function sendPalmRequest(request, response) {
    const api_key_palm = readSecret(SECRET_KEYS.PALM);
@@ -312,6 +224,11 @@ async function sendPalmRequest(request, response) {
    }
}

+/**
+ * Sends a request to AI21 API.
+ * @param {express.Request} request Express request
+ * @param {express.Response} response Express response
+ */
async function sendAI21Request(request, response) {
    if (!request.body) return response.sendStatus(400);
    const controller = new AbortController();
@@ -533,24 +450,24 @@ router.post('/bias', jsonParser, async function (request, response) {
});

-router.post('/generate', jsonParser, function (request, response_generate_openai) {
-    if (!request.body) return response_generate_openai.status(400).send({ error: true });
+router.post('/generate', jsonParser, function (request, response) {
+    if (!request.body) return response.status(400).send({ error: true });

    switch (request.body.chat_completion_source) {
-        case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai);
-        case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai);
-        case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai);
-        case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai);
+        case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response);
+        case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response);
+        case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
+        case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response);
    }

-    let api_url;
-    let api_key_openai;
+    let apiUrl;
+    let apiKey;
    let headers;
    let bodyParams;

    if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) {
-        api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
-        api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);
+        apiUrl = new URL(request.body.reverse_proxy || API_OPENAI).toString();
+        apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);

        headers = {};
        bodyParams = {};

@@ -558,8 +475,8 @@ router.post('/generate', jsonParser, function (request, response_generate_openai
            bodyParams['user'] = uuidv4();
        }
    } else {
-        api_url = 'https://openrouter.ai/api/v1';
-        api_key_openai = readSecret(SECRET_KEYS.OPENROUTER);
+        apiUrl = 'https://openrouter.ai/api/v1';
+        apiKey = readSecret(SECRET_KEYS.OPENROUTER);
        // OpenRouter needs to pass the referer: https://openrouter.ai/docs
        headers = { 'HTTP-Referer': request.headers.referer };
        bodyParams = { 'transforms': ['middle-out'] };
@@ -569,9 +486,9 @@ router.post('/generate', jsonParser, function (request, response_generate_openai
        }
    }

-    if (!api_key_openai && !request.body.reverse_proxy) {
+    if (!apiKey && !request.body.reverse_proxy) {
        console.log('OpenAI API key is missing.');
-        return response_generate_openai.status(400).send({ error: true });
+        return response.status(400).send({ error: true });
    }

    // Add custom stop sequences
    if (Array.isArray(request.body.stop) && request.body.stop.length > 0) {
    }

@@ -580,10 +497,10 @@ router.post('/generate', jsonParser, function (request, response_generate_openai
    const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string';
-    const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : '';
+    const textPrompt = isTextCompletion ? convertTextCompletionPrompt(request.body.messages) : '';
    const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ?
-        `${api_url}/completions` :
-        `${api_url}/chat/completions`;
+        `${apiUrl}/completions` :
+        `${apiUrl}/chat/completions`;

    const controller = new AbortController();
    request.socket.removeAllListeners('close');
@@ -596,7 +513,7 @@ router.post('/generate', jsonParser, function (request, response_generate_openai
        method: 'post',
        headers: {
            'Content-Type': 'application/json',
-            'Authorization': 'Bearer ' + api_key_openai,
+            'Authorization': 'Bearer ' + apiKey,
            ...headers,
        },
        body: JSON.stringify({
@@ -621,52 +538,55 @@ router.post('/generate', jsonParser, function (request, response_generate_openai

    console.log(JSON.parse(String(config.body)));

-    makeRequest(config, response_generate_openai, request);
+    makeRequest(config, response, request);

    /**
-     *
-     * @param {*} config
-     * @param {express.Response} response_generate_openai
-     * @param {express.Request} request
-     * @param {Number} retries
-     * @param {Number} timeout
+     * Makes a fetch request to the OpenAI API endpoint.
+     * @param {import('node-fetch').RequestInit} config Fetch config
+     * @param {express.Response} response Express response
+     * @param {express.Request} request Express request
+     * @param {Number} retries Number of retries left
+     * @param {Number} timeout Request timeout in ms
     */
-    async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) {
+    async function makeRequest(config, response, request, retries = 5, timeout = 5000) {
        try {
            const fetchResponse = await fetch(endpointUrl, config);

            if (request.body.stream) {
                console.log('Streaming request in progress');
-                forwardFetchResponse(fetchResponse, response_generate_openai);
+                forwardFetchResponse(fetchResponse, response);
                return;
            }

            if (fetchResponse.ok) {
                let json = await fetchResponse.json();
-                response_generate_openai.send(json);
+                response.send(json);
                console.log(json);
                console.log(json?.choices[0]?.message);
            } else if (fetchResponse.status === 429 && retries > 0) {
                console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`);
                setTimeout(() => {
                    timeout *= 2;
-                    makeRequest(config, response_generate_openai, request, retries - 1, timeout);
+                    makeRequest(config, response, request, retries - 1, timeout);
                }, timeout);
            } else {
                await handleErrorResponse(fetchResponse);
            }
        } catch (error) {
            console.log('Generation failed', error);
-            if (!response_generate_openai.headersSent) {
-                response_generate_openai.send({ error: true });
+            if (!response.headersSent) {
+                response.send({ error: true });
            } else {
-                response_generate_openai.end();
+                response.end();
            }
        }
    }

-    async function handleErrorResponse(response) {
-        const responseText = await response.text();
+    /**
+     * @param {import("node-fetch").Response} errorResponse
+     */
+    async function handleErrorResponse(errorResponse) {
+        const responseText = await errorResponse.text();
        const errorData = tryParse(responseText);

        const statusMessages = {
            502: 'Bad gateway',
        };

-        const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred';
-        const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota';
+        const message = errorData?.error?.message || statusMessages[errorResponse.status] || 'Unknown error occurred';
+        const quota_error = errorResponse.status === 429 && errorData?.error?.type === 'insufficient_quota';
        console.log(message);

-        if (!response_generate_openai.headersSent) {
-            response_generate_openai.send({ error: { message }, quota_error: quota_error });
-        } else if (!response_generate_openai.writableEnded) {
-            response_generate_openai.write(response);
+        if (!response.headersSent) {
+            response.send({ error: { message }, quota_error: quota_error });
+        } else if (!response.writableEnded) {
+            response.write(responseText);
        } else {
-            response_generate_openai.end();
+            response.end();
        }
    }
});

module.exports = {
    router,
-    convertClaudePrompt,
};

diff --git a/src/endpoints/prompt-converters.js b/src/endpoints/prompt-converters.js
new file mode 100644
index 000000000..4ffdb459e
--- /dev/null
+++ b/src/endpoints/prompt-converters.js
@@ -0,0 +1,103 @@
+/**
+ * Convert a prompt from the ChatML objects to the format used by Claude.
+ * @param {object[]} messages Array of messages
+ * @param {boolean} addHumanPrefix Add Human prefix
+ * @param {boolean} addAssistantPostfix Add Assistant postfix
+ * @param {boolean} withSystemPrompt Build system prompt before "\n\nHuman: "
+ * @returns {string} Prompt for Claude
+ * @copyright Prompt Conversion script taken from RisuAI by kwaroran (GPLv3).
+ */
+function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, withSystemPrompt) {
+    // Claude doesn't support message names, so we'll just add them to the message content.
+    for (const message of messages) {
+        if (message.name && message.role !== 'system') {
+            message.content = message.name + ': ' + message.content;
+            delete message.name;
+        }
+    }
+
+    let systemPrompt = '';
+    if (withSystemPrompt) {
+        let lastSystemIdx = -1;
+
+        for (let i = 0; i < messages.length - 1; i++) {
+            const message = messages[i];
+            if (message.role === 'system' && !message.name) {
+                systemPrompt += message.content + '\n\n';
+            } else {
+                lastSystemIdx = i - 1;
+                break;
+            }
+        }
+        if (lastSystemIdx >= 0) {
+            messages.splice(0, lastSystemIdx + 1);
+        }
+    }
+
+    let requestPrompt = messages.map((v) => {
+        let prefix = '';
+        switch (v.role) {
+            case 'assistant':
+                prefix = '\n\nAssistant: ';
+                break;
+            case 'user':
+                prefix = '\n\nHuman: ';
+                break;
+            case 'system':
+                // According to the Claude docs, H: and A: should be used for example conversations.
+                if (v.name === 'example_assistant') {
+                    prefix = '\n\nA: ';
+                } else if (v.name === 'example_user') {
+                    prefix = '\n\nH: ';
+                } else {
+                    prefix = '\n\n';
+                }
+                break;
+        }
+        return prefix + v.content;
+    }).join('');
+
+    if (addHumanPrefix) {
+        requestPrompt = '\n\nHuman: ' + requestPrompt;
+    }
+
+    if (addAssistantPostfix) {
+        requestPrompt = requestPrompt + '\n\nAssistant: ';
+    }
+
+    if (withSystemPrompt) {
+        requestPrompt = systemPrompt + requestPrompt;
+    }
+
+    return requestPrompt;
+}
+
+/**
+ * Convert a prompt from the ChatML objects to the format used by Text Completion API.
+ * @param {object[]} messages Array of messages
+ * @returns {string} Prompt for Text Completion API
+ */
+function convertTextCompletionPrompt(messages) {
+    if (typeof messages === 'string') {
+        return messages;
+    }
+
+    const messageStrings = [];
+    messages.forEach(m => {
+        if (m.role === 'system' && m.name === undefined) {
+            messageStrings.push('System: ' + m.content);
+        }
+        else if (m.role === 'system' && m.name !== undefined) {
+            messageStrings.push(m.name + ': ' + m.content);
+        }
+        else {
+            messageStrings.push(m.role + ': ' + m.content);
+        }
+    });
+    return messageStrings.join('\n') + '\nassistant:';
+}
+
+module.exports = {
+    convertClaudePrompt,
+    convertTextCompletionPrompt,
+};

diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index b3ee6a0b8..38c04f864 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -4,7 +4,7 @@ const express = require('express');
const { SentencePieceProcessor } = require('@agnai/sentencepiece-js');
const tiktoken = require('@dqbd/tiktoken');
const { Tokenizer } = require('@agnai/web-tokenizers');
-const { convertClaudePrompt } = require('./backends/chat-completions');
+const { convertClaudePrompt } = require('./prompt-converters');
const { readSecret, SECRET_KEYS } = require('./secrets');
const { TEXTGEN_TYPES } = require('../constants');
const { jsonParser } = require('../express-common');

From 40e15f576285f424ce5dbcbfcd57ea07d213fdce Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:18:10 +0200
Subject: [PATCH 08/10] Fix conditional access to Palm response body
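
The short version of the bug: optional chaining only guards the access it
is attached to, so a plain index right after it can still throw. A minimal
sketch with a hypothetical response shape:

    const body = {};                // no 'candidates' key at all
    body?.candidates[0]?.output;    // TypeError: Cannot read properties of undefined (reading '0')
    body?.candidates?.[0]?.output;  // undefined, handled gracefully
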
---
 src/endpoints/backends/chat-completions.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js
index 0b08d2700..af463bd21 100644
--- a/src/endpoints/backends/chat-completions.js
+++ b/src/endpoints/backends/chat-completions.js
@@ -197,15 +197,15 @@ async function sendPalmRequest(request, response) {
    }

    const generateResponseJson = await generateResponse.json();
-    const responseText = generateResponseJson?.candidates[0]?.output;
+    const responseText = generateResponseJson?.candidates?.[0]?.output;

    if (!responseText) {
        console.log('Palm API returned no response', generateResponseJson);
        let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`;

        // Check for filters
-        if (generateResponseJson?.filters[0]?.message) {
-            message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`;
+        if (generateResponseJson?.filters?.[0]?.reason) {
+            message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`;
        }

        return response.send({ error: { message } });

From b52487054409eb6bb5dadf3f6afcf4755d1133e6 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Thu, 14 Dec 2023 16:56:39 +0200
Subject: [PATCH 09/10] Fix AI21 icon styles

---
 public/style.css | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/public/style.css b/public/style.css
index 28796454f..741e83189 100644
--- a/public/style.css
+++ b/public/style.css
@@ -3665,12 +3665,13 @@ a {
}

.icon-svg {
-    fill: currentColor; /* Takes on the color of the surrounding text */
+    fill: currentColor;
    width: auto;
    height: 14px;
-    vertical-align: middle;
+    aspect-ratio: 1;

    /* To align with adjacent text */
+    place-self: center;
}

.paginationjs {

From 38a34bf1d5b74f5e5f3633d6483af0b646c304ef Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Thu, 14 Dec 2023 17:14:05 +0200
Subject: [PATCH 10/10] Fix silly argument naming
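
With the route handler's second argument now called `response`, keeping the
same name for the fetch callback parameter would shadow the Express object
inside that callback, so the inner parameter becomes `res`. A minimal sketch
of the pattern (url is a placeholder):

    router.post('/generate', jsonParser, function (request, response) {
        fetch(url)
            .then(res => res.json())            // `res` is the fetch Response
            .then(data => response.send(data)); // `response` is the Express one
    });
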
---
 src/endpoints/backends/scale-alt.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/endpoints/backends/scale-alt.js b/src/endpoints/backends/scale-alt.js
index 240e169b3..edcb7f83f 100644
--- a/src/endpoints/backends/scale-alt.js
+++ b/src/endpoints/backends/scale-alt.js
@@ -7,8 +7,8 @@ const { readSecret, SECRET_KEYS } = require('../secrets');

const router = express.Router();

-router.post('/generate', jsonParser, function (request, response_generate_scale) {
-    if (!request.body) return response_generate_scale.sendStatus(400);
+router.post('/generate', jsonParser, function (request, response) {
+    if (!request.body) return response.sendStatus(400);

    fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', {
        method: 'POST',
@@ -61,14 +61,14 @@ router.post('/generate', jsonParser, function (request, response_generate_scale)
            },
        }),
    })
-        .then(response => response.json())
+        .then(res => res.json())
        .then(data => {
            console.log(data.result.data.json.outputs[0]);
-            return response_generate_scale.send({ output: data.result.data.json.outputs[0] });
+            return response.send({ output: data.result.data.json.outputs[0] });
        })
        .catch((error) => {
            console.error('Error:', error);
-            return response_generate_scale.send({ error: true });
+            return response.send({ error: true });
        });

});