diff --git a/public/scripts/openai.js b/public/scripts/openai.js index a9bc5e304..b8cdc818f 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -1382,7 +1382,7 @@ function openRouterGroupByVendor(array) { } async function sendAltScaleRequest(messages, logit_bias, signal, type) { - const generate_url = '/generate_altscale'; + const generate_url = '/api/backends/scale-alt/generate'; let firstSysMsgs = []; for (let msg of messages) { @@ -1556,7 +1556,7 @@ async function sendOpenAIRequest(type, messages, signal) { generate_data['seed'] = oai_settings.seed; } - const generate_url = '/generate_openai'; + const generate_url = '/api/backends/chat-completions/generate'; const response = await fetch(generate_url, { method: 'POST', body: JSON.stringify(generate_data), @@ -1646,7 +1646,7 @@ async function calculateLogitBias() { let result = {}; try { - const reply = await fetch(`/openai_bias?model=${getTokenizerModel()}`, { + const reply = await fetch(`/api/backends/chat-completions/bias?model=${getTokenizerModel()}`, { method: 'POST', headers: getRequestHeaders(), body, @@ -2439,7 +2439,7 @@ async function getStatusOpen() { } try { - const response = await fetch('/getstatus_openai', { + const response = await fetch('/api/backends/chat-completions/status', { method: 'POST', headers: getRequestHeaders(), body: JSON.stringify(data), diff --git a/public/style.css b/public/style.css index 28796454f..741e83189 100644 --- a/public/style.css +++ b/public/style.css @@ -3665,12 +3665,13 @@ a { } .icon-svg { - fill: currentColor; /* Takes on the color of the surrounding text */ + fill: currentColor; width: auto; height: 14px; - vertical-align: middle; + aspect-ratio: 1; /* To align with adjacent text */ + place-self: center; } .paginationjs { diff --git a/server.js b/server.js index 2374df5a2..41e074551 100644 --- a/server.js +++ b/server.js @@ -43,12 +43,11 @@ util.inspect.defaultOptions.maxStringLength = null; const basicAuthMiddleware = require('./src/middleware/basicAuthMiddleware'); const { jsonParser, urlencodedParser } = require('./src/express-common.js'); const contentManager = require('./src/endpoints/content-manager'); -const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets'); +const { migrateSecrets } = require('./src/endpoints/secrets'); const { getVersion, getConfigValue, color, - uuidv4, tryParse, clientRelativePath, removeFileExtension, @@ -58,8 +57,7 @@ const { forwardFetchResponse, } = require('./src/util'); const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); -const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); -const { convertClaudePrompt } = require('./src/chat-completion'); +const { loadTokenizers } = require('./src/endpoints/tokenizers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. 
// https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -127,11 +125,8 @@ const autorun = (getConfigValue('autorun', false) || cliArguments.autorun) && !c const enableExtensions = getConfigValue('enableExtensions', true); const listen = getConfigValue('listen', false); -const API_OPENAI = 'https://api.openai.com/v1'; -const API_CLAUDE = 'https://api.anthropic.com/v1'; - const SETTINGS_FILE = './public/settings.json'; -const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); +const { DIRECTORIES, UPLOADS_PATH, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); // CORS Settings // const CORS = cors({ @@ -222,7 +217,9 @@ if (!cliArguments.disableCsrf) { if (getConfigValue('enableCorsProxy', false) || cliArguments.corsProxy) { const bodyParser = require('body-parser'); - app.use(bodyParser.json()); + app.use(bodyParser.json({ + limit: '200mb', + })); console.log('Enabling CORS proxy'); app.use('/proxy/:url(*)', async (req, res) => { @@ -625,679 +622,6 @@ function cleanUploads() { } } -/* OpenAI */ -app.post('/getstatus_openai', jsonParser, async function (request, response_getstatus_openai) { - if (!request.body) return response_getstatus_openai.sendStatus(400); - - let api_url; - let api_key_openai; - let headers; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_getstatus_openai.status(400).send({ error: true }); - } - - try { - const response = await fetch(api_url + '/models', { - method: 'GET', - headers: { - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - }); - - if (response.ok) { - const data = await response.json(); - response_getstatus_openai.send(data); - - if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { - let models = []; - - data.data.forEach(model => { - const context_length = model.context_length; - const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt)); - const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); - models[model.id] = { - tokens_per_dollar: tokens_rounded + 'k', - context_length: context_length, - }; - }); - - console.log('Available OpenRouter models:', models); - } else { - const models = data?.data; - - if (Array.isArray(models)) { - const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); - console.log('Available OpenAI models:', modelIds); - } else { - console.log('OpenAI endpoint did not return a list of models.'); - } - } - } - else { - console.log('OpenAI status check failed. 
Either Access Token is incorrect or API endpoint is down.'); - response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); - } - } catch (e) { - console.error(e); - - if (!response_getstatus_openai.headersSent) { - response_getstatus_openai.send({ error: true }); - } else { - response_getstatus_openai.end(); - } - } -}); - -app.post('/openai_bias', jsonParser, async function (request, response) { - if (!request.body || !Array.isArray(request.body)) - return response.sendStatus(400); - - try { - const result = {}; - const model = getTokenizerModel(String(request.query.model || '')); - - // no bias for claude - if (model == 'claude') { - return response.send(result); - } - - let encodeFunction; - - if (sentencepieceTokenizers.includes(model)) { - const tokenizer = getSentencepiceTokenizer(model); - const instance = await tokenizer?.get(); - encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); - } else { - const tokenizer = getTiktokenTokenizer(model); - encodeFunction = (tokenizer.encode.bind(tokenizer)); - } - - for (const entry of request.body) { - if (!entry || !entry.text) { - continue; - } - - try { - const tokens = getEntryTokens(entry.text, encodeFunction); - - for (const token of tokens) { - result[token] = entry.value; - } - } catch { - console.warn('Tokenizer failed to encode:', entry.text); - } - } - - // not needed for cached tokenizers - //tokenizer.free(); - return response.send(result); - - /** - * Gets tokenids for a given entry - * @param {string} text Entry text - * @param {(string) => Uint32Array} encode Function to encode text to token ids - * @returns {Uint32Array} Array of token ids - */ - function getEntryTokens(text, encode) { - // Get raw token ids from JSON array - if (text.trim().startsWith('[') && text.trim().endsWith(']')) { - try { - const json = JSON.parse(text); - if (Array.isArray(json) && json.every(x => typeof x === 'number')) { - return new Uint32Array(json); - } - } catch { - // ignore - } - } - - // Otherwise, get token ids from tokenizer - return encode(text); - } - } catch (error) { - console.error(error); - return response.send({}); - } -}); - -function convertChatMLPrompt(messages) { - if (typeof messages === 'string') { - return messages; - } - - const messageStrings = []; - messages.forEach(m => { - if (m.role === 'system' && m.name === undefined) { - messageStrings.push('System: ' + m.content); - } - else if (m.role === 'system' && m.name !== undefined) { - messageStrings.push(m.name + ': ' + m.content); - } - else { - messageStrings.push(m.role + ': ' + m.content); - } - }); - return messageStrings.join('\n') + '\nassistant:'; -} - -async function sendScaleRequest(request, response) { - - const api_url = new URL(request.body.api_url_scale).toString(); - const api_key_scale = readSecret(SECRET_KEYS.SCALE); - - if (!api_key_scale) { - console.log('Scale API key is missing.'); - return response.status(400).send({ error: true }); - } - - const requestPrompt = convertChatMLPrompt(request.body.messages); - console.log('Scale request:', requestPrompt); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await fetch(api_url, { - method: 'POST', - body: JSON.stringify({ input: { input: requestPrompt } }), - headers: { - 'Content-Type': 'application/json', - 'Authorization': `Basic ${api_key_scale}`, - }, - timeout: 0, - }); - - if (!generateResponse.ok) { - 
console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - console.log('Scale response:', generateResponseJson); - - const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; - return response.send(reply); - } catch (error) { - console.log(error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) { - if (!request.body) return response_generate_scale.sendStatus(400); - - fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`, - }, - body: JSON.stringify({ - json: { - variant: { - name: 'New Variant', - appId: '', - taxonomy: null, - }, - prompt: { - id: '', - template: '{{input}}\n', - exampleVariables: {}, - variablesSourceDataId: null, - systemMessage: request.body.sysprompt, - }, - modelParameters: { - id: '', - modelId: 'GPT4', - modelType: 'OpenAi', - maxTokens: request.body.max_tokens, - temperature: request.body.temp, - stop: 'user:', - suffix: null, - topP: request.body.top_p, - logprobs: null, - logitBias: request.body.logit_bias, - }, - inputs: [ - { - index: '-1', - valueByName: { - input: request.body.prompt, - }, - }, - ], - }, - meta: { - values: { - 'variant.taxonomy': ['undefined'], - 'prompt.variablesSourceDataId': ['undefined'], - 'modelParameters.suffix': ['undefined'], - 'modelParameters.logprobs': ['undefined'], - }, - }, - }), - }) - .then(response => response.json()) - .then(data => { - console.log(data.result.data.json.outputs[0]); - return response_generate_scale.send({ output: data.result.data.json.outputs[0] }); - }) - .catch((error) => { - console.error('Error:', error); - return response_generate_scale.send({ error: true }); - }); - -}); - -/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendClaudeRequest(request, response) { - - const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); - const api_key_claude = request.body.reverse_proxy ? 
request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); - - if (!api_key_claude) { - console.log('Claude API key is missing.'); - return response.status(400).send({ error: true }); - } - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; - let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); - - if (request.body.assistant_prefill && !request.body.exclude_assistant) { - requestPrompt += request.body.assistant_prefill; - } - - console.log('Claude request:', requestPrompt); - const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; - - // Add custom stop sequences - if (Array.isArray(request.body.stop)) { - stop_sequences.push(...request.body.stop); - } - - const generateResponse = await fetch(api_url + '/complete', { - method: 'POST', - signal: controller.signal, - body: JSON.stringify({ - prompt: requestPrompt, - model: request.body.model, - max_tokens_to_sample: request.body.max_tokens, - stop_sequences: stop_sequences, - temperature: request.body.temperature, - top_p: request.body.top_p, - top_k: request.body.top_k, - stream: request.body.stream, - }), - headers: { - 'Content-Type': 'application/json', - 'anthropic-version': '2023-06-01', - 'x-api-key': api_key_claude, - }, - timeout: 0, - }); - - if (request.body.stream) { - // Pipe remote SSE stream to Express response - forwardFetchResponse(generateResponse, response); - } else { - if (!generateResponse.ok) { - console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson.completion; - console.log('Claude response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } - } catch (error) { - console.log('Error communicating with Claude: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -/** - * @param {express.Request} request - * @param {express.Response} response - */ -async function sendPalmRequest(request, response) { - const api_key_palm = readSecret(SECRET_KEYS.PALM); - - if (!api_key_palm) { - console.log('Palm API key is missing.'); - return response.status(400).send({ error: true }); - } - - const body = { - prompt: { - text: request.body.messages, - }, - stopSequences: request.body.stop, - safetySettings: PALM_SAFETY, - temperature: request.body.temperature, - topP: request.body.top_p, - topK: request.body.top_k || undefined, - maxOutputTokens: request.body.max_tokens, - candidate_count: 1, - }; - - console.log('Palm request:', body); - - try { - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { - body: JSON.stringify(body), - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - signal: controller.signal, - timeout: 0, - }); - - if 
(!generateResponse.ok) { - console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); - return response.status(generateResponse.status).send({ error: true }); - } - - const generateResponseJson = await generateResponse.json(); - const responseText = generateResponseJson?.candidates[0]?.output; - - if (!responseText) { - console.log('Palm API returned no response', generateResponseJson); - let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; - - // Check for filters - if (generateResponseJson?.filters[0]?.message) { - message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`; - } - - return response.send({ error: { message } }); - } - - console.log('Palm response:', responseText); - - // Wrap it back to OAI format - const reply = { choices: [{ 'message': { 'content': responseText } }] }; - return response.send(reply); - } catch (error) { - console.log('Error communicating with Palm API: ', error); - if (!response.headersSent) { - return response.status(500).send({ error: true }); - } - } -} - -app.post('/generate_openai', jsonParser, function (request, response_generate_openai) { - if (!request.body) return response_generate_openai.status(400).send({ error: true }); - - switch (request.body.chat_completion_source) { - case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai); - case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai); - } - - let api_url; - let api_key_openai; - let headers; - let bodyParams; - - if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { - api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); - api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); - headers = {}; - bodyParams = {}; - - if (getConfigValue('openai.randomizeUserId', false)) { - bodyParams['user'] = uuidv4(); - } - } else { - api_url = 'https://openrouter.ai/api/v1'; - api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); - // OpenRouter needs to pass the referer: https://openrouter.ai/docs - headers = { 'HTTP-Referer': request.headers.referer }; - bodyParams = { 'transforms': ['middle-out'] }; - - if (request.body.use_fallback) { - bodyParams['route'] = 'fallback'; - } - } - - if (!api_key_openai && !request.body.reverse_proxy) { - console.log('OpenAI API key is missing.'); - return response_generate_openai.status(400).send({ error: true }); - } - - // Add custom stop sequences - if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { - bodyParams['stop'] = request.body.stop; - } - - const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; - const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : ''; - const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? 
- `${api_url}/completions` : - `${api_url}/chat/completions`; - - const controller = new AbortController(); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - - /** @type {import('node-fetch').RequestInit} */ - const config = { - method: 'post', - headers: { - 'Content-Type': 'application/json', - 'Authorization': 'Bearer ' + api_key_openai, - ...headers, - }, - body: JSON.stringify({ - 'messages': isTextCompletion === false ? request.body.messages : undefined, - 'prompt': isTextCompletion === true ? textPrompt : undefined, - 'model': request.body.model, - 'temperature': request.body.temperature, - 'max_tokens': request.body.max_tokens, - 'stream': request.body.stream, - 'presence_penalty': request.body.presence_penalty, - 'frequency_penalty': request.body.frequency_penalty, - 'top_p': request.body.top_p, - 'top_k': request.body.top_k, - 'stop': isTextCompletion === false ? request.body.stop : undefined, - 'logit_bias': request.body.logit_bias, - 'seed': request.body.seed, - ...bodyParams, - }), - signal: controller.signal, - timeout: 0, - }; - - console.log(JSON.parse(String(config.body))); - - makeRequest(config, response_generate_openai, request); - - /** - * - * @param {*} config - * @param {express.Response} response_generate_openai - * @param {express.Request} request - * @param {Number} retries - * @param {Number} timeout - */ - async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) { - try { - const fetchResponse = await fetch(endpointUrl, config); - - if (request.body.stream) { - console.log('Streaming request in progress'); - forwardFetchResponse(fetchResponse, response_generate_openai); - return; - } - - if (fetchResponse.ok) { - let json = await fetchResponse.json(); - response_generate_openai.send(json); - console.log(json); - console.log(json?.choices[0]?.message); - } else if (fetchResponse.status === 429 && retries > 0) { - console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); - setTimeout(() => { - timeout *= 2; - makeRequest(config, response_generate_openai, request, retries - 1, timeout); - }, timeout); - } else { - await handleErrorResponse(fetchResponse); - } - } catch (error) { - console.log('Generation failed', error); - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: true }); - } else { - response_generate_openai.end(); - } - } - } - - async function handleErrorResponse(response) { - const responseText = await response.text(); - const errorData = tryParse(responseText); - - const statusMessages = { - 400: 'Bad request', - 401: 'Unauthorized', - 402: 'Credit limit reached', - 403: 'Forbidden', - 404: 'Not found', - 429: 'Too many requests', - 451: 'Unavailable for legal reasons', - 502: 'Bad gateway', - }; - - const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred'; - const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota'; - console.log(message); - - if (!response_generate_openai.headersSent) { - response_generate_openai.send({ error: { message }, quota_error: quota_error }); - } else if (!response_generate_openai.writableEnded) { - response_generate_openai.write(response); - } else { - response_generate_openai.end(); - } - } -}); - -async function sendAI21Request(request, response) { - if (!request.body) return response.sendStatus(400); - const controller = new AbortController(); - 
console.log(request.body.messages); - request.socket.removeAllListeners('close'); - request.socket.on('close', function () { - controller.abort(); - }); - const options = { - method: 'POST', - headers: { - accept: 'application/json', - 'content-type': 'application/json', - Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, - }, - body: JSON.stringify({ - numResults: 1, - maxTokens: request.body.max_tokens, - minTokens: 0, - temperature: request.body.temperature, - topP: request.body.top_p, - stopSequences: request.body.stop_tokens, - topKReturn: request.body.top_k, - frequencyPenalty: { - scale: request.body.frequency_penalty * 100, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - presencePenalty: { - scale: request.body.presence_penalty, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - countPenalty: { - scale: request.body.count_pen, - applyToWhitespaces: false, - applyToPunctuations: false, - applyToNumbers: false, - applyToStopwords: false, - applyToEmojis: false, - }, - prompt: request.body.messages, - }), - signal: controller.signal, - }; - - fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) - .then(r => r.json()) - .then(r => { - if (r.completions === undefined) { - console.log(r); - } else { - console.log(r.completions[0].data.text); - } - const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; - return response.send(reply); - }) - .catch(err => { - console.error(err); - return response.send({ error: true }); - }); - -} - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. @@ -1443,6 +767,12 @@ app.use('/api/backends/text-completions', require('./src/endpoints/backends/text // KoboldAI app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').router); +// OpenAI chat completions +app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router); + +// Scale (alt method) +app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt').router); + const tavernUrl = new URL( (cliArguments.ssl ? 'https://' : 'http://') + (listen ? '0.0.0.0' : '127.0.0.1') + diff --git a/src/endpoints/backends/chat-completions.js b/src/endpoints/backends/chat-completions.js new file mode 100644 index 000000000..af463bd21 --- /dev/null +++ b/src/endpoints/backends/chat-completions.js @@ -0,0 +1,619 @@ +const express = require('express'); +const fetch = require('node-fetch').default; + +const { jsonParser } = require('../../express-common'); +const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants'); +const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util'); +const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters'); + +const { readSecret, SECRET_KEYS } = require('../secrets'); +const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers'); + +const API_OPENAI = 'https://api.openai.com/v1'; +const API_CLAUDE = 'https://api.anthropic.com/v1'; + +/** + * Sends a request to Claude API. 
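+ * Streams the response back as SSE when request.body.stream is set; otherwise wraps the completion into an OpenAI-style reply.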
+ * @param {express.Request} request Express request + * @param {express.Response} response Express response + */ +async function sendClaudeRequest(request, response) { + const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString(); + const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE); + + if (!apiKey) { + console.log('Claude API key is missing.'); + return response.status(400).send({ error: true }); + } + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1'; + let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt); + + if (request.body.assistant_prefill && !request.body.exclude_assistant) { + requestPrompt += request.body.assistant_prefill; + } + + console.log('Claude request:', requestPrompt); + const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:']; + + // Add custom stop sequences + if (Array.isArray(request.body.stop)) { + stop_sequences.push(...request.body.stop); + } + + const generateResponse = await fetch(apiUrl + '/complete', { + method: 'POST', + signal: controller.signal, + body: JSON.stringify({ + prompt: requestPrompt, + model: request.body.model, + max_tokens_to_sample: request.body.max_tokens, + stop_sequences: stop_sequences, + temperature: request.body.temperature, + top_p: request.body.top_p, + top_k: request.body.top_k, + stream: request.body.stream, + }), + headers: { + 'Content-Type': 'application/json', + 'anthropic-version': '2023-06-01', + 'x-api-key': apiKey, + }, + timeout: 0, + }); + + if (request.body.stream) { + // Pipe remote SSE stream to Express response + forwardFetchResponse(generateResponse, response); + } else { + if (!generateResponse.ok) { + console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = generateResponseJson.completion; + console.log('Claude response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } + } catch (error) { + console.log('Error communicating with Claude: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +/** + * Sends a request to Scale Spellbook API. 
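+ * Streaming is not supported; the ChatML messages are flattened into a text prompt and the output is wrapped into an OpenAI-style reply.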
+ * @param {import("express").Request} request Express request + * @param {import("express").Response} response Express response + */ +async function sendScaleRequest(request, response) { + const apiUrl = new URL(request.body.api_url_scale).toString(); + const apiKey = readSecret(SECRET_KEYS.SCALE); + + if (!apiKey) { + console.log('Scale API key is missing.'); + return response.status(400).send({ error: true }); + } + + const requestPrompt = convertTextCompletionPrompt(request.body.messages); + console.log('Scale request:', requestPrompt); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(apiUrl, { + method: 'POST', + body: JSON.stringify({ input: { input: requestPrompt } }), + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Basic ${apiKey}`, + }, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + console.log('Scale response:', generateResponseJson); + + const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] }; + return response.send(reply); + } catch (error) { + console.log(error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +/** + * Sends a request to Google AI API. + * @param {express.Request} request Express request + * @param {express.Response} response Express response + */ +async function sendPalmRequest(request, response) { + const api_key_palm = readSecret(SECRET_KEYS.PALM); + + if (!api_key_palm) { + console.log('Palm API key is missing.'); + return response.status(400).send({ error: true }); + } + + const body = { + prompt: { + text: request.body.messages, + }, + stopSequences: request.body.stop, + safetySettings: PALM_SAFETY, + temperature: request.body.temperature, + topP: request.body.top_p, + topK: request.body.top_k || undefined, + maxOutputTokens: request.body.max_tokens, + candidate_count: 1, + }; + + console.log('Palm request:', body); + + try { + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, { + body: JSON.stringify(body), + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + signal: controller.signal, + timeout: 0, + }); + + if (!generateResponse.ok) { + console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`); + return response.status(generateResponse.status).send({ error: true }); + } + + const generateResponseJson = await generateResponse.json(); + const responseText = generateResponseJson?.candidates?.[0]?.output; + + if (!responseText) { + console.log('Palm API returned no response', generateResponseJson); + let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`; + + // Check for filters + if (generateResponseJson?.filters?.[0]?.reason) { + message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`; + } + + return 
response.send({ error: { message } }); + } + + console.log('Palm response:', responseText); + + // Wrap it back to OAI format + const reply = { choices: [{ 'message': { 'content': responseText } }] }; + return response.send(reply); + } catch (error) { + console.log('Error communicating with Palm API: ', error); + if (!response.headersSent) { + return response.status(500).send({ error: true }); + } + } +} + +/** + * Sends a request to AI21 API. + * @param {express.Request} request Express request + * @param {express.Response} response Express response + */ +async function sendAI21Request(request, response) { + if (!request.body) return response.sendStatus(400); + const controller = new AbortController(); + console.log(request.body.messages); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + const options = { + method: 'POST', + headers: { + accept: 'application/json', + 'content-type': 'application/json', + Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`, + }, + body: JSON.stringify({ + numResults: 1, + maxTokens: request.body.max_tokens, + minTokens: 0, + temperature: request.body.temperature, + topP: request.body.top_p, + stopSequences: request.body.stop_tokens, + topKReturn: request.body.top_k, + frequencyPenalty: { + scale: request.body.frequency_penalty * 100, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + presencePenalty: { + scale: request.body.presence_penalty, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + countPenalty: { + scale: request.body.count_pen, + applyToWhitespaces: false, + applyToPunctuations: false, + applyToNumbers: false, + applyToStopwords: false, + applyToEmojis: false, + }, + prompt: request.body.messages, + }), + signal: controller.signal, + }; + + fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options) + .then(r => r.json()) + .then(r => { + if (r.completions === undefined) { + console.log(r); + } else { + console.log(r.completions[0].data.text); + } + const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] }; + return response.send(reply); + }) + .catch(err => { + console.error(err); + return response.send({ error: true }); + }); + +} + +const router = express.Router(); + +router.post('/status', jsonParser, async function (request, response_getstatus_openai) { + if (!request.body) return response_getstatus_openai.sendStatus(400); + + let api_url; + let api_key_openai; + let headers; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + api_key_openai = request.body.reverse_proxy ?
request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + } else { + api_url = 'https://openrouter.ai/api/v1'; + api_key_openai = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + } + + if (!api_key_openai && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response_getstatus_openai.status(400).send({ error: true }); + } + + try { + const response = await fetch(api_url + '/models', { + method: 'GET', + headers: { + 'Authorization': 'Bearer ' + api_key_openai, + ...headers, + }, + }); + + if (response.ok) { + const data = await response.json(); + response_getstatus_openai.send(data); + + if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) { + let models = []; + + data.data.forEach(model => { + const context_length = model.context_length; + const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt)); + const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0); + models[model.id] = { + tokens_per_dollar: tokens_rounded + 'k', + context_length: context_length, + }; + }); + + console.log('Available OpenRouter models:', models); + } else { + const models = data?.data; + + if (Array.isArray(models)) { + const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort(); + console.log('Available OpenAI models:', modelIds); + } else { + console.log('OpenAI endpoint did not return a list of models.'); + } + } + } + else { + console.log('OpenAI status check failed. Either Access Token is incorrect or API endpoint is down.'); + response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } }); + } + } catch (e) { + console.error(e); + + if (!response_getstatus_openai.headersSent) { + response_getstatus_openai.send({ error: true }); + } else { + response_getstatus_openai.end(); + } + } +}); + +router.post('/bias', jsonParser, async function (request, response) { + if (!request.body || !Array.isArray(request.body)) + return response.sendStatus(400); + + try { + const result = {}; + const model = getTokenizerModel(String(request.query.model || '')); + + // no bias for claude + if (model == 'claude') { + return response.send(result); + } + + let encodeFunction; + + if (sentencepieceTokenizers.includes(model)) { + const tokenizer = getSentencepiceTokenizer(model); + const instance = await tokenizer?.get(); + encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text)); + } else { + const tokenizer = getTiktokenTokenizer(model); + encodeFunction = (tokenizer.encode.bind(tokenizer)); + } + + for (const entry of request.body) { + if (!entry || !entry.text) { + continue; + } + + try { + const tokens = getEntryTokens(entry.text, encodeFunction); + + for (const token of tokens) { + result[token] = entry.value; + } + } catch { + console.warn('Tokenizer failed to encode:', entry.text); + } + } + + // not needed for cached tokenizers + //tokenizer.free(); + return response.send(result); + + /** + * Gets token ids for a given entry + * @param {string} text Entry text + * @param {(text: string) => Uint32Array} encode Function to encode text to token ids + * @returns {Uint32Array} Array of token ids + */ + function getEntryTokens(text, encode) { + // Get raw token ids from JSON array + if (text.trim().startsWith('[') && text.trim().endsWith(']')) { + try { + const json = JSON.parse(text); + if (Array.isArray(json) && json.every(x
=> typeof x === 'number')) { + return new Uint32Array(json); + } + } catch { + // ignore + } + } + + // Otherwise, get token ids from tokenizer + return encode(text); + } + } catch (error) { + console.error(error); + return response.send({}); + } +}); + + +router.post('/generate', jsonParser, function (request, response) { + if (!request.body) return response.status(400).send({ error: true }); + + switch (request.body.chat_completion_source) { + case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response); + case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response); + case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response); + case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response); + } + + let apiUrl; + let apiKey; + let headers; + let bodyParams; + + if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) { + apiUrl = new URL(request.body.reverse_proxy || API_OPENAI).toString(); + apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI); + headers = {}; + bodyParams = {}; + + if (getConfigValue('openai.randomizeUserId', false)) { + bodyParams['user'] = uuidv4(); + } + } else { + apiUrl = 'https://openrouter.ai/api/v1'; + apiKey = readSecret(SECRET_KEYS.OPENROUTER); + // OpenRouter needs to pass the referer: https://openrouter.ai/docs + headers = { 'HTTP-Referer': request.headers.referer }; + bodyParams = { 'transforms': ['middle-out'] }; + + if (request.body.use_fallback) { + bodyParams['route'] = 'fallback'; + } + } + + if (!apiKey && !request.body.reverse_proxy) { + console.log('OpenAI API key is missing.'); + return response.status(400).send({ error: true }); + } + + // Add custom stop sequences + if (Array.isArray(request.body.stop) && request.body.stop.length > 0) { + bodyParams['stop'] = request.body.stop; + } + + const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string'; + const textPrompt = isTextCompletion ? convertTextCompletionPrompt(request.body.messages) : ''; + const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ? + `${apiUrl}/completions` : + `${apiUrl}/chat/completions`; + + const controller = new AbortController(); + request.socket.removeAllListeners('close'); + request.socket.on('close', function () { + controller.abort(); + }); + + /** @type {import('node-fetch').RequestInit} */ + const config = { + method: 'post', + headers: { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer ' + apiKey, + ...headers, + }, + body: JSON.stringify({ + 'messages': isTextCompletion === false ? request.body.messages : undefined, + 'prompt': isTextCompletion === true ? textPrompt : undefined, + 'model': request.body.model, + 'temperature': request.body.temperature, + 'max_tokens': request.body.max_tokens, + 'stream': request.body.stream, + 'presence_penalty': request.body.presence_penalty, + 'frequency_penalty': request.body.frequency_penalty, + 'top_p': request.body.top_p, + 'top_k': request.body.top_k, + 'stop': isTextCompletion === false ? request.body.stop : undefined, + 'logit_bias': request.body.logit_bias, + 'seed': request.body.seed, + ...bodyParams, + }), + signal: controller.signal, + timeout: 0, + }; + + console.log(JSON.parse(String(config.body))); + + makeRequest(config, response, request); + + /** + * Makes a fetch request to the OpenAI API endpoint. 
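+ * Retries on HTTP 429 up to `retries` times, doubling `timeout` between attempts.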
+ * @param {import('node-fetch').RequestInit} config Fetch config + * @param {express.Response} response Express response + * @param {express.Request} request Express request + * @param {Number} retries Number of retries left + * @param {Number} timeout Request timeout in ms + */ + async function makeRequest(config, response, request, retries = 5, timeout = 5000) { + try { + const fetchResponse = await fetch(endpointUrl, config); + + if (request.body.stream) { + console.log('Streaming request in progress'); + forwardFetchResponse(fetchResponse, response); + return; + } + + if (fetchResponse.ok) { + let json = await fetchResponse.json(); + response.send(json); + console.log(json); + console.log(json?.choices[0]?.message); + } else if (fetchResponse.status === 429 && retries > 0) { + console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`); + setTimeout(() => { + timeout *= 2; + makeRequest(config, response, request, retries - 1, timeout); + }, timeout); + } else { + await handleErrorResponse(fetchResponse); + } + } catch (error) { + console.log('Generation failed', error); + if (!response.headersSent) { + response.send({ error: true }); + } else { + response.end(); + } + } + } + + /** + * @param {import("node-fetch").Response} errorResponse + */ + async function handleErrorResponse(errorResponse) { + const responseText = await errorResponse.text(); + const errorData = tryParse(responseText); + + const statusMessages = { + 400: 'Bad request', + 401: 'Unauthorized', + 402: 'Credit limit reached', + 403: 'Forbidden', + 404: 'Not found', + 429: 'Too many requests', + 451: 'Unavailable for legal reasons', + 502: 'Bad gateway', + }; + + const message = errorData?.error?.message || statusMessages[errorResponse.status] || 'Unknown error occurred'; + const quota_error = errorResponse.status === 429 && errorData?.error?.type === 'insufficient_quota'; + console.log(message); + + if (!response.headersSent) { + response.send({ error: { message }, quota_error: quota_error }); + } else if (!response.writableEnded) { + response.write(responseText); + } else { + response.end(); + } + } +}); + +module.exports = { + router, +}; diff --git a/src/endpoints/backends/scale-alt.js b/src/endpoints/backends/scale-alt.js new file mode 100644 index 000000000..edcb7f83f --- /dev/null +++ b/src/endpoints/backends/scale-alt.js @@ -0,0 +1,76 @@ +const express = require('express'); +const fetch = require('node-fetch').default; + +const { jsonParser } = require('../../express-common'); + +const { readSecret, SECRET_KEYS } = require('../secrets'); + +const router = express.Router(); + +router.post('/generate', jsonParser, function (request, response) { + if (!request.body) return response.sendStatus(400); + + fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`, + }, + body: JSON.stringify({ + json: { + variant: { + name: 'New Variant', + appId: '', + taxonomy: null, + }, + prompt: { + id: '', + template: '{{input}}\n', + exampleVariables: {}, + variablesSourceDataId: null, + systemMessage: request.body.sysprompt, + }, + modelParameters: { + id: '', + modelId: 'GPT4', + modelType: 'OpenAi', + maxTokens: request.body.max_tokens, + temperature: request.body.temp, + stop: 'user:', + suffix: null, + topP: request.body.top_p, + logprobs: null, + logitBias: request.body.logit_bias, + }, + inputs: [ + { + index: '-1', + valueByName: { + input: request.body.prompt, + }, +
}, + ], + }, + meta: { + values: { + 'variant.taxonomy': ['undefined'], + 'prompt.variablesSourceDataId': ['undefined'], + 'modelParameters.suffix': ['undefined'], + 'modelParameters.logprobs': ['undefined'], + }, + }, + }), + }) + .then(res => res.json()) + .then(data => { + console.log(data.result.data.json.outputs[0]); + return response.send({ output: data.result.data.json.outputs[0] }); + }) + .catch((error) => { + console.error('Error:', error); + return response.send({ error: true }); + }); + +}); + +module.exports = { router }; diff --git a/src/chat-completion.js b/src/endpoints/prompt-converters.js similarity index 75% rename from src/chat-completion.js rename to src/endpoints/prompt-converters.js index 4fc21a550..4ffdb459e 100644 --- a/src/chat-completion.js +++ b/src/endpoints/prompt-converters.js @@ -72,6 +72,32 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with return requestPrompt; } +/** + * Convert a prompt from ChatML message objects to the format used by the Text Completion API. + * @param {object[]|string} messages Array of messages (or an already-formatted prompt string, returned as-is) + * @returns {string} Prompt for the Text Completion API + */ +function convertTextCompletionPrompt(messages) { + if (typeof messages === 'string') { + return messages; + } + + const messageStrings = []; + messages.forEach(m => { + if (m.role === 'system' && m.name === undefined) { + messageStrings.push('System: ' + m.content); + } + else if (m.role === 'system' && m.name !== undefined) { + messageStrings.push(m.name + ': ' + m.content); + } + else { + messageStrings.push(m.role + ': ' + m.content); + } + }); + return messageStrings.join('\n') + '\nassistant:'; +} + module.exports = { convertClaudePrompt, + convertTextCompletionPrompt, }; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index a81779d97..38c04f864 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -4,7 +4,7 @@ const express = require('express'); const { SentencePieceProcessor } = require('@agnai/sentencepiece-js'); const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); -const { convertClaudePrompt } = require('../chat-completion'); +const { convertClaudePrompt } = require('./prompt-converters'); const { readSecret, SECRET_KEYS } = require('./secrets'); const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common');