Merge branch 'staging' into cleanup-sampler-order

This commit is contained in:
Cohee 2023-12-14 18:32:30 +02:00
commit 68bb616be3
7 changed files with 741 additions and 689 deletions

View File

@ -1382,7 +1382,7 @@ function openRouterGroupByVendor(array) {
}
async function sendAltScaleRequest(messages, logit_bias, signal, type) {
const generate_url = '/generate_altscale';
const generate_url = '/api/backends/scale-alt/generate';
let firstSysMsgs = [];
for (let msg of messages) {
@ -1556,7 +1556,7 @@ async function sendOpenAIRequest(type, messages, signal) {
generate_data['seed'] = oai_settings.seed;
}
const generate_url = '/generate_openai';
const generate_url = '/api/backends/chat-completions/generate';
const response = await fetch(generate_url, {
method: 'POST',
body: JSON.stringify(generate_data),
@ -1646,7 +1646,7 @@ async function calculateLogitBias() {
let result = {};
try {
const reply = await fetch(`/openai_bias?model=${getTokenizerModel()}`, {
const reply = await fetch(`/api/backends/chat-completions/bias?model=${getTokenizerModel()}`, {
method: 'POST',
headers: getRequestHeaders(),
body,
@ -2439,7 +2439,7 @@ async function getStatusOpen() {
}
try {
const response = await fetch('/getstatus_openai', {
const response = await fetch('/api/backends/chat-completions/status', {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify(data),

View File

@ -3665,12 +3665,13 @@ a {
}
.icon-svg {
fill: currentColor;
/* Takes on the color of the surrounding text */
fill: currentColor;
width: auto;
height: 14px;
vertical-align: middle;
aspect-ratio: 1;
/* To align with adjacent text */
place-self: center;
}
.paginationjs {

694
server.js
View File

@ -43,12 +43,11 @@ util.inspect.defaultOptions.maxStringLength = null;
const basicAuthMiddleware = require('./src/middleware/basicAuthMiddleware');
const { jsonParser, urlencodedParser } = require('./src/express-common.js');
const contentManager = require('./src/endpoints/content-manager');
const { readSecret, migrateSecrets, SECRET_KEYS } = require('./src/endpoints/secrets');
const { migrateSecrets } = require('./src/endpoints/secrets');
const {
getVersion,
getConfigValue,
color,
uuidv4,
tryParse,
clientRelativePath,
removeFileExtension,
@ -58,8 +57,7 @@ const {
forwardFetchResponse,
} = require('./src/util');
const { ensureThumbnailCache } = require('./src/endpoints/thumbnails');
const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers');
const { convertClaudePrompt } = require('./src/chat-completion');
const { loadTokenizers } = require('./src/endpoints/tokenizers');
// Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0.
// https://github.com/nodejs/node/issues/47822#issuecomment-1564708870
@ -127,11 +125,8 @@ const autorun = (getConfigValue('autorun', false) || cliArguments.autorun) && !c
const enableExtensions = getConfigValue('enableExtensions', true);
const listen = getConfigValue('listen', false);
const API_OPENAI = 'https://api.openai.com/v1';
const API_CLAUDE = 'https://api.anthropic.com/v1';
const SETTINGS_FILE = './public/settings.json';
const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants');
const { DIRECTORIES, UPLOADS_PATH, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants');
// CORS Settings //
const CORS = cors({
@ -222,7 +217,9 @@ if (!cliArguments.disableCsrf) {
if (getConfigValue('enableCorsProxy', false) || cliArguments.corsProxy) {
const bodyParser = require('body-parser');
app.use(bodyParser.json());
app.use(bodyParser.json({
limit: '200mb',
}));
console.log('Enabling CORS proxy');
app.use('/proxy/:url(*)', async (req, res) => {
@ -625,679 +622,6 @@ function cleanUploads() {
}
}
/* OpenAI */
app.post('/getstatus_openai', jsonParser, async function (request, response_getstatus_openai) {
if (!request.body) return response_getstatus_openai.sendStatus(400);
let api_url;
let api_key_openai;
let headers;
if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) {
api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);
headers = {};
} else {
api_url = 'https://openrouter.ai/api/v1';
api_key_openai = readSecret(SECRET_KEYS.OPENROUTER);
// OpenRouter needs to pass the referer: https://openrouter.ai/docs
headers = { 'HTTP-Referer': request.headers.referer };
}
if (!api_key_openai && !request.body.reverse_proxy) {
console.log('OpenAI API key is missing.');
return response_getstatus_openai.status(400).send({ error: true });
}
try {
const response = await fetch(api_url + '/models', {
method: 'GET',
headers: {
'Authorization': 'Bearer ' + api_key_openai,
...headers,
},
});
if (response.ok) {
const data = await response.json();
response_getstatus_openai.send(data);
if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) {
let models = [];
data.data.forEach(model => {
const context_length = model.context_length;
const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt));
const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0);
models[model.id] = {
tokens_per_dollar: tokens_rounded + 'k',
context_length: context_length,
};
});
console.log('Available OpenRouter models:', models);
} else {
const models = data?.data;
if (Array.isArray(models)) {
const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort();
console.log('Available OpenAI models:', modelIds);
} else {
console.log('OpenAI endpoint did not return a list of models.');
}
}
}
else {
console.log('OpenAI status check failed. Either Access Token is incorrect or API endpoint is down.');
response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } });
}
} catch (e) {
console.error(e);
if (!response_getstatus_openai.headersSent) {
response_getstatus_openai.send({ error: true });
} else {
response_getstatus_openai.end();
}
}
});
app.post('/openai_bias', jsonParser, async function (request, response) {
if (!request.body || !Array.isArray(request.body))
return response.sendStatus(400);
try {
const result = {};
const model = getTokenizerModel(String(request.query.model || ''));
// no bias for claude
if (model == 'claude') {
return response.send(result);
}
let encodeFunction;
if (sentencepieceTokenizers.includes(model)) {
const tokenizer = getSentencepiceTokenizer(model);
const instance = await tokenizer?.get();
encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text));
} else {
const tokenizer = getTiktokenTokenizer(model);
encodeFunction = (tokenizer.encode.bind(tokenizer));
}
for (const entry of request.body) {
if (!entry || !entry.text) {
continue;
}
try {
const tokens = getEntryTokens(entry.text, encodeFunction);
for (const token of tokens) {
result[token] = entry.value;
}
} catch {
console.warn('Tokenizer failed to encode:', entry.text);
}
}
// not needed for cached tokenizers
//tokenizer.free();
return response.send(result);
/**
* Gets tokenids for a given entry
* @param {string} text Entry text
* @param {(string) => Uint32Array} encode Function to encode text to token ids
* @returns {Uint32Array} Array of token ids
*/
function getEntryTokens(text, encode) {
// Get raw token ids from JSON array
if (text.trim().startsWith('[') && text.trim().endsWith(']')) {
try {
const json = JSON.parse(text);
if (Array.isArray(json) && json.every(x => typeof x === 'number')) {
return new Uint32Array(json);
}
} catch {
// ignore
}
}
// Otherwise, get token ids from tokenizer
return encode(text);
}
} catch (error) {
console.error(error);
return response.send({});
}
});
function convertChatMLPrompt(messages) {
if (typeof messages === 'string') {
return messages;
}
const messageStrings = [];
messages.forEach(m => {
if (m.role === 'system' && m.name === undefined) {
messageStrings.push('System: ' + m.content);
}
else if (m.role === 'system' && m.name !== undefined) {
messageStrings.push(m.name + ': ' + m.content);
}
else {
messageStrings.push(m.role + ': ' + m.content);
}
});
return messageStrings.join('\n') + '\nassistant:';
}
async function sendScaleRequest(request, response) {
const api_url = new URL(request.body.api_url_scale).toString();
const api_key_scale = readSecret(SECRET_KEYS.SCALE);
if (!api_key_scale) {
console.log('Scale API key is missing.');
return response.status(400).send({ error: true });
}
const requestPrompt = convertChatMLPrompt(request.body.messages);
console.log('Scale request:', requestPrompt);
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const generateResponse = await fetch(api_url, {
method: 'POST',
body: JSON.stringify({ input: { input: requestPrompt } }),
headers: {
'Content-Type': 'application/json',
'Authorization': `Basic ${api_key_scale}`,
},
timeout: 0,
});
if (!generateResponse.ok) {
console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
console.log('Scale response:', generateResponseJson);
const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] };
return response.send(reply);
} catch (error) {
console.log(error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
app.post('/generate_altscale', jsonParser, function (request, response_generate_scale) {
if (!request.body) return response_generate_scale.sendStatus(400);
fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`,
},
body: JSON.stringify({
json: {
variant: {
name: 'New Variant',
appId: '',
taxonomy: null,
},
prompt: {
id: '',
template: '{{input}}\n',
exampleVariables: {},
variablesSourceDataId: null,
systemMessage: request.body.sysprompt,
},
modelParameters: {
id: '',
modelId: 'GPT4',
modelType: 'OpenAi',
maxTokens: request.body.max_tokens,
temperature: request.body.temp,
stop: 'user:',
suffix: null,
topP: request.body.top_p,
logprobs: null,
logitBias: request.body.logit_bias,
},
inputs: [
{
index: '-1',
valueByName: {
input: request.body.prompt,
},
},
],
},
meta: {
values: {
'variant.taxonomy': ['undefined'],
'prompt.variablesSourceDataId': ['undefined'],
'modelParameters.suffix': ['undefined'],
'modelParameters.logprobs': ['undefined'],
},
},
}),
})
.then(response => response.json())
.then(data => {
console.log(data.result.data.json.outputs[0]);
return response_generate_scale.send({ output: data.result.data.json.outputs[0] });
})
.catch((error) => {
console.error('Error:', error);
return response_generate_scale.send({ error: true });
});
});
/**
* @param {express.Request} request
* @param {express.Response} response
*/
async function sendClaudeRequest(request, response) {
const api_url = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
const api_key_claude = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE);
if (!api_key_claude) {
console.log('Claude API key is missing.');
return response.status(400).send({ error: true });
}
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1';
let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt);
if (request.body.assistant_prefill && !request.body.exclude_assistant) {
requestPrompt += request.body.assistant_prefill;
}
console.log('Claude request:', requestPrompt);
const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
// Add custom stop sequences
if (Array.isArray(request.body.stop)) {
stop_sequences.push(...request.body.stop);
}
const generateResponse = await fetch(api_url + '/complete', {
method: 'POST',
signal: controller.signal,
body: JSON.stringify({
prompt: requestPrompt,
model: request.body.model,
max_tokens_to_sample: request.body.max_tokens,
stop_sequences: stop_sequences,
temperature: request.body.temperature,
top_p: request.body.top_p,
top_k: request.body.top_k,
stream: request.body.stream,
}),
headers: {
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'x-api-key': api_key_claude,
},
timeout: 0,
});
if (request.body.stream) {
// Pipe remote SSE stream to Express response
forwardFetchResponse(generateResponse, response);
} else {
if (!generateResponse.ok) {
console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson.completion;
console.log('Claude response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
}
} catch (error) {
console.log('Error communicating with Claude: ', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
/**
* @param {express.Request} request
* @param {express.Response} response
*/
async function sendPalmRequest(request, response) {
const api_key_palm = readSecret(SECRET_KEYS.PALM);
if (!api_key_palm) {
console.log('Palm API key is missing.');
return response.status(400).send({ error: true });
}
const body = {
prompt: {
text: request.body.messages,
},
stopSequences: request.body.stop,
safetySettings: PALM_SAFETY,
temperature: request.body.temperature,
topP: request.body.top_p,
topK: request.body.top_k || undefined,
maxOutputTokens: request.body.max_tokens,
candidate_count: 1,
};
console.log('Palm request:', body);
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, {
body: JSON.stringify(body),
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
signal: controller.signal,
timeout: 0,
});
if (!generateResponse.ok) {
console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson?.candidates[0]?.output;
if (!responseText) {
console.log('Palm API returned no response', generateResponseJson);
let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`;
// Check for filters
if (generateResponseJson?.filters[0]?.message) {
message = `Palm filter triggered: ${generateResponseJson.filters[0].message}`;
}
return response.send({ error: { message } });
}
console.log('Palm response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
} catch (error) {
console.log('Error communicating with Palm API: ', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
app.post('/generate_openai', jsonParser, function (request, response_generate_openai) {
if (!request.body) return response_generate_openai.status(400).send({ error: true });
switch (request.body.chat_completion_source) {
case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response_generate_openai);
case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response_generate_openai);
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response_generate_openai);
case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response_generate_openai);
}
let api_url;
let api_key_openai;
let headers;
let bodyParams;
if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) {
api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);
headers = {};
bodyParams = {};
if (getConfigValue('openai.randomizeUserId', false)) {
bodyParams['user'] = uuidv4();
}
} else {
api_url = 'https://openrouter.ai/api/v1';
api_key_openai = readSecret(SECRET_KEYS.OPENROUTER);
// OpenRouter needs to pass the referer: https://openrouter.ai/docs
headers = { 'HTTP-Referer': request.headers.referer };
bodyParams = { 'transforms': ['middle-out'] };
if (request.body.use_fallback) {
bodyParams['route'] = 'fallback';
}
}
if (!api_key_openai && !request.body.reverse_proxy) {
console.log('OpenAI API key is missing.');
return response_generate_openai.status(400).send({ error: true });
}
// Add custom stop sequences
if (Array.isArray(request.body.stop) && request.body.stop.length > 0) {
bodyParams['stop'] = request.body.stop;
}
const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string';
const textPrompt = isTextCompletion ? convertChatMLPrompt(request.body.messages) : '';
const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ?
`${api_url}/completions` :
`${api_url}/chat/completions`;
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
/** @type {import('node-fetch').RequestInit} */
const config = {
method: 'post',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + api_key_openai,
...headers,
},
body: JSON.stringify({
'messages': isTextCompletion === false ? request.body.messages : undefined,
'prompt': isTextCompletion === true ? textPrompt : undefined,
'model': request.body.model,
'temperature': request.body.temperature,
'max_tokens': request.body.max_tokens,
'stream': request.body.stream,
'presence_penalty': request.body.presence_penalty,
'frequency_penalty': request.body.frequency_penalty,
'top_p': request.body.top_p,
'top_k': request.body.top_k,
'stop': isTextCompletion === false ? request.body.stop : undefined,
'logit_bias': request.body.logit_bias,
'seed': request.body.seed,
...bodyParams,
}),
signal: controller.signal,
timeout: 0,
};
console.log(JSON.parse(String(config.body)));
makeRequest(config, response_generate_openai, request);
/**
*
* @param {*} config
* @param {express.Response} response_generate_openai
* @param {express.Request} request
* @param {Number} retries
* @param {Number} timeout
*/
async function makeRequest(config, response_generate_openai, request, retries = 5, timeout = 5000) {
try {
const fetchResponse = await fetch(endpointUrl, config);
if (request.body.stream) {
console.log('Streaming request in progress');
forwardFetchResponse(fetchResponse, response_generate_openai);
return;
}
if (fetchResponse.ok) {
let json = await fetchResponse.json();
response_generate_openai.send(json);
console.log(json);
console.log(json?.choices[0]?.message);
} else if (fetchResponse.status === 429 && retries > 0) {
console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`);
setTimeout(() => {
timeout *= 2;
makeRequest(config, response_generate_openai, request, retries - 1, timeout);
}, timeout);
} else {
await handleErrorResponse(fetchResponse);
}
} catch (error) {
console.log('Generation failed', error);
if (!response_generate_openai.headersSent) {
response_generate_openai.send({ error: true });
} else {
response_generate_openai.end();
}
}
}
async function handleErrorResponse(response) {
const responseText = await response.text();
const errorData = tryParse(responseText);
const statusMessages = {
400: 'Bad request',
401: 'Unauthorized',
402: 'Credit limit reached',
403: 'Forbidden',
404: 'Not found',
429: 'Too many requests',
451: 'Unavailable for legal reasons',
502: 'Bad gateway',
};
const message = errorData?.error?.message || statusMessages[response.status] || 'Unknown error occurred';
const quota_error = response.status === 429 && errorData?.error?.type === 'insufficient_quota';
console.log(message);
if (!response_generate_openai.headersSent) {
response_generate_openai.send({ error: { message }, quota_error: quota_error });
} else if (!response_generate_openai.writableEnded) {
response_generate_openai.write(response);
} else {
response_generate_openai.end();
}
}
});
async function sendAI21Request(request, response) {
if (!request.body) return response.sendStatus(400);
const controller = new AbortController();
console.log(request.body.messages);
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const options = {
method: 'POST',
headers: {
accept: 'application/json',
'content-type': 'application/json',
Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`,
},
body: JSON.stringify({
numResults: 1,
maxTokens: request.body.max_tokens,
minTokens: 0,
temperature: request.body.temperature,
topP: request.body.top_p,
stopSequences: request.body.stop_tokens,
topKReturn: request.body.top_k,
frequencyPenalty: {
scale: request.body.frequency_penalty * 100,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
presencePenalty: {
scale: request.body.presence_penalty,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
countPenalty: {
scale: request.body.count_pen,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
prompt: request.body.messages,
}),
signal: controller.signal,
};
fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options)
.then(r => r.json())
.then(r => {
if (r.completions === undefined) {
console.log(r);
} else {
console.log(r.completions[0].data.text);
}
const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] };
return response.send(reply);
})
.catch(err => {
console.error(err);
return response.send({ error: true });
});
}
/**
* Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow
* redirects, this is transparent to client-side code.
@ -1443,6 +767,12 @@ app.use('/api/backends/text-completions', require('./src/endpoints/backends/text
// KoboldAI
app.use('/api/backends/kobold', require('./src/endpoints/backends/kobold').router);
// OpenAI chat completions
app.use('/api/backends/chat-completions', require('./src/endpoints/backends/chat-completions').router);
// Scale (alt method)
app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt').router);
const tavernUrl = new URL(
(cliArguments.ssl ? 'https://' : 'http://') +
(listen ? '0.0.0.0' : '127.0.0.1') +

View File

@ -0,0 +1,619 @@
const express = require('express');
const fetch = require('node-fetch').default;
const { jsonParser } = require('../../express-common');
const { CHAT_COMPLETION_SOURCES, PALM_SAFETY } = require('../../constants');
const { forwardFetchResponse, getConfigValue, tryParse, uuidv4 } = require('../../util');
const { convertClaudePrompt, convertTextCompletionPrompt } = require('../prompt-converters');
const { readSecret, SECRET_KEYS } = require('../secrets');
const { getTokenizerModel, getSentencepiceTokenizer, getTiktokenTokenizer, sentencepieceTokenizers, TEXT_COMPLETION_MODELS } = require('../tokenizers');
const API_OPENAI = 'https://api.openai.com/v1';
const API_CLAUDE = 'https://api.anthropic.com/v1';
/**
* Sends a request to Claude API.
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
async function sendClaudeRequest(request, response) {
const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.CLAUDE);
if (!apiKey) {
console.log('Claude API key is missing.');
return response.status(400).send({ error: true });
}
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
let doSystemPrompt = request.body.model === 'claude-2' || request.body.model === 'claude-2.1';
let requestPrompt = convertClaudePrompt(request.body.messages, true, !request.body.exclude_assistant, doSystemPrompt);
if (request.body.assistant_prefill && !request.body.exclude_assistant) {
requestPrompt += request.body.assistant_prefill;
}
console.log('Claude request:', requestPrompt);
const stop_sequences = ['\n\nHuman:', '\n\nSystem:', '\n\nAssistant:'];
// Add custom stop sequences
if (Array.isArray(request.body.stop)) {
stop_sequences.push(...request.body.stop);
}
const generateResponse = await fetch(apiUrl + '/complete', {
method: 'POST',
signal: controller.signal,
body: JSON.stringify({
prompt: requestPrompt,
model: request.body.model,
max_tokens_to_sample: request.body.max_tokens,
stop_sequences: stop_sequences,
temperature: request.body.temperature,
top_p: request.body.top_p,
top_k: request.body.top_k,
stream: request.body.stream,
}),
headers: {
'Content-Type': 'application/json',
'anthropic-version': '2023-06-01',
'x-api-key': apiKey,
},
timeout: 0,
});
if (request.body.stream) {
// Pipe remote SSE stream to Express response
forwardFetchResponse(generateResponse, response);
} else {
if (!generateResponse.ok) {
console.log(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson.completion;
console.log('Claude response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
}
} catch (error) {
console.log('Error communicating with Claude: ', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
/**
* Sends a request to Scale Spellbook API.
* @param {import("express").Request} request Express request
* @param {import("express").Response} response Express response
*/
async function sendScaleRequest(request, response) {
const apiUrl = new URL(request.body.api_url_scale).toString();
const apiKey = readSecret(SECRET_KEYS.SCALE);
if (!apiKey) {
console.log('Scale API key is missing.');
return response.status(400).send({ error: true });
}
const requestPrompt = convertTextCompletionPrompt(request.body.messages);
console.log('Scale request:', requestPrompt);
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const generateResponse = await fetch(apiUrl, {
method: 'POST',
body: JSON.stringify({ input: { input: requestPrompt } }),
headers: {
'Content-Type': 'application/json',
'Authorization': `Basic ${apiKey}`,
},
timeout: 0,
});
if (!generateResponse.ok) {
console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
console.log('Scale response:', generateResponseJson);
const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] };
return response.send(reply);
} catch (error) {
console.log(error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
/**
* Sends a request to Google AI API.
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
async function sendPalmRequest(request, response) {
const api_key_palm = readSecret(SECRET_KEYS.PALM);
if (!api_key_palm) {
console.log('Palm API key is missing.');
return response.status(400).send({ error: true });
}
const body = {
prompt: {
text: request.body.messages,
},
stopSequences: request.body.stop,
safetySettings: PALM_SAFETY,
temperature: request.body.temperature,
topP: request.body.top_p,
topK: request.body.top_k || undefined,
maxOutputTokens: request.body.max_tokens,
candidate_count: 1,
};
console.log('Palm request:', body);
try {
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const generateResponse = await fetch(`https://generativelanguage.googleapis.com/v1beta2/models/text-bison-001:generateText?key=${api_key_palm}`, {
body: JSON.stringify(body),
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
signal: controller.signal,
timeout: 0,
});
if (!generateResponse.ok) {
console.log(`Palm API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
return response.status(generateResponse.status).send({ error: true });
}
const generateResponseJson = await generateResponse.json();
const responseText = generateResponseJson?.candidates?.[0]?.output;
if (!responseText) {
console.log('Palm API returned no response', generateResponseJson);
let message = `Palm API returned no response: ${JSON.stringify(generateResponseJson)}`;
// Check for filters
if (generateResponseJson?.filters?.[0]?.reason) {
message = `Palm filter triggered: ${generateResponseJson.filters[0].reason}`;
}
return response.send({ error: { message } });
}
console.log('Palm response:', responseText);
// Wrap it back to OAI format
const reply = { choices: [{ 'message': { 'content': responseText } }] };
return response.send(reply);
} catch (error) {
console.log('Error communicating with Palm API: ', error);
if (!response.headersSent) {
return response.status(500).send({ error: true });
}
}
}
/**
* Sends a request to Google AI API.
* @param {express.Request} request Express request
* @param {express.Response} response Express response
*/
async function sendAI21Request(request, response) {
if (!request.body) return response.sendStatus(400);
const controller = new AbortController();
console.log(request.body.messages);
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
const options = {
method: 'POST',
headers: {
accept: 'application/json',
'content-type': 'application/json',
Authorization: `Bearer ${readSecret(SECRET_KEYS.AI21)}`,
},
body: JSON.stringify({
numResults: 1,
maxTokens: request.body.max_tokens,
minTokens: 0,
temperature: request.body.temperature,
topP: request.body.top_p,
stopSequences: request.body.stop_tokens,
topKReturn: request.body.top_k,
frequencyPenalty: {
scale: request.body.frequency_penalty * 100,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
presencePenalty: {
scale: request.body.presence_penalty,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
countPenalty: {
scale: request.body.count_pen,
applyToWhitespaces: false,
applyToPunctuations: false,
applyToNumbers: false,
applyToStopwords: false,
applyToEmojis: false,
},
prompt: request.body.messages,
}),
signal: controller.signal,
};
fetch(`https://api.ai21.com/studio/v1/${request.body.model}/complete`, options)
.then(r => r.json())
.then(r => {
if (r.completions === undefined) {
console.log(r);
} else {
console.log(r.completions[0].data.text);
}
const reply = { choices: [{ 'message': { 'content': r.completions[0].data.text } }] };
return response.send(reply);
})
.catch(err => {
console.error(err);
return response.send({ error: true });
});
}
const router = express.Router();
router.post('/status', jsonParser, async function (request, response_getstatus_openai) {
if (!request.body) return response_getstatus_openai.sendStatus(400);
let api_url;
let api_key_openai;
let headers;
if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) {
api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);
headers = {};
} else {
api_url = 'https://openrouter.ai/api/v1';
api_key_openai = readSecret(SECRET_KEYS.OPENROUTER);
// OpenRouter needs to pass the referer: https://openrouter.ai/docs
headers = { 'HTTP-Referer': request.headers.referer };
}
if (!api_key_openai && !request.body.reverse_proxy) {
console.log('OpenAI API key is missing.');
return response_getstatus_openai.status(400).send({ error: true });
}
try {
const response = await fetch(api_url + '/models', {
method: 'GET',
headers: {
'Authorization': 'Bearer ' + api_key_openai,
...headers,
},
});
if (response.ok) {
const data = await response.json();
response_getstatus_openai.send(data);
if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) {
let models = [];
data.data.forEach(model => {
const context_length = model.context_length;
const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt));
const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0);
models[model.id] = {
tokens_per_dollar: tokens_rounded + 'k',
context_length: context_length,
};
});
console.log('Available OpenRouter models:', models);
} else {
const models = data?.data;
if (Array.isArray(models)) {
const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort();
console.log('Available OpenAI models:', modelIds);
} else {
console.log('OpenAI endpoint did not return a list of models.');
}
}
}
else {
console.log('OpenAI status check failed. Either Access Token is incorrect or API endpoint is down.');
response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } });
}
} catch (e) {
console.error(e);
if (!response_getstatus_openai.headersSent) {
response_getstatus_openai.send({ error: true });
} else {
response_getstatus_openai.end();
}
}
});
router.post('/bias', jsonParser, async function (request, response) {
if (!request.body || !Array.isArray(request.body))
return response.sendStatus(400);
try {
const result = {};
const model = getTokenizerModel(String(request.query.model || ''));
// no bias for claude
if (model == 'claude') {
return response.send(result);
}
let encodeFunction;
if (sentencepieceTokenizers.includes(model)) {
const tokenizer = getSentencepiceTokenizer(model);
const instance = await tokenizer?.get();
encodeFunction = (text) => new Uint32Array(instance?.encodeIds(text));
} else {
const tokenizer = getTiktokenTokenizer(model);
encodeFunction = (tokenizer.encode.bind(tokenizer));
}
for (const entry of request.body) {
if (!entry || !entry.text) {
continue;
}
try {
const tokens = getEntryTokens(entry.text, encodeFunction);
for (const token of tokens) {
result[token] = entry.value;
}
} catch {
console.warn('Tokenizer failed to encode:', entry.text);
}
}
// not needed for cached tokenizers
//tokenizer.free();
return response.send(result);
/**
* Gets tokenids for a given entry
* @param {string} text Entry text
* @param {(string) => Uint32Array} encode Function to encode text to token ids
* @returns {Uint32Array} Array of token ids
*/
function getEntryTokens(text, encode) {
// Get raw token ids from JSON array
if (text.trim().startsWith('[') && text.trim().endsWith(']')) {
try {
const json = JSON.parse(text);
if (Array.isArray(json) && json.every(x => typeof x === 'number')) {
return new Uint32Array(json);
}
} catch {
// ignore
}
}
// Otherwise, get token ids from tokenizer
return encode(text);
}
} catch (error) {
console.error(error);
return response.send({});
}
});
router.post('/generate', jsonParser, function (request, response) {
if (!request.body) return response.status(400).send({ error: true });
switch (request.body.chat_completion_source) {
case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response);
case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response);
case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
case CHAT_COMPLETION_SOURCES.PALM: return sendPalmRequest(request, response);
}
let apiUrl;
let apiKey;
let headers;
let bodyParams;
if (request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER) {
apiUrl = new URL(request.body.reverse_proxy || API_OPENAI).toString();
apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(SECRET_KEYS.OPENAI);
headers = {};
bodyParams = {};
if (getConfigValue('openai.randomizeUserId', false)) {
bodyParams['user'] = uuidv4();
}
} else {
apiUrl = 'https://openrouter.ai/api/v1';
apiKey = readSecret(SECRET_KEYS.OPENROUTER);
// OpenRouter needs to pass the referer: https://openrouter.ai/docs
headers = { 'HTTP-Referer': request.headers.referer };
bodyParams = { 'transforms': ['middle-out'] };
if (request.body.use_fallback) {
bodyParams['route'] = 'fallback';
}
}
if (!apiKey && !request.body.reverse_proxy) {
console.log('OpenAI API key is missing.');
return response.status(400).send({ error: true });
}
// Add custom stop sequences
if (Array.isArray(request.body.stop) && request.body.stop.length > 0) {
bodyParams['stop'] = request.body.stop;
}
const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string';
const textPrompt = isTextCompletion ? convertTextCompletionPrompt(request.body.messages) : '';
const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ?
`${apiUrl}/completions` :
`${apiUrl}/chat/completions`;
const controller = new AbortController();
request.socket.removeAllListeners('close');
request.socket.on('close', function () {
controller.abort();
});
/** @type {import('node-fetch').RequestInit} */
const config = {
method: 'post',
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + apiKey,
...headers,
},
body: JSON.stringify({
'messages': isTextCompletion === false ? request.body.messages : undefined,
'prompt': isTextCompletion === true ? textPrompt : undefined,
'model': request.body.model,
'temperature': request.body.temperature,
'max_tokens': request.body.max_tokens,
'stream': request.body.stream,
'presence_penalty': request.body.presence_penalty,
'frequency_penalty': request.body.frequency_penalty,
'top_p': request.body.top_p,
'top_k': request.body.top_k,
'stop': isTextCompletion === false ? request.body.stop : undefined,
'logit_bias': request.body.logit_bias,
'seed': request.body.seed,
...bodyParams,
}),
signal: controller.signal,
timeout: 0,
};
console.log(JSON.parse(String(config.body)));
makeRequest(config, response, request);
/**
* Makes a fetch request to the OpenAI API endpoint.
* @param {import('node-fetch').RequestInit} config Fetch config
* @param {express.Response} response Express response
* @param {express.Request} request Express request
* @param {Number} retries Number of retries left
* @param {Number} timeout Request timeout in ms
*/
async function makeRequest(config, response, request, retries = 5, timeout = 5000) {
try {
const fetchResponse = await fetch(endpointUrl, config);
if (request.body.stream) {
console.log('Streaming request in progress');
forwardFetchResponse(fetchResponse, response);
return;
}
if (fetchResponse.ok) {
let json = await fetchResponse.json();
response.send(json);
console.log(json);
console.log(json?.choices[0]?.message);
} else if (fetchResponse.status === 429 && retries > 0) {
console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`);
setTimeout(() => {
timeout *= 2;
makeRequest(config, response, request, retries - 1, timeout);
}, timeout);
} else {
await handleErrorResponse(fetchResponse);
}
} catch (error) {
console.log('Generation failed', error);
if (!response.headersSent) {
response.send({ error: true });
} else {
response.end();
}
}
}
/**
* @param {import("node-fetch").Response} errorResponse
*/
async function handleErrorResponse(errorResponse) {
const responseText = await errorResponse.text();
const errorData = tryParse(responseText);
const statusMessages = {
400: 'Bad request',
401: 'Unauthorized',
402: 'Credit limit reached',
403: 'Forbidden',
404: 'Not found',
429: 'Too many requests',
451: 'Unavailable for legal reasons',
502: 'Bad gateway',
};
const message = errorData?.error?.message || statusMessages[errorResponse.status] || 'Unknown error occurred';
const quota_error = errorResponse.status === 429 && errorData?.error?.type === 'insufficient_quota';
console.log(message);
if (!response.headersSent) {
response.send({ error: { message }, quota_error: quota_error });
} else if (!response.writableEnded) {
response.write(errorResponse);
} else {
response.end();
}
}
});
module.exports = {
router,
};

View File

@ -0,0 +1,76 @@
const express = require('express');
const fetch = require('node-fetch').default;
const { jsonParser } = require('../../express-common');
const { readSecret, SECRET_KEYS } = require('../secrets');
const router = express.Router();
router.post('/generate', jsonParser, function (request, response) {
if (!request.body) return response.sendStatus(400);
fetch('https://dashboard.scale.com/spellbook/api/trpc/v2.variant.run', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'cookie': `_jwt=${readSecret(SECRET_KEYS.SCALE_COOKIE)}`,
},
body: JSON.stringify({
json: {
variant: {
name: 'New Variant',
appId: '',
taxonomy: null,
},
prompt: {
id: '',
template: '{{input}}\n',
exampleVariables: {},
variablesSourceDataId: null,
systemMessage: request.body.sysprompt,
},
modelParameters: {
id: '',
modelId: 'GPT4',
modelType: 'OpenAi',
maxTokens: request.body.max_tokens,
temperature: request.body.temp,
stop: 'user:',
suffix: null,
topP: request.body.top_p,
logprobs: null,
logitBias: request.body.logit_bias,
},
inputs: [
{
index: '-1',
valueByName: {
input: request.body.prompt,
},
},
],
},
meta: {
values: {
'variant.taxonomy': ['undefined'],
'prompt.variablesSourceDataId': ['undefined'],
'modelParameters.suffix': ['undefined'],
'modelParameters.logprobs': ['undefined'],
},
},
}),
})
.then(res => res.json())
.then(data => {
console.log(data.result.data.json.outputs[0]);
return response.send({ output: data.result.data.json.outputs[0] });
})
.catch((error) => {
console.error('Error:', error);
return response.send({ error: true });
});
});
module.exports = { router };

View File

@ -72,6 +72,32 @@ function convertClaudePrompt(messages, addHumanPrefix, addAssistantPostfix, with
return requestPrompt;
}
/**
* Convert a prompt from the ChatML objects to the format used by Text Completion API.
* @param {object[]} messages Array of messages
* @returns {string} Prompt for Text Completion API
*/
function convertTextCompletionPrompt(messages) {
if (typeof messages === 'string') {
return messages;
}
const messageStrings = [];
messages.forEach(m => {
if (m.role === 'system' && m.name === undefined) {
messageStrings.push('System: ' + m.content);
}
else if (m.role === 'system' && m.name !== undefined) {
messageStrings.push(m.name + ': ' + m.content);
}
else {
messageStrings.push(m.role + ': ' + m.content);
}
});
return messageStrings.join('\n') + '\nassistant:';
}
module.exports = {
convertClaudePrompt,
convertTextCompletionPrompt,
};

View File

@ -4,7 +4,7 @@ const express = require('express');
const { SentencePieceProcessor } = require('@agnai/sentencepiece-js');
const tiktoken = require('@dqbd/tiktoken');
const { Tokenizer } = require('@agnai/web-tokenizers');
const { convertClaudePrompt } = require('../chat-completion');
const { convertClaudePrompt } = require('./prompt-converters');
const { readSecret, SECRET_KEYS } = require('./secrets');
const { TEXTGEN_TYPES } = require('../constants');
const { jsonParser } = require('../express-common');