import process from 'node:process';
import express from 'express';
import fetch from 'node-fetch';
import { jsonParser } from '../../express-common.js';
import {
    CHAT_COMPLETION_SOURCES,
    GEMINI_SAFETY,
    OPENROUTER_HEADERS,
} from '../../constants.js';
import {
    forwardFetchResponse,
    getConfigValue,
    tryParse,
    uuidv4,
    mergeObjectWithYaml,
    excludeKeysByYaml,
    color,
} from '../../util.js';
import {
    convertClaudeMessages,
    convertGooglePrompt,
    convertTextCompletionPrompt,
    convertCohereMessages,
    convertMistralMessages,
    convertAI21Messages,
    mergeMessages,
    cachingAtDepthForOpenRouterClaude,
    cachingAtDepthForClaude,
    getPromptNames,
} from '../../prompt-converters.js';
import { readSecret, SECRET_KEYS } from '../secrets.js';
import {
    getTokenizerModel,
    getSentencepiceTokenizer,
    getTiktokenTokenizer,
    sentencepieceTokenizers,
    TEXT_COMPLETION_MODELS,
} from '../tokenizers.js';

const API_OPENAI = 'https://api.openai.com/v1';
const API_CLAUDE = 'https://api.anthropic.com/v1';
const API_MISTRAL = 'https://api.mistral.ai/v1';
const API_COHERE_V1 = 'https://api.cohere.ai/v1';
const API_COHERE_V2 = 'https://api.cohere.ai/v2';
const API_PERPLEXITY = 'https://api.perplexity.ai';
const API_GROQ = 'https://api.groq.com/openai/v1';
const API_MAKERSUITE = 'https://generativelanguage.googleapis.com';
const API_01AI = 'https://api.01.ai/v1';
const API_BLOCKENTROPY = 'https://api.blockentropy.ai/v1';
const API_AI21 = 'https://api.ai21.com/studio/v1';
const API_NANOGPT = 'https://nano-gpt.com/api/v1';
const API_DEEPSEEK = 'https://api.deepseek.com/beta';

/**
 * Applies a post-processing step to the generated messages.
 * @param {object[]} messages Messages to post-process
 * @param {string} type Prompt conversion type
 * @param {import('../../prompt-converters.js').PromptNames} names Prompt names
 * @returns {object[]} Post-processed messages
 */
function postProcessPrompt(messages, type, names) {
    // Flag the last message as a prefill if it comes from the assistant
    const addAssistantPrefix = (x) => {
        if (x.length && x[x.length - 1].role === 'assistant') {
            x[x.length - 1].prefix = true;
        }
        return x;
    };
    switch (type) {
        case 'merge':
        case 'claude':
            return mergeMessages(messages, names, false, false);
        case 'semi':
            return mergeMessages(messages, names, true, false);
        case 'strict':
            return mergeMessages(messages, names, true, true);
        case 'deepseek':
            return addAssistantPrefix(mergeMessages(messages, names, true, false));
        case 'deepseek-reasoner':
            return addAssistantPrefix(mergeMessages(messages, names, true, true));
        default:
            return messages;
    }
}
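
// The booleans passed to mergeMessages above control how strictly the role
// sequence is normalized (see prompt-converters.js). The 'deepseek' variants
// additionally flag a trailing assistant message with prefix: true, so the model
// continues that message instead of opening a new turn.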

/**
 * Gets OpenRouter transforms based on the request.
 * @param {import('express').Request} request Express request
 * @returns {string[] | undefined} OpenRouter transforms
 */
function getOpenRouterTransforms(request) {
    switch (request.body.middleout) {
        case 'on':
            return ['middle-out'];
        case 'off':
            return [];
        case 'auto':
            return undefined;
    }
}

/**
 * Sends a request to Claude API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendClaudeRequest(request, response) {
    const apiUrl = new URL(request.body.reverse_proxy || API_CLAUDE).toString();
    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.CLAUDE);
    const divider = '-'.repeat(process.stdout.columns);
    const enableSystemPromptCache = getConfigValue('claude.enableSystemPromptCache', false) && request.body.model.startsWith('claude-3');
    let cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
    // Disabled if not a non-negative integer, or if the model doesn't support it
    if (!Number.isInteger(cachingAtDepth) || cachingAtDepth < 0 || !request.body.model.startsWith('claude-3')) {
        cachingAtDepth = -1;
    }

    if (!apiKey) {
        console.log(color.red(`Claude API key is missing.\n${divider}`));
        return response.status(400).send({ error: true });
    }

    try {
        const controller = new AbortController();
        request.socket.removeAllListeners('close');
        request.socket.on('close', function () {
            controller.abort();
        });

        const additionalHeaders = {};
        const useTools = request.body.model.startsWith('claude-3') && Array.isArray(request.body.tools) && request.body.tools.length > 0;
        const useSystemPrompt = (request.body.model.startsWith('claude-2') || request.body.model.startsWith('claude-3')) && request.body.claude_use_sysprompt;
        const convertedPrompt = convertClaudeMessages(request.body.messages, request.body.assistant_prefill, useSystemPrompt, useTools, getPromptNames(request));

        // Add custom stop sequences
        const stopSequences = [];
        if (Array.isArray(request.body.stop)) {
            stopSequences.push(...request.body.stop);
        }

        const requestBody = {
            /** @type {any} */
            system: [],
            messages: convertedPrompt.messages,
            model: request.body.model,
            max_tokens: request.body.max_tokens,
            stop_sequences: stopSequences,
            temperature: request.body.temperature,
            top_p: request.body.top_p,
            top_k: request.body.top_k,
            stream: request.body.stream,
        };

        if (useSystemPrompt) {
            if (enableSystemPromptCache && Array.isArray(convertedPrompt.systemPrompt) && convertedPrompt.systemPrompt.length) {
                convertedPrompt.systemPrompt[convertedPrompt.systemPrompt.length - 1]['cache_control'] = { type: 'ephemeral' };
            }
            requestBody.system = convertedPrompt.systemPrompt;
        } else {
            delete requestBody.system;
        }

        if (useTools) {
            additionalHeaders['anthropic-beta'] = 'tools-2024-05-16';
            requestBody.tool_choice = { type: request.body.tool_choice };
            requestBody.tools = request.body.tools
                .filter(tool => tool.type === 'function')
                .map(tool => tool.function)
                .map(fn => ({ name: fn.name, description: fn.description, input_schema: fn.parameters }));

            // Claude doesn't do prefills on function calls, and doesn't allow empty messages
            if (requestBody.tools.length && convertedPrompt.messages.length && convertedPrompt.messages[convertedPrompt.messages.length - 1].role === 'assistant') {
                convertedPrompt.messages.push({ role: 'user', content: [{ type: 'text', text: '\u200b' }] });
            }

            if (enableSystemPromptCache && requestBody.tools.length) {
                requestBody.tools[requestBody.tools.length - 1]['cache_control'] = { type: 'ephemeral' };
            }
        }

        if (cachingAtDepth !== -1) {
            cachingAtDepthForClaude(convertedPrompt.messages, cachingAtDepth);
        }

        if (enableSystemPromptCache || cachingAtDepth !== -1) {
            // Append to any existing beta flags (e.g. tools) instead of overwriting them
            additionalHeaders['anthropic-beta'] = [additionalHeaders['anthropic-beta'], 'prompt-caching-2024-07-31'].filter(Boolean).join(',');
        }

        console.log('Claude request:', requestBody);

        const generateResponse = await fetch(apiUrl + '/messages', {
            method: 'POST',
            signal: controller.signal,
            body: JSON.stringify(requestBody),
            headers: {
                'Content-Type': 'application/json',
                'anthropic-version': '2023-06-01',
                'x-api-key': apiKey,
                ...additionalHeaders,
            },
        });
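
        // One endpoint serves both modes: with stream=true Anthropic replies with SSE,
        // which is piped through as-is; otherwise the JSON reply is rewrapped below
        // into an OpenAI-style "choices" payload for the client.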
        if (request.body.stream) {
            // Pipe remote SSE stream to Express response
            forwardFetchResponse(generateResponse, response);
        } else {
            if (!generateResponse.ok) {
                const generateResponseText = await generateResponse.text();
                console.log(color.red(`Claude API returned error: ${generateResponse.status} ${generateResponse.statusText}\n${generateResponseText}\n${divider}`));
                return response.status(generateResponse.status).send({ error: true });
            }

            /** @type {any} */
            const generateResponseJson = await generateResponse.json();
            const responseText = generateResponseJson?.content?.[0]?.text || '';
            console.log('Claude response:', generateResponseJson);

            // Wrap it back to OAI format + save the original content
            const reply = { choices: [{ 'message': { 'content': responseText } }], content: generateResponseJson.content };
            return response.send(reply);
        }
    } catch (error) {
        console.log(color.red(`Error communicating with Claude: ${error}\n${divider}`));
        if (!response.headersSent) {
            return response.status(500).send({ error: true });
        }
    }
}

/**
 * Sends a request to Scale Spellbook API.
 * @param {import("express").Request} request Express request
 * @param {import("express").Response} response Express response
 */
async function sendScaleRequest(request, response) {
    const apiUrl = new URL(request.body.api_url_scale).toString();
    const apiKey = readSecret(request.user.directories, SECRET_KEYS.SCALE);

    if (!apiKey) {
        console.log('Scale API key is missing.');
        return response.status(400).send({ error: true });
    }

    const requestPrompt = convertTextCompletionPrompt(request.body.messages);
    console.log('Scale request:', requestPrompt);

    try {
        const controller = new AbortController();
        request.socket.removeAllListeners('close');
        request.socket.on('close', function () {
            controller.abort();
        });

        const generateResponse = await fetch(apiUrl, {
            method: 'POST',
            body: JSON.stringify({ input: { input: requestPrompt } }),
            headers: {
                'Content-Type': 'application/json',
                'Authorization': `Basic ${apiKey}`,
            },
            signal: controller.signal,
        });

        if (!generateResponse.ok) {
            console.log(`Scale API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
            return response.status(500).send({ error: true });
        }

        /** @type {any} */
        const generateResponseJson = await generateResponse.json();
        console.log('Scale response:', generateResponseJson);

        const reply = { choices: [{ 'message': { 'content': generateResponseJson.output } }] };
        return response.send(reply);
    } catch (error) {
        console.log(error);
        if (!response.headersSent) {
            return response.status(500).send({ error: true });
        }
    }
}
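
// For reference, a Scale Spellbook deployment expects { input: { input: string } }
// and replies with { output: string }; the handler above rewraps that output into
// an OpenAI-style choices array so all sources look the same to the client.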

/**
 * Sends a request to Google AI API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendMakerSuiteRequest(request, response) {
    const apiUrl = new URL(request.body.reverse_proxy || API_MAKERSUITE);
    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MAKERSUITE);

    if (!request.body.reverse_proxy && !apiKey) {
        console.log('Google AI Studio API key is missing.');
        return response.status(400).send({ error: true });
    }

    const model = String(request.body.model);
    const stream = Boolean(request.body.stream);
    const showThoughts = Boolean(request.body.show_thoughts);

    const generationConfig = {
        stopSequences: request.body.stop,
        candidateCount: 1,
        maxOutputTokens: request.body.max_tokens,
        temperature: request.body.temperature,
        topP: request.body.top_p,
        topK: request.body.top_k || undefined,
    };

    function getGeminiBody() {
        if (!Array.isArray(generationConfig.stopSequences) || !generationConfig.stopSequences.length) {
            delete generationConfig.stopSequences;
        }

        const should_use_system_prompt = (
            model.includes('gemini-2.0-flash-thinking-exp') ||
            model.includes('gemini-2.0-flash-exp') ||
            model.includes('gemini-1.5-flash') ||
            model.includes('gemini-1.5-pro') ||
            model.startsWith('gemini-exp')
        ) && request.body.use_makersuite_sysprompt;

        const prompt = convertGooglePrompt(request.body.messages, model, should_use_system_prompt, getPromptNames(request));
        let safetySettings = GEMINI_SAFETY;

        if (model.includes('gemini-2.0-flash-exp')) {
            safetySettings = GEMINI_SAFETY.map(setting => ({ ...setting, threshold: 'OFF' }));
        }

        let body = {
            contents: prompt.contents,
            safetySettings: safetySettings,
            generationConfig: generationConfig,
        };

        if (should_use_system_prompt) {
            body.systemInstruction = prompt.system_instruction;
        }

        return body;
    }

    const body = getGeminiBody();
    console.log('Google AI Studio request:', body);

    try {
        const controller = new AbortController();
        request.socket.removeAllListeners('close');
        request.socket.on('close', function () {
            controller.abort();
        });

        const isThinking = model.includes('thinking');
        const apiVersion = isThinking ? 'v1alpha' : 'v1beta';
        const responseType = (stream ? 'streamGenerateContent' : 'generateContent');

        const generateResponse = await fetch(`${apiUrl.toString().replace(/\/$/, '')}/${apiVersion}/models/${model}:${responseType}?key=${apiKey}${stream ? '&alt=sse' : ''}`, {
            body: JSON.stringify(body),
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            signal: controller.signal,
        });

        // The streaming endpoint only emits proper SSE when alt=sse is appended above,
        // hence the special handling here
        if (stream) {
            try {
                // Pipe remote SSE stream to Express response
                forwardFetchResponse(generateResponse, response);
            } catch (error) {
                console.log('Error forwarding streaming response:', error);
                if (!response.headersSent) {
                    return response.status(500).send({ error: true });
                }
            }
        } else {
            if (!generateResponse.ok) {
                console.log(`Google AI Studio API returned error: ${generateResponse.status} ${generateResponse.statusText} ${await generateResponse.text()}`);
                return response.status(generateResponse.status).send({ error: true });
            }

            /** @type {any} */
            const generateResponseJson = await generateResponse.json();

            const candidates = generateResponseJson?.candidates;
            if (!candidates || candidates.length === 0) {
                let message = 'Google AI Studio API returned no candidate';
                console.log(message, generateResponseJson);
                if (generateResponseJson?.promptFeedback?.blockReason) {
                    message += `\nPrompt was blocked due to: ${generateResponseJson.promptFeedback.blockReason}`;
                }
                return response.send({ error: { message } });
            }
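
            // candidates[0].content.parts is an array of { text, thought? } objects;
            // thinking models emit reasoning as parts flagged thought: true, which are
            // stripped below unless the client asked to see them (show_thoughts).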
            const responseContent = candidates[0].content ?? candidates[0].output;
            console.log('Google AI Studio response:', responseContent);

            if (Array.isArray(responseContent?.parts) && isThinking && !showThoughts) {
                responseContent.parts = responseContent.parts.filter(part => !part.thought);
            }

            const responseText = typeof responseContent === 'string' ? responseContent : responseContent?.parts?.map(part => part.text)?.join('\n\n');
            if (!responseText) {
                let message = 'Google AI Studio Candidate text empty';
                console.log(message, generateResponseJson);
                return response.send({ error: { message } });
            }

            // Wrap it back to OAI format
            const reply = { choices: [{ 'message': { 'content': responseText } }] };
            return response.send(reply);
        }
    } catch (error) {
        console.log('Error communicating with Google AI Studio API: ', error);
        if (!response.headersSent) {
            return response.status(500).send({ error: true });
        }
    }
}

/**
 * Sends a request to AI21 API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendAI21Request(request, response) {
    if (!request.body) return response.sendStatus(400);
    const controller = new AbortController();
    console.log(request.body.messages);
    request.socket.removeAllListeners('close');
    request.socket.on('close', function () {
        controller.abort();
    });

    const convertedPrompt = convertAI21Messages(request.body.messages, getPromptNames(request));

    const body = {
        messages: convertedPrompt,
        model: request.body.model,
        max_tokens: request.body.max_tokens,
        temperature: request.body.temperature,
        top_p: request.body.top_p,
        stop: request.body.stop,
        stream: request.body.stream,
    };

    const options = {
        method: 'POST',
        headers: {
            accept: 'application/json',
            'content-type': 'application/json',
            Authorization: `Bearer ${readSecret(request.user.directories, SECRET_KEYS.AI21)}`,
        },
        body: JSON.stringify(body),
        signal: controller.signal,
    };

    console.log('AI21 request:', body);

    try {
        const generateResponse = await fetch(API_AI21 + '/chat/completions', options);
        if (request.body.stream) {
            forwardFetchResponse(generateResponse, response);
        } else {
            if (!generateResponse.ok) {
                const errorText = await generateResponse.text();
                console.log(`AI21 API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`);
                const errorJson = tryParse(errorText) ?? { error: true };
                return response.status(500).send(errorJson);
            }
            const generateResponseJson = await generateResponse.json();
            console.log('AI21 response:', generateResponseJson);
            return response.send(generateResponseJson);
        }
    } catch (error) {
        console.log('Error communicating with AI21 API: ', error);
        if (!response.headersSent) {
            response.send({ error: true });
        } else {
            response.end();
        }
    }
}
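
// AI21's chat endpoint tracks the OpenAI chat completion schema closely enough
// that both streaming and non-streaming responses are forwarded to the client
// unmodified.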

/**
 * Sends a request to MistralAI API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendMistralAIRequest(request, response) {
    const apiUrl = new URL(request.body.reverse_proxy || API_MISTRAL).toString();
    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MISTRALAI);

    if (!apiKey) {
        console.log('MistralAI API key is missing.');
        return response.status(400).send({ error: true });
    }

    try {
        const messages = convertMistralMessages(request.body.messages, getPromptNames(request));
        const controller = new AbortController();
        request.socket.removeAllListeners('close');
        request.socket.on('close', function () {
            controller.abort();
        });

        const requestBody = {
            'model': request.body.model,
            'messages': messages,
            'temperature': request.body.temperature,
            'top_p': request.body.top_p,
            'frequency_penalty': request.body.frequency_penalty,
            'presence_penalty': request.body.presence_penalty,
            'max_tokens': request.body.max_tokens,
            'stream': request.body.stream,
            'safe_prompt': request.body.safe_prompt,
            'random_seed': request.body.seed === -1 ? undefined : request.body.seed,
        };

        if (Array.isArray(request.body.tools) && request.body.tools.length > 0) {
            requestBody['tools'] = request.body.tools;
            requestBody['tool_choice'] = request.body.tool_choice;
        }

        const config = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': 'Bearer ' + apiKey,
            },
            body: JSON.stringify(requestBody),
            signal: controller.signal,
            timeout: 0,
        };

        console.log('MistralAI request:', requestBody);

        const generateResponse = await fetch(apiUrl + '/chat/completions', config);
        if (request.body.stream) {
            forwardFetchResponse(generateResponse, response);
        } else {
            if (!generateResponse.ok) {
                const errorText = await generateResponse.text();
                console.log(`MistralAI API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`);
                const errorJson = tryParse(errorText) ?? { error: true };
                return response.status(500).send(errorJson);
            }
            const generateResponseJson = await generateResponse.json();
            console.log('MistralAI response:', generateResponseJson);
            return response.send(generateResponseJson);
        }
    } catch (error) {
        console.log('Error communicating with MistralAI API: ', error);
        if (!response.headersSent) {
            response.send({ error: true });
        } else {
            response.end();
        }
    }
}
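
// Note: the client sends seed === -1 to mean "no fixed seed"; Mistral expects
// random_seed to be absent in that case, so it is mapped to undefined above and
// dropped by JSON.stringify.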

/**
 * Sends a request to Cohere API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendCohereRequest(request, response) {
    const apiKey = readSecret(request.user.directories, SECRET_KEYS.COHERE);
    const controller = new AbortController();
    request.socket.removeAllListeners('close');
    request.socket.on('close', function () {
        controller.abort();
    });

    if (!apiKey) {
        console.log('Cohere API key is missing.');
        return response.status(400).send({ error: true });
    }

    try {
        const convertedHistory = convertCohereMessages(request.body.messages, getPromptNames(request));
        const tools = [];

        if (Array.isArray(request.body.tools) && request.body.tools.length > 0) {
            tools.push(...request.body.tools);
            tools.forEach(tool => {
                if (tool?.function?.parameters?.$schema) {
                    delete tool.function.parameters.$schema;
                }
            });
        }

        // https://docs.cohere.com/reference/chat
        const requestBody = {
            stream: Boolean(request.body.stream),
            model: request.body.model,
            messages: convertedHistory.chatHistory,
            temperature: request.body.temperature,
            max_tokens: request.body.max_tokens,
            k: request.body.top_k,
            p: request.body.top_p,
            seed: request.body.seed,
            stop_sequences: request.body.stop,
            frequency_penalty: request.body.frequency_penalty,
            presence_penalty: request.body.presence_penalty,
            documents: [],
            tools: tools,
        };

        const canDoSafetyMode = String(request.body.model).endsWith('08-2024');
        if (canDoSafetyMode) {
            requestBody.safety_mode = 'OFF';
        }

        console.log('Cohere request:', requestBody);

        const config = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': 'Bearer ' + apiKey,
            },
            body: JSON.stringify(requestBody),
            signal: controller.signal,
            timeout: 0,
        };

        const apiUrl = API_COHERE_V2 + '/chat';

        if (request.body.stream) {
            const stream = await fetch(apiUrl, config);
            forwardFetchResponse(stream, response);
        } else {
            const generateResponse = await fetch(apiUrl, config);
            if (!generateResponse.ok) {
                const errorText = await generateResponse.text();
                console.log(`Cohere API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`);
                const errorJson = tryParse(errorText) ?? { error: true };
                return response.status(500).send(errorJson);
            }
            const generateResponseJson = await generateResponse.json();
            console.log('Cohere response:', generateResponseJson);
            return response.send(generateResponseJson);
        }
    } catch (error) {
        console.log('Error communicating with Cohere API: ', error);
        if (!response.headersSent) {
            response.send({ error: true });
        } else {
            response.end();
        }
    }
}
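
// Cohere's v2 chat API names its sampling params k/p rather than top_k/top_p,
// and reportedly rejects the $schema keyword in tool definitions (stripped above).
// Only the ...-08-2024 model snapshots accept the safety_mode parameter.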

/**
 * Sends a request to DeepSeek API.
 * @param {express.Request} request Express request
 * @param {express.Response} response Express response
 */
async function sendDeepSeekRequest(request, response) {
    const apiUrl = new URL(request.body.reverse_proxy || API_DEEPSEEK).toString();
    const apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.DEEPSEEK);

    if (!apiKey && !request.body.reverse_proxy) {
        console.log('DeepSeek API key is missing.');
        return response.status(400).send({ error: true });
    }

    const controller = new AbortController();
    request.socket.removeAllListeners('close');
    request.socket.on('close', function () {
        controller.abort();
    });

    try {
        let bodyParams = {};

        if (request.body.logprobs > 0) {
            bodyParams['top_logprobs'] = request.body.logprobs;
            bodyParams['logprobs'] = true;
        }

        const postProcessType = String(request.body.model).endsWith('-reasoner') ? 'deepseek-reasoner' : 'deepseek';
        const processedMessages = postProcessPrompt(request.body.messages, postProcessType, getPromptNames(request));

        const requestBody = {
            'messages': processedMessages,
            'model': request.body.model,
            'temperature': request.body.temperature,
            'max_tokens': request.body.max_tokens,
            'stream': request.body.stream,
            'presence_penalty': request.body.presence_penalty,
            'frequency_penalty': request.body.frequency_penalty,
            'top_p': request.body.top_p,
            'stop': request.body.stop,
            'seed': request.body.seed,
            ...bodyParams,
        };

        const config = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'Authorization': 'Bearer ' + apiKey,
            },
            body: JSON.stringify(requestBody),
            signal: controller.signal,
        };

        console.log('DeepSeek request:', requestBody);

        const generateResponse = await fetch(apiUrl + '/chat/completions', config);
        if (request.body.stream) {
            forwardFetchResponse(generateResponse, response);
        } else {
            if (!generateResponse.ok) {
                const errorText = await generateResponse.text();
                console.log(`DeepSeek API returned error: ${generateResponse.status} ${generateResponse.statusText} ${errorText}`);
                const errorJson = tryParse(errorText) ?? { error: true };
                return response.status(500).send(errorJson);
            }
            const generateResponseJson = await generateResponse.json();
            console.log('DeepSeek response:', generateResponseJson);
            return response.send(generateResponseJson);
        }
    } catch (error) {
        console.log('Error communicating with DeepSeek API: ', error);
        if (!response.headersSent) {
            response.send({ error: true });
        } else {
            response.end();
        }
    }
}

export const router = express.Router();
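
// Connection check + model listing. The client posts its chat_completion_source
// (plus any reverse proxy overrides), and the provider's /models payload is
// returned, normalized where needed into an OpenAI-style { data: [...] } list.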
router.post('/status', jsonParser, async function (request, response_getstatus_openai) {
    if (!request.body) return response_getstatus_openai.sendStatus(400);

    let api_url;
    let api_key_openai;
    let headers;

    if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENAI) {
        api_url = new URL(request.body.reverse_proxy || API_OPENAI).toString();
        api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.OPENAI);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER) {
        api_url = 'https://openrouter.ai/api/v1';
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.OPENROUTER);
        // OpenRouter needs to pass the Referer and X-Title: https://openrouter.ai/docs#requests
        headers = { ...OPENROUTER_HEADERS };
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.MISTRALAI) {
        api_url = new URL(request.body.reverse_proxy || API_MISTRAL).toString();
        api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.MISTRALAI);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
        api_url = request.body.custom_url;
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.CUSTOM);
        headers = {};
        mergeObjectWithYaml(headers, request.body.custom_include_headers);
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.COHERE) {
        api_url = API_COHERE_V1;
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.COHERE);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.ZEROONEAI) {
        api_url = API_01AI;
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BLOCKENTROPY) {
        api_url = API_BLOCKENTROPY;
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.BLOCKENTROPY);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.NANOGPT) {
        api_url = API_NANOGPT;
        api_key_openai = readSecret(request.user.directories, SECRET_KEYS.NANOGPT);
        headers = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.DEEPSEEK) {
        // The models list is not served under /beta, so strip it from the base URL
        api_url = new URL(request.body.reverse_proxy || API_DEEPSEEK.replace('/beta', '')).toString();
        api_key_openai = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.DEEPSEEK);
        headers = {};
    } else {
        console.log('This chat completion source is not supported yet.');
        return response_getstatus_openai.status(400).send({ error: true });
    }

    if (!api_key_openai && !request.body.reverse_proxy && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.CUSTOM) {
        console.log('Chat Completion API key is missing.');
        return response_getstatus_openai.status(400).send({ error: true });
    }

    try {
        const response = await fetch(api_url + '/models', {
            method: 'GET',
            headers: {
                'Authorization': 'Bearer ' + api_key_openai,
                ...headers,
            },
        });

        if (response.ok) {
            /** @type {any} */
            const data = await response.json();

            if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.COHERE && Array.isArray(data?.models)) {
                // Normalize Cohere's { models: [{ name }] } payload into an OpenAI-style
                // { data: [{ id }] } list before the payload is sent to the client
                data.data = data.models.map(model => ({ id: model.name, ...model }));
            }

            response_getstatus_openai.send(data);

            if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER && Array.isArray(data?.data)) {
                const models = {};

                data.data.forEach(model => {
                    const context_length = model.context_length;
                    const tokens_dollar = Number(1 / (1000 * model.pricing?.prompt));
                    const tokens_rounded = (Math.round(tokens_dollar * 1000) / 1000).toFixed(0);
                    models[model.id] = {
                        tokens_per_dollar: tokens_rounded + 'k',
                        context_length: context_length,
                    };
                });

                console.log('Available OpenRouter models:', models);
            } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.MISTRALAI) {
                const models = data?.data;
                console.log(models);
            } else {
                const models = data?.data;

                if (Array.isArray(models)) {
                    const modelIds = models.filter(x => x && typeof x === 'object').map(x => x.id).sort();
                    console.log('Available models:', modelIds);
                } else {
                    console.log('Chat Completion endpoint did not return a list of models.');
                }
            }
        } else {
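            // can_bypass signals that the client may proceed without a successful
            // check, e.g. for reverse proxies that don't expose a /models route.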
            console.log('Chat Completion status check failed. Either Access Token is incorrect or API endpoint is down.');
            response_getstatus_openai.send({ error: true, can_bypass: true, data: { data: [] } });
        }
    } catch (e) {
        console.error(e);

        if (!response_getstatus_openai.headersSent) {
            response_getstatus_openai.send({ error: true });
        } else {
            response_getstatus_openai.end();
        }
    }
});

router.post('/bias', jsonParser, async function (request, response) {
    if (!request.body || !Array.isArray(request.body)) return response.sendStatus(400);

    try {
        const result = {};
        const model = getTokenizerModel(String(request.query.model || ''));

        // no bias for claude
        if (model === 'claude') {
            return response.send(result);
        }

        let encodeFunction;

        if (sentencepieceTokenizers.includes(model)) {
            const tokenizer = getSentencepiceTokenizer(model);
            const instance = await tokenizer?.get();
            if (!instance) {
                console.warn('Tokenizer not initialized:', model);
                return response.send({});
            }
            encodeFunction = (text) => new Uint32Array(instance.encodeIds(text));
        } else {
            const tokenizer = getTiktokenTokenizer(model);
            encodeFunction = (tokenizer.encode.bind(tokenizer));
        }

        for (const entry of request.body) {
            if (!entry || !entry.text) {
                continue;
            }

            try {
                const tokens = getEntryTokens(entry.text, encodeFunction);

                for (const token of tokens) {
                    result[token] = entry.value;
                }
            } catch {
                console.warn('Tokenizer failed to encode:', entry.text);
            }
        }

        // not needed for cached tokenizers
        //tokenizer.free();
        return response.send(result);

        /**
         * Gets token ids for a given entry
         * @param {string} text Entry text
         * @param {(text: string) => Uint32Array} encode Function to encode text to token ids
         * @returns {Uint32Array} Array of token ids
         */
        function getEntryTokens(text, encode) {
            // Get raw token ids from JSON array
            if (text.trim().startsWith('[') && text.trim().endsWith(']')) {
                try {
                    const json = JSON.parse(text);
                    if (Array.isArray(json) && json.every(x => typeof x === 'number')) {
                        return new Uint32Array(json);
                    }
                } catch {
                    // ignore
                }
            }

            // Otherwise, get token ids from tokenizer
            return encode(text);
        }
    } catch (error) {
        console.error(error);
        return response.send({});
    }
});

router.post('/generate', jsonParser, function (request, response) {
    if (!request.body) return response.status(400).send({ error: true });

    switch (request.body.chat_completion_source) {
        case CHAT_COMPLETION_SOURCES.CLAUDE: return sendClaudeRequest(request, response);
        case CHAT_COMPLETION_SOURCES.SCALE: return sendScaleRequest(request, response);
        case CHAT_COMPLETION_SOURCES.AI21: return sendAI21Request(request, response);
        case CHAT_COMPLETION_SOURCES.MAKERSUITE: return sendMakerSuiteRequest(request, response);
        case CHAT_COMPLETION_SOURCES.MISTRALAI: return sendMistralAIRequest(request, response);
        case CHAT_COMPLETION_SOURCES.COHERE: return sendCohereRequest(request, response);
        case CHAT_COMPLETION_SOURCES.DEEPSEEK: return sendDeepSeekRequest(request, response);
    }
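
    // Every other source falls through to the generic OpenAI-compatible flow below,
    // differing only in base URL, headers, and extra body params.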
    let apiUrl;
    let apiKey;
    let headers;
    let bodyParams;
    const isTextCompletion = Boolean(request.body.model && TEXT_COMPLETION_MODELS.includes(request.body.model)) || typeof request.body.messages === 'string';

    if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENAI) {
        apiUrl = new URL(request.body.reverse_proxy || API_OPENAI).toString();
        apiKey = request.body.reverse_proxy ? request.body.proxy_password : readSecret(request.user.directories, SECRET_KEYS.OPENAI);
        headers = {};
        bodyParams = {
            logprobs: request.body.logprobs,
            top_logprobs: undefined,
        };

        // Adjust logprobs params for Chat Completions API, which expects { top_logprobs: number; logprobs: boolean; }
        if (!isTextCompletion && bodyParams.logprobs > 0) {
            bodyParams.top_logprobs = bodyParams.logprobs;
            bodyParams.logprobs = true;
        }

        if (getConfigValue('openai.randomizeUserId', false)) {
            bodyParams['user'] = uuidv4();
        }
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.OPENROUTER) {
        apiUrl = 'https://openrouter.ai/api/v1';
        apiKey = readSecret(request.user.directories, SECRET_KEYS.OPENROUTER);
        // OpenRouter needs to pass the Referer and X-Title: https://openrouter.ai/docs#requests
        headers = { ...OPENROUTER_HEADERS };
        bodyParams = {
            'transforms': getOpenRouterTransforms(request),
        };

        if (request.body.min_p !== undefined) {
            bodyParams['min_p'] = request.body.min_p;
        }

        if (request.body.top_a !== undefined) {
            bodyParams['top_a'] = request.body.top_a;
        }

        if (request.body.repetition_penalty !== undefined) {
            bodyParams['repetition_penalty'] = request.body.repetition_penalty;
        }

        if (Array.isArray(request.body.provider) && request.body.provider.length > 0) {
            bodyParams['provider'] = {
                allow_fallbacks: request.body.allow_fallbacks ?? true,
                order: request.body.provider ?? [],
            };
        }

        if (request.body.use_fallback) {
            bodyParams['route'] = 'fallback';
        }

        const cachingAtDepth = getConfigValue('claude.cachingAtDepth', -1);
        if (Number.isInteger(cachingAtDepth) && cachingAtDepth >= 0 && request.body.model?.startsWith('anthropic/claude-3')) {
            cachingAtDepthForOpenRouterClaude(request.body.messages, cachingAtDepth);
        }
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
        apiUrl = request.body.custom_url;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.CUSTOM);
        headers = {};
        bodyParams = {
            logprobs: request.body.logprobs,
            top_logprobs: undefined,
        };

        // Adjust logprobs params for Chat Completions API, which expects { top_logprobs: number; logprobs: boolean; }
        if (!isTextCompletion && bodyParams.logprobs > 0) {
            bodyParams.top_logprobs = bodyParams.logprobs;
            bodyParams.logprobs = true;
        }

        mergeObjectWithYaml(bodyParams, request.body.custom_include_body);
        mergeObjectWithYaml(headers, request.body.custom_include_headers);

        if (request.body.custom_prompt_post_processing) {
            console.log('Applying custom prompt post-processing of type', request.body.custom_prompt_post_processing);
            request.body.messages = postProcessPrompt(
                request.body.messages,
                request.body.custom_prompt_post_processing,
                getPromptNames(request));
        }
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.PERPLEXITY) {
        apiUrl = API_PERPLEXITY;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.PERPLEXITY);
        headers = {};
        bodyParams = {};
        request.body.messages = postProcessPrompt(request.body.messages, 'strict', getPromptNames(request));
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.GROQ) {
        apiUrl = API_GROQ;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.GROQ);
        headers = {};
        bodyParams = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.NANOGPT) {
        apiUrl = API_NANOGPT;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.NANOGPT);
        headers = {};
        bodyParams = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.ZEROONEAI) {
        apiUrl = API_01AI;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.ZEROONEAI);
        headers = {};
        bodyParams = {};
    } else if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.BLOCKENTROPY) {
        apiUrl = API_BLOCKENTROPY;
        apiKey = readSecret(request.user.directories, SECRET_KEYS.BLOCKENTROPY);
        headers = {};
        bodyParams = {};
    } else {
        console.log('This chat completion source is not supported yet.');
        return response.status(400).send({ error: true });
    }

    if (!apiKey && !request.body.reverse_proxy && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.CUSTOM) {
        console.log('OpenAI API key is missing.');
        return response.status(400).send({ error: true });
    }

    // Add custom stop sequences
    if (Array.isArray(request.body.stop) && request.body.stop.length > 0) {
        bodyParams['stop'] = request.body.stop;
    }

    const textPrompt = isTextCompletion ? convertTextCompletionPrompt(request.body.messages) : '';
    const endpointUrl = isTextCompletion && request.body.chat_completion_source !== CHAT_COMPLETION_SOURCES.OPENROUTER ?
        `${apiUrl}/completions` :
        `${apiUrl}/chat/completions`;

    const controller = new AbortController();
    request.socket.removeAllListeners('close');
    request.socket.on('close', function () {
        controller.abort();
    });

    if (!isTextCompletion && Array.isArray(request.body.tools) && request.body.tools.length > 0) {
        bodyParams['tools'] = request.body.tools;
        bodyParams['tool_choice'] = request.body.tool_choice;
    }

    const requestBody = {
        'messages': isTextCompletion === false ? request.body.messages : undefined,
        'prompt': isTextCompletion === true ? textPrompt : undefined,
        'model': request.body.model,
        'temperature': request.body.temperature,
        'max_tokens': request.body.max_tokens,
        'max_completion_tokens': request.body.max_completion_tokens,
        'stream': request.body.stream,
        'presence_penalty': request.body.presence_penalty,
        'frequency_penalty': request.body.frequency_penalty,
        'top_p': request.body.top_p,
        'top_k': request.body.top_k,
        'stop': isTextCompletion === false ? request.body.stop : undefined,
        'logit_bias': request.body.logit_bias,
        'seed': request.body.seed,
        'n': request.body.n,
        ...bodyParams,
    };

    if (request.body.chat_completion_source === CHAT_COMPLETION_SOURCES.CUSTOM) {
        excludeKeysByYaml(requestBody, request.body.custom_exclude_body);
    }

    /** @type {import('node-fetch').RequestInit} */
    const config = {
        method: 'post',
        headers: {
            'Content-Type': 'application/json',
            'Authorization': 'Bearer ' + apiKey,
            ...headers,
        },
        body: JSON.stringify(requestBody),
        signal: controller.signal,
    };

    console.log(requestBody);

    makeRequest(config, response, request);
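
    // On HTTP 429, makeRequest retries with exponential backoff: up to 5 attempts,
    // starting with a 5-second delay that doubles after every retry.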

    /**
     * Makes a fetch request to the OpenAI API endpoint.
     * @param {import('node-fetch').RequestInit} config Fetch config
     * @param {express.Response} response Express response
     * @param {express.Request} request Express request
     * @param {number} retries Number of retries left
     * @param {number} timeout Request timeout in ms
     */
    async function makeRequest(config, response, request, retries = 5, timeout = 5000) {
        try {
            const fetchResponse = await fetch(endpointUrl, config);

            if (request.body.stream) {
                console.log('Streaming request in progress');
                forwardFetchResponse(fetchResponse, response);
                return;
            }

            if (fetchResponse.ok) {
                /** @type {any} */
                let json = await fetchResponse.json();
                response.send(json);
                console.log(json);
                console.log(json?.choices?.[0]?.message);
            } else if (fetchResponse.status === 429 && retries > 0) {
                console.log(`Out of quota, retrying in ${Math.round(timeout / 1000)}s`);
                setTimeout(() => {
                    timeout *= 2;
                    makeRequest(config, response, request, retries - 1, timeout);
                }, timeout);
            } else {
                await handleErrorResponse(fetchResponse);
            }
        } catch (error) {
            console.log('Generation failed', error);
            const message = error.code === 'ECONNREFUSED' ? `Connection refused: ${error.message}` : error.message || 'Unknown error occurred';

            if (!response.headersSent) {
                response.status(502).send({ error: { message, ...error } });
            } else {
                response.end();
            }
        }
    }

    /**
     * @param {import("node-fetch").Response} errorResponse
     */
    async function handleErrorResponse(errorResponse) {
        const responseText = await errorResponse.text();
        const errorData = tryParse(responseText);

        const message = errorResponse.statusText || 'Unknown error occurred';
        const quota_error = errorResponse.status === 429 && errorData?.error?.type === 'insufficient_quota';
        console.log('Chat completion request error: ', message, responseText);

        if (!response.headersSent) {
            response.send({ error: { message }, quota_error: quota_error });
        } else if (!response.writableEnded) {
            // A stream has already started: write the raw error text, not the Response object
            response.write(responseText);
        } else {
            response.end();
        }
    }
});
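
// Illustrative /generate request body (the router's mount path is defined
// elsewhere in the server; the model name here is just an example):
//   { "chat_completion_source": "openai", "model": "gpt-4o-mini", "stream": false,
//     "messages": [{ "role": "user", "content": "Hello" }], "max_tokens": 256 }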