Initial vLLM support

This commit is contained in:
sasha0552
2024-05-02 22:40:40 +00:00
committed by GitHub
parent 1a219e32fe
commit 2bd239fe81
15 changed files with 231 additions and 38 deletions

View File

@@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
}
/**
 * Builds the extra request headers used when talking to the vLLM API.
 * An `Authorization: Bearer <key>` header is included only when the stored
 * vLLM secret is truthy; otherwise an empty object is returned.
 * @param {import('./users').UserDirectoryList} directories User directories
 * @returns {object} Headers for the request
 */
function getVllmHeaders(directories) {
    const headers = {};
    const vllmKey = readSecret(directories, SECRET_KEYS.VLLM);

    // A missing or empty secret means the request goes out without auth.
    if (vllmKey) {
        headers['Authorization'] = `Bearer ${vllmKey}`;
    }

    return headers;
}
/**
* Gets the headers for the Aphrodite API.
* @param {import('./users').UserDirectoryList} directories User directories
@@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
function setAdditionalHeaders(request, args, server) {
const headerGetters = {
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
[TEXTGEN_TYPES.VLLM]: getVllmHeaders,
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,

View File

@@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
const TEXTGEN_TYPES = {
OOBA: 'ooba',
MANCER: 'mancer',
VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
'stop',
];
/**
 * Request body keys that are passed through to vLLM.
 * Element order is unchanged from the original one-per-line list; the rows
 * below are only a compact layout.
 * NOTE(review): presumably mirrors the fields of the completion request model
 * at the URL below - re-check when targeting a newer vLLM version.
 * @see https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
 */
const VLLM_KEYS = [
    'model', 'prompt', 'best_of', 'echo',
    'frequency_penalty', 'logit_bias', 'logprobs', 'max_tokens',
    'n', 'presence_penalty', 'seed', 'stop',
    'stream', 'suffix', 'temperature', 'top_p',
    'user',

    'use_beam_search', 'top_k', 'min_p', 'repetition_penalty',
    'length_penalty', 'early_stopping', 'stop_token_ids', 'ignore_eos',
    'min_tokens', 'skip_special_tokens', 'spaces_between_special_tokens', 'truncate_prompt_tokens',

    'include_stop_str_in_output', 'response_format',
    'guided_json', 'guided_regex', 'guided_choice', 'guided_grammar',
    'guided_decoding_backend', 'guided_whitespace_pattern',
];
module.exports = {
DEFAULT_USER,
DEFAULT_AVATAR,
@@ -318,4 +362,5 @@ module.exports = {
DREAMGEN_KEYS,
OPENROUTER_HEADERS,
OPENROUTER_KEYS,
VLLM_KEYS,
};

View File

@@ -4,7 +4,7 @@ const _ = require('lodash');
const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1 } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
@@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.LLAMACPP:
@@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
url += '/v1/generate';
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.TABBY:
@@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
args.body = JSON.stringify({
model: request.body.model,

View File

@@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',

View File

@@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
url += '/tokenize';
args.body = JSON.stringify({ 'content': text });
break;
case TEXTGEN_TYPES.VLLM:
return response.send({ error: true });
case TEXTGEN_TYPES.APHRODITE:
url += '/v1/tokenize';
args.body = JSON.stringify({ 'prompt': text });