mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Initial vLLM support
This commit is contained in:
@@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
|
||||
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
|
||||
}
|
||||
|
||||
/**
 * Builds the additional request headers for the vLLM API.
 *
 * The secret stored under `SECRET_KEYS.VLLM` is looked up via `readSecret`.
 * A truthy value is sent as a Bearer token; otherwise no headers are added.
 * @param {import('./users').UserDirectoryList} directories User directories
 * @returns {object} Headers for the request (an empty object when no key is available)
 */
function getVllmHeaders(directories) {
    const vllmKey = readSecret(directories, SECRET_KEYS.VLLM);

    // Nothing usable was read back: contribute no headers at all.
    if (!vllmKey) {
        return {};
    }

    return { 'Authorization': `Bearer ${vllmKey}` };
}
|
||||
|
||||
/**
|
||||
* Gets the headers for the Aphrodite API.
|
||||
* @param {import('./users').UserDirectoryList} directories User directories
|
||||
@@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
|
||||
function setAdditionalHeaders(request, args, server) {
|
||||
const headerGetters = {
|
||||
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
|
||||
[TEXTGEN_TYPES.VLLM]: getVllmHeaders,
|
||||
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
|
||||
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
|
||||
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,
|
||||
|
@@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
|
||||
const TEXTGEN_TYPES = {
|
||||
OOBA: 'ooba',
|
||||
MANCER: 'mancer',
|
||||
VLLM: 'vllm',
|
||||
APHRODITE: 'aphrodite',
|
||||
TABBY: 'tabby',
|
||||
KOBOLDCPP: 'koboldcpp',
|
||||
@@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
|
||||
'stop',
|
||||
];
|
||||
|
||||
/**
 * Names of the request body fields recognised for vLLM completion requests.
 * Source of the list:
 * https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
 *
 * NOTE(review): the three groups below keep the blank-line grouping of the
 * original list; presumably they mirror the sections of the linked protocol
 * definition — confirm against that file before relying on the labels.
 */
const VLLM_KEYS = [
    // Group 1: core completion fields
    'model', 'prompt', 'best_of', 'echo',
    'frequency_penalty', 'logit_bias', 'logprobs', 'max_tokens',
    'n', 'presence_penalty', 'seed', 'stop',
    'stream', 'suffix', 'temperature', 'top_p',
    'user',

    // Group 2: additional sampling fields
    'use_beam_search', 'top_k', 'min_p', 'repetition_penalty',
    'length_penalty', 'early_stopping', 'stop_token_ids', 'ignore_eos',
    'min_tokens', 'skip_special_tokens', 'spaces_between_special_tokens', 'truncate_prompt_tokens',

    // Group 3: output formatting and guided decoding fields
    'include_stop_str_in_output', 'response_format',
    'guided_json', 'guided_regex', 'guided_choice', 'guided_grammar',
    'guided_decoding_backend', 'guided_whitespace_pattern',
];
|
||||
|
||||
module.exports = {
|
||||
DEFAULT_USER,
|
||||
DEFAULT_AVATAR,
|
||||
@@ -318,4 +362,5 @@ module.exports = {
|
||||
DREAMGEN_KEYS,
|
||||
OPENROUTER_HEADERS,
|
||||
OPENROUTER_KEYS,
|
||||
VLLM_KEYS,
|
||||
};
|
||||
|
@@ -4,7 +4,7 @@ const _ = require('lodash');
|
||||
const Readable = require('stream').Readable;
|
||||
|
||||
const { jsonParser } = require('../../express-common');
|
||||
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
|
||||
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
|
||||
const { forwardFetchResponse, trimV1 } = require('../../util');
|
||||
const { setAdditionalHeaders } = require('../../additional-headers');
|
||||
|
||||
@@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
|
||||
} else {
|
||||
switch (request.body.api_type) {
|
||||
case TEXTGEN_TYPES.OOBA:
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
case TEXTGEN_TYPES.KOBOLDCPP:
|
||||
case TEXTGEN_TYPES.LLAMACPP:
|
||||
@@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
|
||||
url += '/v1/generate';
|
||||
} else {
|
||||
switch (request.body.api_type) {
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
case TEXTGEN_TYPES.OOBA:
|
||||
case TEXTGEN_TYPES.TABBY:
|
||||
@@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
|
||||
args.body = JSON.stringify(request.body);
|
||||
}
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
|
||||
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
|
||||
args.body = JSON.stringify(request.body);
|
||||
}
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
||||
args.body = JSON.stringify({
|
||||
model: request.body.model,
|
||||
|
@@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
|
||||
const SECRET_KEYS = {
|
||||
HORDE: 'api_key_horde',
|
||||
MANCER: 'api_key_mancer',
|
||||
VLLM: 'api_key_vllm',
|
||||
APHRODITE: 'api_key_aphrodite',
|
||||
TABBY: 'api_key_tabby',
|
||||
OPENAI: 'api_key_openai',
|
||||
|
@@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
|
||||
url += '/tokenize';
|
||||
args.body = JSON.stringify({ 'content': text });
|
||||
break;
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
return response.send({ error: true });
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
url += '/v1/tokenize';
|
||||
args.body = JSON.stringify({ 'prompt': text });
|
||||
|
Reference in New Issue
Block a user