From 2bd239fe81386ed4891578bfb883361c59c71ba4 Mon Sep 17 00:00:00 2001 From: sasha0552 Date: Thu, 2 May 2024 22:40:40 +0000 Subject: [PATCH] Initial vLLM support --- .../presets/textgen/Universal-Creative.json | 4 +- .../presets/textgen/Universal-Light.json | 4 +- .../textgen/Universal-Super-Creative.json | 4 +- public/index.html | 45 ++++++++++-- public/script.js | 16 ++++- public/scripts/preset-manager.js | 1 + public/scripts/secrets.js | 2 + public/scripts/slash-commands.js | 1 + public/scripts/textgen-models.js | 51 ++++++++++++++ public/scripts/textgen-settings.js | 70 +++++++++++++------ src/additional-headers.js | 14 ++++ src/constants.js | 45 ++++++++++++ src/endpoints/backends/text-completions.js | 9 ++- src/endpoints/secrets.js | 1 + src/endpoints/tokenizers.js | 2 + 15 files changed, 231 insertions(+), 38 deletions(-) diff --git a/default/content/presets/textgen/Universal-Creative.json b/default/content/presets/textgen/Universal-Creative.json index 76cd957e9..7784fe226 100644 --- a/default/content/presets/textgen/Universal-Creative.json +++ b/default/content/presets/textgen/Universal-Creative.json @@ -33,8 +33,8 @@ "negative_prompt": "", "grammar_string": "", "banned_tokens": "", - "ignore_eos_token_aphrodite": false, - "spaces_between_special_tokens_aphrodite": true, + "ignore_eos_token": false, + "spaces_between_special_tokens": true, "type": "ooba", "legacy_api": false, "sampler_order": [ diff --git a/default/content/presets/textgen/Universal-Light.json b/default/content/presets/textgen/Universal-Light.json index 083f908e7..d855094d1 100644 --- a/default/content/presets/textgen/Universal-Light.json +++ b/default/content/presets/textgen/Universal-Light.json @@ -33,8 +33,8 @@ "negative_prompt": "", "grammar_string": "", "banned_tokens": "", - "ignore_eos_token_aphrodite": false, - "spaces_between_special_tokens_aphrodite": true, + "ignore_eos_token": false, + "spaces_between_special_tokens": true, "type": "ooba", "legacy_api": false, "sampler_order": [ diff --git a/default/content/presets/textgen/Universal-Super-Creative.json b/default/content/presets/textgen/Universal-Super-Creative.json index d052806d8..60df29700 100644 --- a/default/content/presets/textgen/Universal-Super-Creative.json +++ b/default/content/presets/textgen/Universal-Super-Creative.json @@ -33,8 +33,8 @@ "negative_prompt": "", "grammar_string": "", "banned_tokens": "", - "ignore_eos_token_aphrodite": false, - "spaces_between_special_tokens_aphrodite": true, + "ignore_eos_token": false, + "spaces_between_special_tokens": true, "type": "ooba", "legacy_api": false, "sampler_order": [ diff --git a/public/index.html b/public/index.html index ed0047ecc..48bd6ec54 100644 --- a/public/index.html +++ b/public/index.html @@ -1125,7 +1125,7 @@
-
+
Multiple swipes per generation
@@ -1399,8 +1399,8 @@
- -
@@ -1948,7 +1948,8 @@

API Type

+
+ +

vLLM API key

+
+ + +
+
+ For privacy reasons, your API key will be hidden after you reload the page. +
+
+

API URL

+ Example: http://127.0.0.1:8000 + +
+
+

vLLM Model

+ +
+
+
- +
diff --git a/public/script.js b/public/script.js index 14079e155..1d33d09b3 100644 --- a/public/script.js +++ b/public/script.js @@ -22,7 +22,7 @@ import { parseTabbyLogprobs, } from './scripts/textgen-settings.js'; -const { MANCER, TOGETHERAI, OOBA, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types; +const { MANCER, TOGETHERAI, OOBA, VLLM, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types; import { world_info, @@ -218,7 +218,7 @@ import { import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js'; import { hideLoader, showLoader } from './scripts/loader.js'; import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js'; -import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js'; +import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js'; import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId } from './scripts/chats.js'; import { initPresetManager } from './scripts/preset-manager.js'; import { evaluateMacros } from './scripts/macros.js'; @@ -1071,6 +1071,9 @@ async function getStatusTextgen() { } else if (textgen_settings.type === OPENROUTER) { loadOpenRouterModels(data?.data); online_status = textgen_settings.openrouter_model; + } else if (textgen_settings.type === VLLM) { + loadVllmModels(data?.data); + online_status = textgen_settings.vllm_model; } else if (textgen_settings.type === APHRODITE) { loadAphroditeModels(data?.data); online_status = textgen_settings.aphrodite_model; @@ -4832,6 +4835,7 @@ function parseAndSaveLogprobs(data, continueFrom) { case textgen_types.LLAMACPP: { logprobs = data?.completion_probabilities?.map(x => parseTextgenLogprobs(x.content, [x])) || null; } break; + case textgen_types.VLLM: case textgen_types.APHRODITE: case textgen_types.MANCER: case textgen_types.TABBY: { @@ -4888,7 +4892,7 @@ function extractMultiSwipes(data, type) { return swipes; } - if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, APHRODITE].includes(textgen_settings.type))) { + if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, VLLM, APHRODITE].includes(textgen_settings.type))) { if (!Array.isArray(data.choices)) { return swipes; } @@ -7922,6 +7926,11 @@ const CONNECT_API_MAP = { button: '#api_button_textgenerationwebui', type: textgen_types.MANCER, }, + 'vllm': { + selected: 'textgenerationwebui', + button: '#api_button_textgenerationwebui', + type: textgen_types.VLLM, + }, 'aphrodite': { selected: 'textgenerationwebui', button: '#api_button_textgenerationwebui', @@ -8896,6 +8905,7 @@ jQuery(async function () { $('#api_button_textgenerationwebui').on('click', async function (e) { const keys = [ { id: 'api_key_mancer', secret: SECRET_KEYS.MANCER }, + { id: 'api_key_vllm', secret: SECRET_KEYS.VLLM }, { id: 'api_key_aphrodite', secret: SECRET_KEYS.APHRODITE }, { id: 'api_key_tabby', secret: SECRET_KEYS.TABBY }, { id: 'api_key_togetherai', secret: SECRET_KEYS.TOGETHERAI }, diff --git a/public/scripts/preset-manager.js b/public/scripts/preset-manager.js index 1a28f075c..889c254cb 100644 --- a/public/scripts/preset-manager.js +++ b/public/scripts/preset-manager.js @@ -309,6 +309,7 @@ class PresetManager { 'mancer_model', 'togetherai_model', 'ollama_model', + 'vllm_model', 'aphrodite_model', 'server_urls', 'type', diff --git a/public/scripts/secrets.js b/public/scripts/secrets.js index 00c322353..83c994d31 100644 --- a/public/scripts/secrets.js +++ b/public/scripts/secrets.js @@ -3,6 +3,7 @@ import { callPopup, getRequestHeaders } from '../script.js'; export const SECRET_KEYS = { HORDE: 'api_key_horde', MANCER: 'api_key_mancer', + VLLM: 'api_key_vllm', APHRODITE: 'api_key_aphrodite', TABBY: 'api_key_tabby', OPENAI: 'api_key_openai', @@ -38,6 +39,7 @@ const INPUT_MAP = { [SECRET_KEYS.AI21]: '#api_key_ai21', [SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie', [SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite', + [SECRET_KEYS.VLLM]: '#api_key_vllm', [SECRET_KEYS.APHRODITE]: '#api_key_aphrodite', [SECRET_KEYS.TABBY]: '#api_key_tabby', [SECRET_KEYS.MISTRALAI]: '#api_key_mistralai', diff --git a/public/scripts/slash-commands.js b/public/scripts/slash-commands.js index 0e49b2757..ddda52d7d 100644 --- a/public/scripts/slash-commands.js +++ b/public/scripts/slash-commands.js @@ -1665,6 +1665,7 @@ function modelCallback(_, model) { { id: 'model_infermaticai_select', api: 'textgenerationwebui', type: textgen_types.INFERMATICAI }, { id: 'model_dreamgen_select', api: 'textgenerationwebui', type: textgen_types.DREAMGEN }, { id: 'mancer_model', api: 'textgenerationwebui', type: textgen_types.MANCER }, + { id: 'vllm_model', api: 'textgenerationwebui', type: textgen_types.VLLM }, { id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE }, { id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA }, { id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI }, diff --git a/public/scripts/textgen-models.js b/public/scripts/textgen-models.js index c96ad4ce4..28ce1e21e 100644 --- a/public/scripts/textgen-models.js +++ b/public/scripts/textgen-models.js @@ -7,6 +7,7 @@ let mancerModels = []; let togetherModels = []; let infermaticAIModels = []; let dreamGenModels = []; +let vllmModels = []; let aphroditeModels = []; export let openRouterModels = []; @@ -156,6 +157,28 @@ export async function loadOpenRouterModels(data) { calculateOpenRouterCost(); } +export async function loadVllmModels(data) { + if (!Array.isArray(data)) { + console.error('Invalid vLLM models data', data); + return; + } + + vllmModels = data; + + if (!data.find(x => x.id === textgen_settings.vllm_model)) { + textgen_settings.vllm_model = data[0]?.id || ''; + } + + $('#vllm_model').empty(); + for (const model of data) { + const option = document.createElement('option'); + option.value = model.id; + option.text = model.id; + option.selected = model.id === textgen_settings.vllm_model; + $('#vllm_model').append(option); + } +} + export async function loadAphroditeModels(data) { if (!Array.isArray(data)) { console.error('Invalid Aphrodite models data', data); @@ -224,6 +247,12 @@ function onOpenRouterModelSelect() { setGenerationParamsFromPreset({ max_length: model.context_length }); } +function onVllmModelSelect() { + const modelId = String($('#vllm_model').val()); + textgen_settings.vllm_model = modelId; + $('#api_button_textgenerationwebui').trigger('click'); +} + function onAphroditeModelSelect() { const modelId = String($('#aphrodite_model').val()); textgen_settings.aphrodite_model = modelId; @@ -310,6 +339,20 @@ function getOpenRouterModelTemplate(option) { `)); } +function getVllmModelTemplate(option) { + const model = vllmModels.find(x => x.id === option?.element?.value); + + if (!option.id || !model) { + return option.text; + } + + return $((` +
+
${DOMPurify.sanitize(model.id)}
+
+ `)); +} + function getAphroditeModelTemplate(option) { const model = aphroditeModels.find(x => x.id === option?.element?.value); @@ -426,6 +469,7 @@ jQuery(function () { $('#ollama_model').on('change', onOllamaModelSelect); $('#openrouter_model').on('change', onOpenRouterModelSelect); $('#ollama_download_model').on('click', downloadOllamaModel); + $('#vllm_model').on('change', onVllmModelSelect); $('#aphrodite_model').on('change', onAphroditeModelSelect); if (!isMobile()) { @@ -470,6 +514,13 @@ jQuery(function () { width: '100%', templateResult: getOpenRouterModelTemplate, }); + $('#vllm_model').select2({ + placeholder: 'Select a model', + searchInputPlaceholder: 'Search models...', + searchInputCssClass: 'text_pole', + width: '100%', + templateResult: getVllmModelTemplate, + }); $('#aphrodite_model').select2({ placeholder: 'Select a model', searchInputPlaceholder: 'Search models...', diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js index a3b40970f..7058f7e35 100644 --- a/public/scripts/textgen-settings.js +++ b/public/scripts/textgen-settings.js @@ -28,6 +28,7 @@ export { export const textgen_types = { OOBA: 'ooba', MANCER: 'mancer', + VLLM: 'vllm', APHRODITE: 'aphrodite', TABBY: 'tabby', KOBOLDCPP: 'koboldcpp', @@ -39,7 +40,7 @@ export const textgen_types = { OPENROUTER: 'openrouter', }; -const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types; +const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types; const LLAMACPP_DEFAULT_ORDER = [ 'top_k', @@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api'; const SERVER_INPUTS = { [textgen_types.OOBA]: '#textgenerationwebui_api_url_text', + [textgen_types.VLLM]: '#vllm_api_url_text', [textgen_types.APHRODITE]: '#aphrodite_api_url_text', [textgen_types.TABBY]: '#tabby_api_url_text', [textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text', @@ -135,8 +137,8 @@ const settings = { samplers: LLAMACPP_DEFAULT_ORDER, //n_aphrodite: 1, //best_of_aphrodite: 1, - ignore_eos_token_aphrodite: false, - spaces_between_special_tokens_aphrodite: true, + ignore_eos_token: false, + spaces_between_special_tokens: true, //logits_processors_aphrodite: [], //log_probs_aphrodite: 0, //prompt_log_probs_aphrodite: 0, @@ -146,6 +148,7 @@ const settings = { infermaticai_model: '', ollama_model: '', openrouter_model: 'openrouter/auto', + vllm_model: '', aphrodite_model: '', dreamgen_model: 'opus-v1-xl/text', legacy_api: false, @@ -208,8 +211,8 @@ const setting_names = [ 'legacy_api', //'n_aphrodite', //'best_of_aphrodite', - 'ignore_eos_token_aphrodite', - 'spaces_between_special_tokens_aphrodite', + 'ignore_eos_token', + 'spaces_between_special_tokens', //'logits_processors_aphrodite', //'log_probs_aphrodite', //'prompt_log_probs_aphrodite' @@ -587,15 +590,22 @@ jQuery(function () { const type = String($(this).val()); settings.type = type; - if (settings.type === textgen_types.APHRODITE) { - //this is needed because showTypeSpecificControls() does not handle NOT declarations - $('[data-forAphro="False"]').each(function () { - $(this).hide(); - }); + if (settings.type === textgen_types.VLLM || settings.type === textgen_types.APHRODITE) { + if (settings.type === textgen_types.APHRODITE) { + //this is needed because showTypeSpecificControls() does not handle NOT declarations + $('[data-forAphro="False"]').each(function () { + $(this).hide(); + }); + } else { + //this is needed because showTypeSpecificControls() does not handle NOT declarations + $('[data-forAphro="False"]').each(function () { + $(this).show(); + }); + } $('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1 $('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample' $('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list do to this' - //special handling for Aphrodite topK -1 disable state + //special handling for vLLM/Aphrodite topK -1 disable state $('#top_k_textgenerationwebui').attr('min', -1); if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) { settings['top_k'] = -1; @@ -607,7 +617,7 @@ jQuery(function () { $(this).show(); }); $('#mirostat_mode_textgenerationwebui').attr('step', 1); - //undo special Aphrodite setup for topK + //undo special vLLM/Aphrodite setup for topK $('#top_k_textgenerationwebui').attr('min', 0); if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) { settings['top_k'] = 0; @@ -711,9 +721,10 @@ jQuery(function () { const value = Number($(this).val()); $(`#${id}_counter_textgenerationwebui`).val(value); settings[id] = value; - //special handling for aphrodite using -1 as disabled instead of 0 + //special handling for vLLM/Aphrodite using -1 as disabled instead of 0 if ($(this).attr('id') === 'top_k_textgenerationwebui' && - settings.type === textgen_types.APHRODITE && + (settings.type === textgen_types.VLLM || + settings.type === textgen_types.APHRODITE) && value === 0) { settings[id] = -1; $(this).val(-1); @@ -869,6 +880,7 @@ export function parseTextgenLogprobs(token, logprobs) { switch (settings.type) { case TABBY: + case VLLM: case APHRODITE: case MANCER: case OOBA: { @@ -964,6 +976,8 @@ function getModel() { return settings.dreamgen_model; case OPENROUTER: return settings.openrouter_model; + case VLLM: + return settings.vllm_model; case APHRODITE: return settings.aphrodite_model; case OLLAMA: @@ -1061,11 +1075,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, 'ignore_eos': settings.ban_eos_token, 'n_probs': power_user.request_token_probabilities ? 10 : undefined, }; + const vllmParams = { + 'n': canMultiSwipe ? settings.n : 1, + 'best_of': canMultiSwipe ? settings.n : 1, + 'ignore_eos': settings.ignore_eos_token, + 'spaces_between_special_tokens': settings.spaces_between_special_tokens, + 'seed': settings.seed, + }; const aphroditeParams = { 'n': canMultiSwipe ? settings.n : 1, 'best_of': canMultiSwipe ? settings.n : 1, - 'ignore_eos': settings.ignore_eos_token_aphrodite, - 'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite, + 'ignore_eos': settings.ignore_eos_token, + 'spaces_between_special_tokens': settings.spaces_between_special_tokens, 'grammar': settings.grammar_string, //'logits_processors': settings.logits_processors_aphrodite, //'logprobs': settings.log_probs_aphrodite, @@ -1087,10 +1108,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, delete params.dynatemp_high; } - if (settings.type === APHRODITE) { - params = Object.assign(params, aphroditeParams); - } else { - params = Object.assign(params, nonAphroditeParams); + switch (settings.type) { + case VLLM: + params = Object.assign(params, vllmParams); + break; + + case APHRODITE: + params = Object.assign(params, aphroditeParams); + break; + + default: + params = Object.assign(params, nonAphroditeParams); + break; } if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) { @@ -1119,4 +1148,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate, return params; } - diff --git a/src/additional-headers.js b/src/additional-headers.js index aa151011e..148e81e8c 100644 --- a/src/additional-headers.js +++ b/src/additional-headers.js @@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) { return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders; } +/** + * Gets the headers for the vLLM API. + * @param {import('./users').UserDirectoryList} directories User directories + * @returns {object} Headers for the request + */ +function getVllmHeaders(directories) { + const apiKey = readSecret(directories, SECRET_KEYS.VLLM); + + return apiKey ? ({ + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + /** * Gets the headers for the Aphrodite API. * @param {import('./users').UserDirectoryList} directories User directories @@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) { function setAdditionalHeaders(request, args, server) { const headerGetters = { [TEXTGEN_TYPES.MANCER]: getMancerHeaders, + [TEXTGEN_TYPES.VLLM]: getVllmHeaders, [TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders, [TEXTGEN_TYPES.TABBY]: getTabbyHeaders, [TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders, diff --git a/src/constants.js b/src/constants.js index e77649c19..5d80dd03b 100644 --- a/src/constants.js +++ b/src/constants.js @@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads'; const TEXTGEN_TYPES = { OOBA: 'ooba', MANCER: 'mancer', + VLLM: 'vllm', APHRODITE: 'aphrodite', TABBY: 'tabby', KOBOLDCPP: 'koboldcpp', @@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [ 'stop', ]; +// https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220 +const VLLM_KEYS = [ + 'model', + 'prompt', + 'best_of', + 'echo', + 'frequency_penalty', + 'logit_bias', + 'logprobs', + 'max_tokens', + 'n', + 'presence_penalty', + 'seed', + 'stop', + 'stream', + 'suffix', + 'temperature', + 'top_p', + 'user', + + 'use_beam_search', + 'top_k', + 'min_p', + 'repetition_penalty', + 'length_penalty', + 'early_stopping', + 'stop_token_ids', + 'ignore_eos', + 'min_tokens', + 'skip_special_tokens', + 'spaces_between_special_tokens', + 'truncate_prompt_tokens', + + 'include_stop_str_in_output', + 'response_format', + 'guided_json', + 'guided_regex', + 'guided_choice', + 'guided_grammar', + 'guided_decoding_backend', + 'guided_whitespace_pattern', +]; + module.exports = { DEFAULT_USER, DEFAULT_AVATAR, @@ -318,4 +362,5 @@ module.exports = { DREAMGEN_KEYS, OPENROUTER_HEADERS, OPENROUTER_KEYS, + VLLM_KEYS, }; diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js index 0e9598827..3cb15a748 100644 --- a/src/endpoints/backends/text-completions.js +++ b/src/endpoints/backends/text-completions.js @@ -4,7 +4,7 @@ const _ = require('lodash'); const Readable = require('stream').Readable; const { jsonParser } = require('../../express-common'); -const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants'); +const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants'); const { forwardFetchResponse, trimV1 } = require('../../util'); const { setAdditionalHeaders } = require('../../additional-headers'); @@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) { } else { switch (request.body.api_type) { case TEXTGEN_TYPES.OOBA: + case TEXTGEN_TYPES.VLLM: case TEXTGEN_TYPES.APHRODITE: case TEXTGEN_TYPES.KOBOLDCPP: case TEXTGEN_TYPES.LLAMACPP: @@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) { url += '/v1/generate'; } else { switch (request.body.api_type) { + case TEXTGEN_TYPES.VLLM: case TEXTGEN_TYPES.APHRODITE: case TEXTGEN_TYPES.OOBA: case TEXTGEN_TYPES.TABBY: @@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) { args.body = JSON.stringify(request.body); } + if (request.body.api_type === TEXTGEN_TYPES.VLLM) { + request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key)); + args.body = JSON.stringify(request.body); + } + if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) { args.body = JSON.stringify({ model: request.body.model, diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js index 5d7560fa5..341658d72 100644 --- a/src/endpoints/secrets.js +++ b/src/endpoints/secrets.js @@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json'; const SECRET_KEYS = { HORDE: 'api_key_horde', MANCER: 'api_key_mancer', + VLLM: 'api_key_vllm', APHRODITE: 'api_key_aphrodite', TABBY: 'api_key_tabby', OPENAI: 'api_key_openai', diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 65e795294..9c49c8ae3 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re url += '/tokenize'; args.body = JSON.stringify({ 'content': text }); break; + case TEXTGEN_TYPES.VLLM: + return response.send({ error: true }); case TEXTGEN_TYPES.APHRODITE: url += '/v1/tokenize'; args.body = JSON.stringify({ 'prompt': text });