diff --git a/default/content/presets/textgen/Universal-Creative.json b/default/content/presets/textgen/Universal-Creative.json
index 76cd957e9..7784fe226 100644
--- a/default/content/presets/textgen/Universal-Creative.json
+++ b/default/content/presets/textgen/Universal-Creative.json
@@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
- "ignore_eos_token_aphrodite": false,
- "spaces_between_special_tokens_aphrodite": true,
+ "ignore_eos_token": false,
+ "spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [
diff --git a/default/content/presets/textgen/Universal-Light.json b/default/content/presets/textgen/Universal-Light.json
index 083f908e7..d855094d1 100644
--- a/default/content/presets/textgen/Universal-Light.json
+++ b/default/content/presets/textgen/Universal-Light.json
@@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
- "ignore_eos_token_aphrodite": false,
- "spaces_between_special_tokens_aphrodite": true,
+ "ignore_eos_token": false,
+ "spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [
diff --git a/default/content/presets/textgen/Universal-Super-Creative.json b/default/content/presets/textgen/Universal-Super-Creative.json
index d052806d8..60df29700 100644
--- a/default/content/presets/textgen/Universal-Super-Creative.json
+++ b/default/content/presets/textgen/Universal-Super-Creative.json
@@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
- "ignore_eos_token_aphrodite": false,
- "spaces_between_special_tokens_aphrodite": true,
+ "ignore_eos_token": false,
+ "spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [
diff --git a/public/index.html b/public/index.html
index ed0047ecc..48bd6ec54 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1125,7 +1125,7 @@
-
@@ -1948,7 +1948,8 @@
API Type
+
+
+
vLLM API key
+
+
+
+
+
+ For privacy reasons, your API key will be hidden after you reload the page.
+
+
+
API URL
+ Example: http://127.0.0.1:8000
+
+
+
+
vLLM Model
+
+
+
+
-
+
diff --git a/public/script.js b/public/script.js
index 14079e155..1d33d09b3 100644
--- a/public/script.js
+++ b/public/script.js
@@ -22,7 +22,7 @@ import {
parseTabbyLogprobs,
} from './scripts/textgen-settings.js';
-const { MANCER, TOGETHERAI, OOBA, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
+const { MANCER, TOGETHERAI, OOBA, VLLM, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
import {
world_info,
@@ -218,7 +218,7 @@ import {
import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js';
import { hideLoader, showLoader } from './scripts/loader.js';
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
-import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
+import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId } from './scripts/chats.js';
import { initPresetManager } from './scripts/preset-manager.js';
import { evaluateMacros } from './scripts/macros.js';
@@ -1071,6 +1071,9 @@ async function getStatusTextgen() {
} else if (textgen_settings.type === OPENROUTER) {
loadOpenRouterModels(data?.data);
online_status = textgen_settings.openrouter_model;
+ } else if (textgen_settings.type === VLLM) {
+ loadVllmModels(data?.data);
+ online_status = textgen_settings.vllm_model;
} else if (textgen_settings.type === APHRODITE) {
loadAphroditeModels(data?.data);
online_status = textgen_settings.aphrodite_model;
@@ -4832,6 +4835,7 @@ function parseAndSaveLogprobs(data, continueFrom) {
case textgen_types.LLAMACPP: {
logprobs = data?.completion_probabilities?.map(x => parseTextgenLogprobs(x.content, [x])) || null;
} break;
+ case textgen_types.VLLM:
case textgen_types.APHRODITE:
case textgen_types.MANCER:
case textgen_types.TABBY: {
@@ -4888,7 +4892,7 @@ function extractMultiSwipes(data, type) {
return swipes;
}
- if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, APHRODITE].includes(textgen_settings.type))) {
+ if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, VLLM, APHRODITE].includes(textgen_settings.type))) {
if (!Array.isArray(data.choices)) {
return swipes;
}
@@ -7922,6 +7926,11 @@ const CONNECT_API_MAP = {
button: '#api_button_textgenerationwebui',
type: textgen_types.MANCER,
},
+ 'vllm': {
+ selected: 'textgenerationwebui',
+ button: '#api_button_textgenerationwebui',
+ type: textgen_types.VLLM,
+ },
'aphrodite': {
selected: 'textgenerationwebui',
button: '#api_button_textgenerationwebui',
@@ -8896,6 +8905,7 @@ jQuery(async function () {
$('#api_button_textgenerationwebui').on('click', async function (e) {
const keys = [
{ id: 'api_key_mancer', secret: SECRET_KEYS.MANCER },
+ { id: 'api_key_vllm', secret: SECRET_KEYS.VLLM },
{ id: 'api_key_aphrodite', secret: SECRET_KEYS.APHRODITE },
{ id: 'api_key_tabby', secret: SECRET_KEYS.TABBY },
{ id: 'api_key_togetherai', secret: SECRET_KEYS.TOGETHERAI },
diff --git a/public/scripts/preset-manager.js b/public/scripts/preset-manager.js
index 1a28f075c..889c254cb 100644
--- a/public/scripts/preset-manager.js
+++ b/public/scripts/preset-manager.js
@@ -309,6 +309,7 @@ class PresetManager {
'mancer_model',
'togetherai_model',
'ollama_model',
+ 'vllm_model',
'aphrodite_model',
'server_urls',
'type',
diff --git a/public/scripts/secrets.js b/public/scripts/secrets.js
index 00c322353..83c994d31 100644
--- a/public/scripts/secrets.js
+++ b/public/scripts/secrets.js
@@ -3,6 +3,7 @@ import { callPopup, getRequestHeaders } from '../script.js';
export const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
+ VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',
@@ -38,6 +39,7 @@ const INPUT_MAP = {
[SECRET_KEYS.AI21]: '#api_key_ai21',
[SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie',
[SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite',
+ [SECRET_KEYS.VLLM]: '#api_key_vllm',
[SECRET_KEYS.APHRODITE]: '#api_key_aphrodite',
[SECRET_KEYS.TABBY]: '#api_key_tabby',
[SECRET_KEYS.MISTRALAI]: '#api_key_mistralai',
diff --git a/public/scripts/slash-commands.js b/public/scripts/slash-commands.js
index 0e49b2757..ddda52d7d 100644
--- a/public/scripts/slash-commands.js
+++ b/public/scripts/slash-commands.js
@@ -1665,6 +1665,7 @@ function modelCallback(_, model) {
{ id: 'model_infermaticai_select', api: 'textgenerationwebui', type: textgen_types.INFERMATICAI },
{ id: 'model_dreamgen_select', api: 'textgenerationwebui', type: textgen_types.DREAMGEN },
{ id: 'mancer_model', api: 'textgenerationwebui', type: textgen_types.MANCER },
+ { id: 'vllm_model', api: 'textgenerationwebui', type: textgen_types.VLLM },
{ id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
{ id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
{ id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },
diff --git a/public/scripts/textgen-models.js b/public/scripts/textgen-models.js
index c96ad4ce4..28ce1e21e 100644
--- a/public/scripts/textgen-models.js
+++ b/public/scripts/textgen-models.js
@@ -7,6 +7,7 @@ let mancerModels = [];
let togetherModels = [];
let infermaticAIModels = [];
let dreamGenModels = [];
+let vllmModels = [];
let aphroditeModels = [];
export let openRouterModels = [];
@@ -156,6 +157,28 @@ export async function loadOpenRouterModels(data) {
calculateOpenRouterCost();
}
+/**
+ * Caches the vLLM model list and rebuilds the `#vllm_model` dropdown from it.
+ * @param {Array<{id: string}>} data Model entries; a non-array value is logged as an error and ignored
+ * @returns {Promise<void>}
+ */
+export async function loadVllmModels(data) {
+    if (!Array.isArray(data)) {
+        console.error('Invalid vLLM models data', data);
+        return;
+    }
+
+    vllmModels = data;
+
+    // Keep the saved model only if it is still in the list; otherwise fall
+    // back to the first entry (or an empty string when the list is empty).
+    const savedModel = data.find(entry => entry.id === textgen_settings.vllm_model);
+    if (!savedModel) {
+        textgen_settings.vllm_model = data[0]?.id || '';
+    }
+
+    const dropdown = $('#vllm_model');
+    dropdown.empty();
+    for (const { id } of data) {
+        const option = document.createElement('option');
+        option.value = id;
+        option.text = id;
+        option.selected = id === textgen_settings.vllm_model;
+        dropdown.append(option);
+    }
+}
+
export async function loadAphroditeModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid Aphrodite models data', data);
@@ -224,6 +247,12 @@ function onOpenRouterModelSelect() {
setGenerationParamsFromPreset({ max_length: model.context_length });
}
+/**
+ * Change handler for the vLLM model dropdown: stores the chosen model id in
+ * the text generation settings and clicks the connect button.
+ */
+function onVllmModelSelect() {
+    textgen_settings.vllm_model = String($('#vllm_model').val());
+    $('#api_button_textgenerationwebui').trigger('click');
+}
+
function onAphroditeModelSelect() {
const modelId = String($('#aphrodite_model').val());
textgen_settings.aphrodite_model = modelId;
@@ -310,6 +339,20 @@ function getOpenRouterModelTemplate(option) {
`));
}
+/**
+ * Renders one entry of the vLLM model dropdown; registered as the select2 `templateResult` for `#vllm_model`.
+ * @param {object} option Option descriptor handed to the template callback
+ * @returns {object|string} `option.text` when the option has no id or no cached model matches its value; otherwise the result of `$()` applied to the template below
+ */
+function getVllmModelTemplate(option) {
+ const model = vllmModels.find(x => x.id === option?.element?.value);
+
+ if (!option.id || !model) {
+ return option.text;
+ }
+
+ // The model id goes through DOMPurify before it is interpolated into the template.
+ return $((`
+
+
${DOMPurify.sanitize(model.id)}
+
+ `));
+}
+
function getAphroditeModelTemplate(option) {
const model = aphroditeModels.find(x => x.id === option?.element?.value);
@@ -426,6 +469,7 @@ jQuery(function () {
$('#ollama_model').on('change', onOllamaModelSelect);
$('#openrouter_model').on('change', onOpenRouterModelSelect);
$('#ollama_download_model').on('click', downloadOllamaModel);
+ $('#vllm_model').on('change', onVllmModelSelect);
$('#aphrodite_model').on('change', onAphroditeModelSelect);
if (!isMobile()) {
@@ -470,6 +514,13 @@ jQuery(function () {
width: '100%',
templateResult: getOpenRouterModelTemplate,
});
+ $('#vllm_model').select2({
+ placeholder: 'Select a model',
+ searchInputPlaceholder: 'Search models...',
+ searchInputCssClass: 'text_pole',
+ width: '100%',
+ templateResult: getVllmModelTemplate,
+ });
$('#aphrodite_model').select2({
placeholder: 'Select a model',
searchInputPlaceholder: 'Search models...',
diff --git a/public/scripts/textgen-settings.js b/public/scripts/textgen-settings.js
index a3b40970f..7058f7e35 100644
--- a/public/scripts/textgen-settings.js
+++ b/public/scripts/textgen-settings.js
@@ -28,6 +28,7 @@ export {
export const textgen_types = {
OOBA: 'ooba',
MANCER: 'mancer',
+ VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@@ -39,7 +40,7 @@ export const textgen_types = {
OPENROUTER: 'openrouter',
};
-const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
+const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
const LLAMACPP_DEFAULT_ORDER = [
'top_k',
@@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api';
const SERVER_INPUTS = {
[textgen_types.OOBA]: '#textgenerationwebui_api_url_text',
+ [textgen_types.VLLM]: '#vllm_api_url_text',
[textgen_types.APHRODITE]: '#aphrodite_api_url_text',
[textgen_types.TABBY]: '#tabby_api_url_text',
[textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text',
@@ -135,8 +137,8 @@ const settings = {
samplers: LLAMACPP_DEFAULT_ORDER,
//n_aphrodite: 1,
//best_of_aphrodite: 1,
- ignore_eos_token_aphrodite: false,
- spaces_between_special_tokens_aphrodite: true,
+ ignore_eos_token: false,
+ spaces_between_special_tokens: true,
//logits_processors_aphrodite: [],
//log_probs_aphrodite: 0,
//prompt_log_probs_aphrodite: 0,
@@ -146,6 +148,7 @@ const settings = {
infermaticai_model: '',
ollama_model: '',
openrouter_model: 'openrouter/auto',
+ vllm_model: '',
aphrodite_model: '',
dreamgen_model: 'opus-v1-xl/text',
legacy_api: false,
@@ -208,8 +211,8 @@ const setting_names = [
'legacy_api',
//'n_aphrodite',
//'best_of_aphrodite',
- 'ignore_eos_token_aphrodite',
- 'spaces_between_special_tokens_aphrodite',
+ 'ignore_eos_token',
+ 'spaces_between_special_tokens',
//'logits_processors_aphrodite',
//'log_probs_aphrodite',
//'prompt_log_probs_aphrodite'
@@ -587,15 +590,22 @@ jQuery(function () {
const type = String($(this).val());
settings.type = type;
- if (settings.type === textgen_types.APHRODITE) {
- //this is needed because showTypeSpecificControls() does not handle NOT declarations
- $('[data-forAphro="False"]').each(function () {
- $(this).hide();
- });
+ if (settings.type === textgen_types.VLLM || settings.type === textgen_types.APHRODITE) {
+ if (settings.type === textgen_types.APHRODITE) {
+ //this is needed because showTypeSpecificControls() does not handle NOT declarations
+ $('[data-forAphro="False"]').each(function () {
+ $(this).hide();
+ });
+ } else {
+ //this is needed because showTypeSpecificControls() does not handle NOT declarations
+ $('[data-forAphro="False"]').each(function () {
+ $(this).show();
+ });
+ }
$('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1
$('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample'
$('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list do to this'
- //special handling for Aphrodite topK -1 disable state
+ //special handling for vLLM/Aphrodite topK -1 disable state
$('#top_k_textgenerationwebui').attr('min', -1);
if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) {
settings['top_k'] = -1;
@@ -607,7 +617,7 @@ jQuery(function () {
$(this).show();
});
$('#mirostat_mode_textgenerationwebui').attr('step', 1);
- //undo special Aphrodite setup for topK
+ //undo special vLLM/Aphrodite setup for topK
$('#top_k_textgenerationwebui').attr('min', 0);
if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) {
settings['top_k'] = 0;
@@ -711,9 +721,10 @@ jQuery(function () {
const value = Number($(this).val());
$(`#${id}_counter_textgenerationwebui`).val(value);
settings[id] = value;
- //special handling for aphrodite using -1 as disabled instead of 0
+ //special handling for vLLM/Aphrodite using -1 as disabled instead of 0
if ($(this).attr('id') === 'top_k_textgenerationwebui' &&
- settings.type === textgen_types.APHRODITE &&
+ (settings.type === textgen_types.VLLM ||
+ settings.type === textgen_types.APHRODITE) &&
value === 0) {
settings[id] = -1;
$(this).val(-1);
@@ -869,6 +880,7 @@ export function parseTextgenLogprobs(token, logprobs) {
switch (settings.type) {
case TABBY:
+ case VLLM:
case APHRODITE:
case MANCER:
case OOBA: {
@@ -964,6 +976,8 @@ function getModel() {
return settings.dreamgen_model;
case OPENROUTER:
return settings.openrouter_model;
+ case VLLM:
+ return settings.vllm_model;
case APHRODITE:
return settings.aphrodite_model;
case OLLAMA:
@@ -1061,11 +1075,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'ignore_eos': settings.ban_eos_token,
'n_probs': power_user.request_token_probabilities ? 10 : undefined,
};
+ const vllmParams = {
+ 'n': canMultiSwipe ? settings.n : 1,
+ 'best_of': canMultiSwipe ? settings.n : 1,
+ 'ignore_eos': settings.ignore_eos_token,
+ 'spaces_between_special_tokens': settings.spaces_between_special_tokens,
+ 'seed': settings.seed,
+ };
const aphroditeParams = {
'n': canMultiSwipe ? settings.n : 1,
'best_of': canMultiSwipe ? settings.n : 1,
- 'ignore_eos': settings.ignore_eos_token_aphrodite,
- 'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite,
+ 'ignore_eos': settings.ignore_eos_token,
+ 'spaces_between_special_tokens': settings.spaces_between_special_tokens,
'grammar': settings.grammar_string,
//'logits_processors': settings.logits_processors_aphrodite,
//'logprobs': settings.log_probs_aphrodite,
@@ -1087,10 +1108,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
delete params.dynatemp_high;
}
- if (settings.type === APHRODITE) {
- params = Object.assign(params, aphroditeParams);
- } else {
- params = Object.assign(params, nonAphroditeParams);
+ switch (settings.type) {
+ case VLLM:
+ params = Object.assign(params, vllmParams);
+ break;
+
+ case APHRODITE:
+ params = Object.assign(params, aphroditeParams);
+ break;
+
+ default:
+ params = Object.assign(params, nonAphroditeParams);
+ break;
}
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
@@ -1119,4 +1148,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
return params;
}
-
diff --git a/src/additional-headers.js b/src/additional-headers.js
index aa151011e..148e81e8c 100644
--- a/src/additional-headers.js
+++ b/src/additional-headers.js
@@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
}
+/**
+ * Builds the request headers for the vLLM API.
+ * @param {import('./users').UserDirectoryList} directories User directories
+ * @returns {object} A Bearer `Authorization` header when a vLLM secret is stored, otherwise an empty object
+ */
+function getVllmHeaders(directories) {
+    const apiKey = readSecret(directories, SECRET_KEYS.VLLM);
+
+    // No stored key: send the request without an Authorization header.
+    if (!apiKey) {
+        return {};
+    }
+
+    return { 'Authorization': `Bearer ${apiKey}` };
+}
+
/**
* Gets the headers for the Aphrodite API.
* @param {import('./users').UserDirectoryList} directories User directories
@@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
function setAdditionalHeaders(request, args, server) {
const headerGetters = {
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
+ [TEXTGEN_TYPES.VLLM]: getVllmHeaders,
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,
diff --git a/src/constants.js b/src/constants.js
index e77649c19..5d80dd03b 100644
--- a/src/constants.js
+++ b/src/constants.js
@@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
const TEXTGEN_TYPES = {
OOBA: 'ooba',
MANCER: 'mancer',
+ VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
'stop',
];
+// Request body keys that are forwarded to a vLLM server; the text-completions
+// /generate handler drops every key that is not in this list.
+// Field list reference:
+// https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
+const VLLM_KEYS = [
+ 'model',
+ 'prompt',
+ 'best_of',
+ 'echo',
+ 'frequency_penalty',
+ 'logit_bias',
+ 'logprobs',
+ 'max_tokens',
+ 'n',
+ 'presence_penalty',
+ 'seed',
+ 'stop',
+ 'stream',
+ 'suffix',
+ 'temperature',
+ 'top_p',
+ 'user',
+
+ 'use_beam_search',
+ 'top_k',
+ 'min_p',
+ 'repetition_penalty',
+ 'length_penalty',
+ 'early_stopping',
+ 'stop_token_ids',
+ 'ignore_eos',
+ 'min_tokens',
+ 'skip_special_tokens',
+ 'spaces_between_special_tokens',
+ 'truncate_prompt_tokens',
+
+ 'include_stop_str_in_output',
+ 'response_format',
+ 'guided_json',
+ 'guided_regex',
+ 'guided_choice',
+ 'guided_grammar',
+ 'guided_decoding_backend',
+ 'guided_whitespace_pattern',
+];
+
module.exports = {
DEFAULT_USER,
DEFAULT_AVATAR,
@@ -318,4 +362,5 @@ module.exports = {
DREAMGEN_KEYS,
OPENROUTER_HEADERS,
OPENROUTER_KEYS,
+ VLLM_KEYS,
};
diff --git a/src/endpoints/backends/text-completions.js b/src/endpoints/backends/text-completions.js
index 0e9598827..3cb15a748 100644
--- a/src/endpoints/backends/text-completions.js
+++ b/src/endpoints/backends/text-completions.js
@@ -4,7 +4,7 @@ const _ = require('lodash');
const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
-const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
+const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1 } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
@@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.OOBA:
+ case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.LLAMACPP:
@@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
url += '/v1/generate';
} else {
switch (request.body.api_type) {
+ case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.TABBY:
@@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
args.body = JSON.stringify(request.body);
}
+ if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
+ request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
+ args.body = JSON.stringify(request.body);
+ }
+
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
args.body = JSON.stringify({
model: request.body.model,
diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js
index 5d7560fa5..341658d72 100644
--- a/src/endpoints/secrets.js
+++ b/src/endpoints/secrets.js
@@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
+ VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',
diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index 65e795294..9c49c8ae3 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
url += '/tokenize';
args.body = JSON.stringify({ 'content': text });
break;
+ case TEXTGEN_TYPES.VLLM:
+ return response.send({ error: true });
case TEXTGEN_TYPES.APHRODITE:
url += '/v1/tokenize';
args.body = JSON.stringify({ 'prompt': text });