Merge pull request #2177 from sasha0552/vllm-support

vLLM support
Cohee, 2024-05-03 20:03:05 +03:00 (committed by GitHub)
commit b69c5bcd17, GPG Key ID: B5690EEEBB952194 (no known key found for this signature in database)
15 changed files with 230 additions and 86 deletions


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -33,8 +33,8 @@
"negative_prompt": "",
"grammar_string": "",
"banned_tokens": "",
"ignore_eos_token_aphrodite": false,
"spaces_between_special_tokens_aphrodite": true,
"ignore_eos_token": false,
"spaces_between_special_tokens": true,
"type": "ooba",
"legacy_api": false,
"sampler_order": [


@ -1125,7 +1125,7 @@
<div class="fa-solid fa-circle-info opacity50p" title="Set all samplers to their neutral/disabled state." data-i18n="[title]Set all samplers to their neutral/disabled state."></div>
</small>
</div>
<div data-newbie-hidden data-tg-type="mancer, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<div data-newbie-hidden data-tg-type="mancer, vllm, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
<small data-i18n="Multiple swipes per generation">Multiple swipes per generation</small>
<input type="number" id="n_textgenerationwebui" class="text_pole textAlignCenter" min="1" value="1" step="1" />
</div>
@ -1207,12 +1207,12 @@
<input class="neo-range-slider" type="range" id="rep_pen_textgenerationwebui" name="volume" min="1" max="3" step="0.01">
<input class="neo-range-input" type="number" min="1" max="3" step="0.01" data-for="rep_pen_textgenerationwebui" id="rep_pen_counter_textgenerationwebui">
</div>
<div data-forAphro="False" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba, koboldcpp, tabby, ollama, llamacpp" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="rep.pen range">Rep Pen Range</small>
<input class="neo-range-slider" type="range" id="rep_pen_range_textgenerationwebui" name="volume" min="-1" max="8192" step="1">
<input class="neo-range-input" type="number" min="-1" max="8192" step="1" data-for="rep_pen_range_textgenerationwebui" id="rep_pen_range_counter_textgenerationwebui">
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="Encoder Rep. Pen.">Encoder Penalty</small>
<input class="neo-range-slider" type="range" id="encoder_rep_pen_textgenerationwebui" name="volume" min="0.8" max="1.5" step="0.01" />
<input class="neo-range-input" type="number" min="0.8" max="1.5" step="0.01" data-for="encoder_rep_pen_textgenerationwebui" id="encoder_rep_pen_counter_textgenerationwebui">
@ -1227,7 +1227,7 @@
<input class="neo-range-slider" type="range" id="presence_pen_textgenerationwebui" name="volume" min="-2" max="2" step="0.01" />
<input class="neo-range-input" type="number" min="-2" max="2" step="0.01" data-for="presence_pen_textgenerationwebui" id="presence_pen_counter_textgenerationwebui">
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="No Repeat Ngram Size">No Repeat Ngram Size</small>
<input class="neo-range-slider" type="range" id="no_repeat_ngram_size_textgenerationwebui" name="volume" min="0" max="20" step="1">
<input class="neo-range-input" type="number" min="0" max="20" step="1" data-for="no_repeat_ngram_size_textgenerationwebui" id="no_repeat_ngram_size_counter_textgenerationwebui">
@ -1260,28 +1260,6 @@
</div>
</div>
</div>
<!--
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0" data-i18n="Responses">
<small>Responses</small>
<input class="neo-range-slider" type="range" id="n_aphrodite_textgenerationwebui" name="volume" min="1" max="5" step="1">
<input class="neo-range-input" type="number" min="1" max="5" step="1" data-for="n_aphrodite" id="n_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Keep # Best Responses">
<small>Keep # Best Responses</small>
<input class="neo-range-slider" type="range" id="best_of_aphrodite_textgenerationwebui" name="volume" min="1" max="5" step="1">
<input class="neo-range-input" type="number" min="1" max="5" step="1" data-for="best_of_aphrodite" id="best_of_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Logit Probabilities">
<small>Logit Probabilities</small>
<input class="neo-range-slider" type="range" id="log_probs_aphrodite_textgenerationwebui" name="volume" min="0" max="5" step="1">
<input class="neo-range-input" type="number" min="0" max="5" step="1" data-for="log_probs_aphrodite" id="log_probs_aphrodite_counter_textgenerationwebui">
</div>
<div data-tg-type="aphrodite" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0 displayNone" data-i18n="Prompt Logit Probabilities">
<small>Prompt Logit Probabilities</small>
<input class="neo-range-slider" type="range" id="prompt_log_probs_aphrodite_textgenerationwebui" name="volume" min="0" max="5" step="1">
<input class="neo-range-input" type="number" min="0" max="5" step="1" data-for="prompt_log_probs_aphrodite" id="prompt_log_probs_aphrodite_counter_textgenerationwebui">
</div>
-->
<div data-newbie-hidden data-tg-type="ooba, mancer, koboldcpp, tabby, llamacpp, aphrodite" name="dynaTempBlock" class="wide100p">
<h4 class="wide100p textAlignCenter" data-i18n="DynaTemp">
<div class="flex-container alignitemscenter" style="justify-content: center;">
@ -1366,7 +1344,7 @@
</div>
</div>
</div>
<div data-forAphro="False" data-tg-type="ooba" data-newbie-hidden name="contrastiveSearchBlock" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<div data-tg-type="ooba" data-newbie-hidden name="contrastiveSearchBlock" class="alignitemscenter flex-container flexFlowColumn flexBasis48p flexGrow flexShrink gap0">
<h4 class="textAlignCenter" data-i18n="Contrastive search">Contrastive Search
<div class=" fa-solid fa-circle-info opacity50p " title="A sampler that encourages diversity while maintaining coherence, by exploiting the isotropicity of the representation space of most LLMs. For details, see the paper A Contrastive Framework for Neural Text Generation by Su et al. (2022)."></div>
</h4>
@ -1381,26 +1359,26 @@
</div>
<div data-newbie-hidden name="checkboxes" class="flex-container flexBasis48p justifyCenter flexGrow flexShrink ">
<div class="flex-container flexFlowColumn marginTop5">
<label data-forAphro="False" data-tg-type="ooba" class="checkbox_label flexGrow flexShrink" for="do_sample_textgenerationwebui">
<label data-tg-type="ooba" class="checkbox_label flexGrow flexShrink" for="do_sample_textgenerationwebui">
<input type="checkbox" id="do_sample_textgenerationwebui" />
<small data-i18n="Do Sample">Do Sample</small>
</label>
<label data-forAphro="False" data-tg-type="ooba, tabby" class="checkbox_label flexGrow flexShrink" for="add_bos_token_textgenerationwebui">
<label data-tg-type="ooba, tabby" class="checkbox_label flexGrow flexShrink" for="add_bos_token_textgenerationwebui">
<input type="checkbox" id="add_bos_token_textgenerationwebui" />
<label>
<small data-i18n="Add BOS Token">Add BOS Token</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Add the bos_token to the beginning of prompts. Disabling this can make the replies more creative" title="Add the bos_token to the beginning of prompts. Disabling this can make the replies more creative."></div>
</label>
</label>
<label data-forAphro="False" class="checkbox_label flexGrow flexShrink" for="ban_eos_token_textgenerationwebui">
<label data-tg-type="ooba, llamacpp" class="checkbox_label flexGrow flexShrink" for="ban_eos_token_textgenerationwebui">
<input type="checkbox" id="ban_eos_token_textgenerationwebui" />
<label>
<small data-i18n="Ban EOS Token">Ban EOS Token</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Ban the eos_token. This forces the model to never end the generation prematurely" title="Ban the eos_token. This forces the model to never end the generation prematurely."></div>
</label>
</label>
<label data-tg-type="aphrodite" class="checkbox_label" for="ignore_eos_token_aphrodite_textgenerationwebui">
<input type="checkbox" id="ignore_eos_token_aphrodite_textgenerationwebui" />
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="ignore_eos_token_textgenerationwebui">
<input type="checkbox" id="ignore_eos_token_textgenerationwebui" />
<small data-i18n="Ignore EOS Token">Ignore EOS Token
<div class="fa-solid fa-circle-info opacity50p " data-i18n="Ignore the EOS Token even if it generates." title="Ignore the EOS Token even if it generates."></div>
</small>
@ -1409,7 +1387,7 @@
<input type="checkbox" id="skip_special_tokens_textgenerationwebui" />
<small data-i18n="Skip Special Tokens">Skip Special Tokens</small>
</label>
<label data-forAphro="False" data-tg-type="ooba, aphrodite, tabby" class="checkbox_label flexGrow flexShrink" for="temperature_last_textgenerationwebui">
<label data-tg-type="ooba, aphrodite, tabby" class="checkbox_label flexGrow flexShrink" for="temperature_last_textgenerationwebui">
<input type="checkbox" id="temperature_last_textgenerationwebui" />
<label>
<small data-i18n="Temperature Last">Temperature Last</small>
@ -1417,13 +1395,13 @@
</label>
</label>
<label data-tg-type="aphrodite" class="checkbox_label" for="spaces_between_special_tokens_aphrodite_textgenerationwebui">
<input type="checkbox" id="spaces_between_special_tokens_aphrodite_textgenerationwebui" />
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="spaces_between_special_tokens_textgenerationwebui">
<input type="checkbox" id="spaces_between_special_tokens_textgenerationwebui" />
<small data-i18n="Spaces Between Special Tokens">Spaces Between Special Tokens</small>
</label>
</div>
</div>
<div data-forAphro="False" data-newbie-hidden class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<div data-newbie-hidden class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<small data-i18n="Seed" class="textAlignCenter">Seed</small>
<input type="number" id="seed_textgenerationwebui" class="text_pole textAlignCenter" min="-1" value="-1" maxlength="100" />
</div>
@ -1452,7 +1430,7 @@
<div class="logit_bias_list"></div>
</div>
</div>
<div data-newbie-hidden data-forAphro="False" data-tg-type="ooba, tabby" class="wide100p">
<div data-newbie-hidden data-tg-type="ooba, tabby" class="wide100p">
<hr class="width100p">
<h4 data-i18n="CFG" class="textAlignCenter">CFG
<div class="margin5 fa-solid fa-circle-info opacity50p " data-i18n="[title]Classifier Free Guidance. More helpful tip coming soon" title="Classifier Free Guidance. More helpful tip coming soon."></div>
@ -1948,7 +1926,7 @@
<div>
<h4 data-i18n="API Type">API Type</h4>
<select id="textgen_type">
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, vLLM, LM Studio, etc.]</option>
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, LM Studio, etc.]</option>
<option value="aphrodite">Aphrodite</option>
<option value="dreamgen">DreamGen</option>
<option value="infermaticai">InfermaticAI</option>
@ -1959,6 +1937,7 @@
<option value="openrouter">OpenRouter</option>
<option value="tabby">TabbyAPI</option>
<option value="togetherai">TogetherAI</option>
<option value="vllm">vLLM</option>
</select>
</div>
<div data-tg-type="togetherai" class="flex-container flexFlowColumn">
@ -2098,6 +2077,36 @@
</div>
<input id="custom_model_textgenerationwebui" class="text_pole wide100p" maxlength="500" placeholder="Custom model (optional)" data-i18n="[placeholder]Custom model (optional)" type="text">
</div>
<div data-tg-type="vllm">
<div class="flex-container flexFlowColumn">
<a href="https://github.com/vllm-project/vllm" target="_blank" data-i18n="vllm-project/vllm">
vllm-project/vllm (OpenAI API wrapper mode)
</a>
</div>
<h4 data-i18n="vLLM API key">vLLM API key</h4>
<div class="flex-container">
<input id="api_key_vllm" name="api_key_vllm" class="text_pole flex1 wide100p" maxlength="500" size="35" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_vllm">
</div>
</div>
<div data-for="api_key_vllm" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
For privacy reasons, your API key will be hidden after you reload the page.
</div>
<div class="flex1">
<h4 data-i18n="API url">API URL</h4>
<small data-i18n="Example: 127.0.0.1:8000">Example: http://127.0.0.1:8000</small>
<input id="vllm_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="vllm">
</div>
<div>
<h4 data-i18n="vLLM Model">vLLM Model</h4>
<select id="vllm_model">
<option data-i18n="-- Connect to the API --">
-- Connect to the API --
</option>
</select>
</div>
</div>
<div data-tg-type="aphrodite">
<div class="flex-container flexFlowColumn">
<a href="https://github.com/PygmalionAI/aphrodite-engine" target="_blank" data-i18n="PygmalionAI/aphrodite-engine">
@ -2218,7 +2227,7 @@
</div>
</div>
<div class="flex-container">
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,aphrodite,tabby,koboldcpp">Connect</div>
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,vllm,aphrodite,tabby,koboldcpp">Connect</div>
<div data-tg-type="openrouter" class="menu_button menu_button_icon openrouter_authorize" title="Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai" data-i18n="Authorize;[title]Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai">Authorize</div>
<div class="api_loading menu_button" data-i18n="Cancel">Cancel</div>
</div>


@ -22,7 +22,7 @@ import {
parseTabbyLogprobs,
} from './scripts/textgen-settings.js';
const { MANCER, TOGETHERAI, OOBA, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
const { MANCER, TOGETHERAI, OOBA, VLLM, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
import {
world_info,
@ -218,7 +218,7 @@ import {
import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js';
import { hideLoader, showLoader } from './scripts/loader.js';
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId } from './scripts/chats.js';
import { initPresetManager } from './scripts/preset-manager.js';
import { evaluateMacros } from './scripts/macros.js';
@ -1071,6 +1071,9 @@ async function getStatusTextgen() {
} else if (textgen_settings.type === OPENROUTER) {
loadOpenRouterModels(data?.data);
online_status = textgen_settings.openrouter_model;
} else if (textgen_settings.type === VLLM) {
loadVllmModels(data?.data);
online_status = textgen_settings.vllm_model;
} else if (textgen_settings.type === APHRODITE) {
loadAphroditeModels(data?.data);
online_status = textgen_settings.aphrodite_model;
@ -4832,6 +4835,7 @@ function parseAndSaveLogprobs(data, continueFrom) {
case textgen_types.LLAMACPP: {
logprobs = data?.completion_probabilities?.map(x => parseTextgenLogprobs(x.content, [x])) || null;
} break;
case textgen_types.VLLM:
case textgen_types.APHRODITE:
case textgen_types.MANCER:
case textgen_types.TABBY: {
@ -4888,7 +4892,7 @@ function extractMultiSwipes(data, type) {
return swipes;
}
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, APHRODITE].includes(textgen_settings.type))) {
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, VLLM, APHRODITE].includes(textgen_settings.type))) {
if (!Array.isArray(data.choices)) {
return swipes;
}
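
For context, a hedged sketch of the response shape this branch expects from an OpenAI-compatible backend such as vLLM when several swipes are requested in one generation; the field values are placeholders, and the last line is only a simplified stand-in for extraction logic the diff does not show:

```js
// Illustrative /v1/completions response for a request with n = 3: one reply plus
// extra choices that can be surfaced as alternative swipes. Values are placeholders.
const data = {
    choices: [
        { index: 0, text: 'First completion' },
        { index: 1, text: 'Second completion' },
        { index: 2, text: 'Third completion' },
    ],
};

// Mirrors the guard above: no array of choices means no extra swipes.
const swipes = Array.isArray(data.choices) ? data.choices.slice(1).map(c => c.text) : [];
```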
@ -7922,6 +7926,11 @@ const CONNECT_API_MAP = {
button: '#api_button_textgenerationwebui',
type: textgen_types.MANCER,
},
'vllm': {
selected: 'textgenerationwebui',
button: '#api_button_textgenerationwebui',
type: textgen_types.VLLM,
},
'aphrodite': {
selected: 'textgenerationwebui',
button: '#api_button_textgenerationwebui',
@ -8923,6 +8932,7 @@ jQuery(async function () {
$('#api_button_textgenerationwebui').on('click', async function (e) {
const keys = [
{ id: 'api_key_mancer', secret: SECRET_KEYS.MANCER },
{ id: 'api_key_vllm', secret: SECRET_KEYS.VLLM },
{ id: 'api_key_aphrodite', secret: SECRET_KEYS.APHRODITE },
{ id: 'api_key_tabby', secret: SECRET_KEYS.TABBY },
{ id: 'api_key_togetherai', secret: SECRET_KEYS.TOGETHERAI },


@ -309,6 +309,7 @@ class PresetManager {
'mancer_model',
'togetherai_model',
'ollama_model',
'vllm_model',
'aphrodite_model',
'server_urls',
'type',


@ -3,6 +3,7 @@ import { callPopup, getRequestHeaders } from '../script.js';
export const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',
@ -38,6 +39,7 @@ const INPUT_MAP = {
[SECRET_KEYS.AI21]: '#api_key_ai21',
[SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie',
[SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite',
[SECRET_KEYS.VLLM]: '#api_key_vllm',
[SECRET_KEYS.APHRODITE]: '#api_key_aphrodite',
[SECRET_KEYS.TABBY]: '#api_key_tabby',
[SECRET_KEYS.MISTRALAI]: '#api_key_mistralai',


@ -1665,6 +1665,7 @@ function modelCallback(_, model) {
{ id: 'model_infermaticai_select', api: 'textgenerationwebui', type: textgen_types.INFERMATICAI },
{ id: 'model_dreamgen_select', api: 'textgenerationwebui', type: textgen_types.DREAMGEN },
{ id: 'mancer_model', api: 'textgenerationwebui', type: textgen_types.MANCER },
{ id: 'vllm_model', api: 'textgenerationwebui', type: textgen_types.VLLM },
{ id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
{ id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
{ id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },


@ -7,6 +7,7 @@ let mancerModels = [];
let togetherModels = [];
let infermaticAIModels = [];
let dreamGenModels = [];
let vllmModels = [];
let aphroditeModels = [];
export let openRouterModels = [];
@ -156,6 +157,28 @@ export async function loadOpenRouterModels(data) {
calculateOpenRouterCost();
}
export async function loadVllmModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid vLLM models data', data);
return;
}
vllmModels = data;
if (!data.find(x => x.id === textgen_settings.vllm_model)) {
textgen_settings.vllm_model = data[0]?.id || '';
}
$('#vllm_model').empty();
for (const model of data) {
const option = document.createElement('option');
option.value = model.id;
option.text = model.id;
option.selected = model.id === textgen_settings.vllm_model;
$('#vllm_model').append(option);
}
}
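
As a reference point, a small sketch of the input `loadVllmModels` assumes: the `data?.data` array of an OpenAI-compatible `GET /v1/models` response (as passed in from `getStatusTextgen`), i.e. objects carrying at least an `id`. The model names below are placeholders:

```js
// Hypothetical payload mirroring what getStatusTextgen() hands over as data?.data.
const models = [
    { id: 'my-org/my-model', object: 'model' },
    { id: 'my-org/another-model', object: 'model' },
];

// Fills the #vllm_model dropdown and falls back to the first entry when the saved
// textgen_settings.vllm_model is no longer served by the backend.
loadVllmModels(models);
```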
export async function loadAphroditeModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid Aphrodite models data', data);
@ -224,6 +247,12 @@ function onOpenRouterModelSelect() {
setGenerationParamsFromPreset({ max_length: model.context_length });
}
function onVllmModelSelect() {
const modelId = String($('#vllm_model').val());
textgen_settings.vllm_model = modelId;
$('#api_button_textgenerationwebui').trigger('click');
}
function onAphroditeModelSelect() {
const modelId = String($('#aphrodite_model').val());
textgen_settings.aphrodite_model = modelId;
@ -310,6 +339,20 @@ function getOpenRouterModelTemplate(option) {
`));
}
function getVllmModelTemplate(option) {
const model = vllmModels.find(x => x.id === option?.element?.value);
if (!option.id || !model) {
return option.text;
}
return $((`
<div class="flex-container flexFlowColumn">
<div><strong>${DOMPurify.sanitize(model.id)}</strong></div>
</div>
`));
}
function getAphroditeModelTemplate(option) {
const model = aphroditeModels.find(x => x.id === option?.element?.value);
@ -426,6 +469,7 @@ jQuery(function () {
$('#ollama_model').on('change', onOllamaModelSelect);
$('#openrouter_model').on('change', onOpenRouterModelSelect);
$('#ollama_download_model').on('click', downloadOllamaModel);
$('#vllm_model').on('change', onVllmModelSelect);
$('#aphrodite_model').on('change', onAphroditeModelSelect);
if (!isMobile()) {
@ -470,6 +514,13 @@ jQuery(function () {
width: '100%',
templateResult: getOpenRouterModelTemplate,
});
$('#vllm_model').select2({
placeholder: 'Select a model',
searchInputPlaceholder: 'Search models...',
searchInputCssClass: 'text_pole',
width: '100%',
templateResult: getVllmModelTemplate,
});
$('#aphrodite_model').select2({
placeholder: 'Select a model',
searchInputPlaceholder: 'Search models...',


@ -28,6 +28,7 @@ export {
export const textgen_types = {
OOBA: 'ooba',
MANCER: 'mancer',
VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@ -39,7 +40,7 @@ export const textgen_types = {
OPENROUTER: 'openrouter',
};
const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
const LLAMACPP_DEFAULT_ORDER = [
'top_k',
@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api';
const SERVER_INPUTS = {
[textgen_types.OOBA]: '#textgenerationwebui_api_url_text',
[textgen_types.VLLM]: '#vllm_api_url_text',
[textgen_types.APHRODITE]: '#aphrodite_api_url_text',
[textgen_types.TABBY]: '#tabby_api_url_text',
[textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text',
@ -135,8 +137,8 @@ const settings = {
samplers: LLAMACPP_DEFAULT_ORDER,
//n_aphrodite: 1,
//best_of_aphrodite: 1,
ignore_eos_token_aphrodite: false,
spaces_between_special_tokens_aphrodite: true,
ignore_eos_token: false,
spaces_between_special_tokens: true,
//logits_processors_aphrodite: [],
//log_probs_aphrodite: 0,
//prompt_log_probs_aphrodite: 0,
@ -146,6 +148,7 @@ const settings = {
infermaticai_model: '',
ollama_model: '',
openrouter_model: 'openrouter/auto',
vllm_model: '',
aphrodite_model: '',
dreamgen_model: 'opus-v1-xl/text',
legacy_api: false,
@ -208,8 +211,8 @@ const setting_names = [
'legacy_api',
//'n_aphrodite',
//'best_of_aphrodite',
'ignore_eos_token_aphrodite',
'spaces_between_special_tokens_aphrodite',
'ignore_eos_token',
'spaces_between_special_tokens',
//'logits_processors_aphrodite',
//'log_probs_aphrodite',
//'prompt_log_probs_aphrodite'
@ -454,18 +457,6 @@ function loadTextGenSettings(data, loadedSettings) {
showTypeSpecificControls(settings.type);
BIAS_CACHE.delete(BIAS_KEY);
displayLogitBias(settings.logit_bias, BIAS_KEY);
//this is needed because showTypeSpecificControls() does not handle NOT declarations
if (settings.type === textgen_types.APHRODITE) {
$('[data-forAphro="False"]').each(function () {
$(this).hide();
});
} else {
$('[data-forAphro="False"]').each(function () {
if ($(this).css('display') !== 'none') { //if it wasn't already hidden by showTypeSpecificControls
$(this).show();
}
});
}
registerDebugFunction('change-mancer-url', 'Change Mancer base URL', 'Change Mancer API server base URL', () => {
const result = prompt(`Enter Mancer base URL\nDefault: ${MANCER_SERVER_DEFAULT}`, MANCER_SERVER);
@ -587,27 +578,19 @@ jQuery(function () {
const type = String($(this).val());
settings.type = type;
if (settings.type === textgen_types.APHRODITE) {
//this is needed because showTypeSpecificControls() does not handle NOT declarations
$('[data-forAphro="False"]').each(function () {
$(this).hide();
});
if (settings.type === textgen_types.VLLM || settings.type === textgen_types.APHRODITE) {
$('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1
$('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample'
$('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list do to this'
//special handling for Aphrodite topK -1 disable state
//special handling for vLLM/Aphrodite topK -1 disable state
$('#top_k_textgenerationwebui').attr('min', -1);
if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) {
settings['top_k'] = -1;
$('#top_k_textgenerationwebui').val('-1').trigger('input');
}
} else {
//this is needed because showTypeSpecificControls() does not handle NOT declarations
$('[data-forAphro="False"]').each(function () {
$(this).show();
});
$('#mirostat_mode_textgenerationwebui').attr('step', 1);
//undo special Aphrodite setup for topK
//undo special vLLM/Aphrodite setup for topK
$('#top_k_textgenerationwebui').attr('min', 0);
if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) {
settings['top_k'] = 0;
@ -711,9 +694,10 @@ jQuery(function () {
const value = Number($(this).val());
$(`#${id}_counter_textgenerationwebui`).val(value);
settings[id] = value;
//special handling for aphrodite using -1 as disabled instead of 0
//special handling for vLLM/Aphrodite using -1 as disabled instead of 0
if ($(this).attr('id') === 'top_k_textgenerationwebui' &&
settings.type === textgen_types.APHRODITE &&
(settings.type === textgen_types.VLLM ||
settings.type === textgen_types.APHRODITE) &&
value === 0) {
settings[id] = -1;
$(this).val(-1);
@ -869,6 +853,7 @@ export function parseTextgenLogprobs(token, logprobs) {
switch (settings.type) {
case TABBY:
case VLLM:
case APHRODITE:
case MANCER:
case OOBA: {
@ -964,6 +949,8 @@ function getModel() {
return settings.dreamgen_model;
case OPENROUTER:
return settings.openrouter_model;
case VLLM:
return settings.vllm_model;
case APHRODITE:
return settings.aphrodite_model;
case OLLAMA:
@ -1061,11 +1048,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
'ignore_eos': settings.ban_eos_token,
'n_probs': power_user.request_token_probabilities ? 10 : undefined,
};
const vllmParams = {
'n': canMultiSwipe ? settings.n : 1,
'best_of': canMultiSwipe ? settings.n : 1,
'ignore_eos': settings.ignore_eos_token,
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
'seed': settings.seed,
};
const aphroditeParams = {
'n': canMultiSwipe ? settings.n : 1,
'best_of': canMultiSwipe ? settings.n : 1,
'ignore_eos': settings.ignore_eos_token_aphrodite,
'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite,
'ignore_eos': settings.ignore_eos_token,
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
'grammar': settings.grammar_string,
//'logits_processors': settings.logits_processors_aphrodite,
//'logprobs': settings.log_probs_aphrodite,
@ -1087,10 +1081,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
delete params.dynatemp_high;
}
if (settings.type === APHRODITE) {
params = Object.assign(params, aphroditeParams);
} else {
params = Object.assign(params, nonAphroditeParams);
switch (settings.type) {
case VLLM:
params = Object.assign(params, vllmParams);
break;
case APHRODITE:
params = Object.assign(params, aphroditeParams);
break;
default:
params = Object.assign(params, nonAphroditeParams);
break;
}
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
@ -1119,4 +1121,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
return params;
}


@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
}
/**
* Gets the headers for the vLLM API.
* @param {import('./users').UserDirectoryList} directories User directories
* @returns {object} Headers for the request
*/
function getVllmHeaders(directories) {
const apiKey = readSecret(directories, SECRET_KEYS.VLLM);
return apiKey ? ({
'Authorization': `Bearer ${apiKey}`,
}) : {};
}
/**
* Gets the headers for the Aphrodite API.
* @param {import('./users').UserDirectoryList} directories User directories
@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
function setAdditionalHeaders(request, args, server) {
const headerGetters = {
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
[TEXTGEN_TYPES.VLLM]: getVllmHeaders,
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,


@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
const TEXTGEN_TYPES = {
OOBA: 'ooba',
MANCER: 'mancer',
VLLM: 'vllm',
APHRODITE: 'aphrodite',
TABBY: 'tabby',
KOBOLDCPP: 'koboldcpp',
@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
'stop',
];
// https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
const VLLM_KEYS = [
'model',
'prompt',
'best_of',
'echo',
'frequency_penalty',
'logit_bias',
'logprobs',
'max_tokens',
'n',
'presence_penalty',
'seed',
'stop',
'stream',
'suffix',
'temperature',
'top_p',
'user',
'use_beam_search',
'top_k',
'min_p',
'repetition_penalty',
'length_penalty',
'early_stopping',
'stop_token_ids',
'ignore_eos',
'min_tokens',
'skip_special_tokens',
'spaces_between_special_tokens',
'truncate_prompt_tokens',
'include_stop_str_in_output',
'response_format',
'guided_json',
'guided_regex',
'guided_choice',
'guided_grammar',
'guided_decoding_backend',
'guided_whitespace_pattern',
];
module.exports = {
DEFAULT_USER,
DEFAULT_AVATAR,
@ -318,4 +362,5 @@ module.exports = {
DREAMGEN_KEYS,
OPENROUTER_HEADERS,
OPENROUTER_KEYS,
VLLM_KEYS,
};


@ -4,7 +4,7 @@ const _ = require('lodash');
const Readable = require('stream').Readable;
const { jsonParser } = require('../../express-common');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
const { forwardFetchResponse, trimV1 } = require('../../util');
const { setAdditionalHeaders } = require('../../additional-headers');
@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.KOBOLDCPP:
case TEXTGEN_TYPES.LLAMACPP:
@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
url += '/v1/generate';
} else {
switch (request.body.api_type) {
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.TABBY:
@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
args.body = JSON.stringify(request.body);
}
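
To illustrate the whitelist above (a sketch that reuses the lodash instance and `VLLM_KEYS` already in scope in this file; the sample body mixes real protocol keys with `api_type`, an internal routing field, and the model name is a placeholder):

```js
// Only keys listed in VLLM_KEYS survive; SillyTavern-internal fields are stripped
// before the body is forwarded to the vLLM server.
const incoming = {
    api_type: 'vllm',          // internal field, dropped by the filter
    model: 'my-org/my-model',  // kept
    prompt: 'Hello',           // kept
    max_tokens: 64,            // kept
    seed: -1,                  // kept
};
const outgoing = _.pickBy(incoming, (_, key) => VLLM_KEYS.includes(key));
// outgoing -> { model: 'my-org/my-model', prompt: 'Hello', max_tokens: 64, seed: -1 }
```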
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
args.body = JSON.stringify({
model: request.body.model,


@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
const SECRET_KEYS = {
HORDE: 'api_key_horde',
MANCER: 'api_key_mancer',
VLLM: 'api_key_vllm',
APHRODITE: 'api_key_aphrodite',
TABBY: 'api_key_tabby',
OPENAI: 'api_key_openai',


@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
url += '/tokenize';
args.body = JSON.stringify({ 'content': text });
break;
case TEXTGEN_TYPES.VLLM:
return response.send({ error: true });
case TEXTGEN_TYPES.APHRODITE:
url += '/v1/tokenize';
args.body = JSON.stringify({ 'prompt': text });