Initial vLLM support
This commit is contained in:
parent
1a219e32fe
commit
2bd239fe81
|
@ -33,8 +33,8 @@
|
|||
"negative_prompt": "",
|
||||
"grammar_string": "",
|
||||
"banned_tokens": "",
|
||||
"ignore_eos_token_aphrodite": false,
|
||||
"spaces_between_special_tokens_aphrodite": true,
|
||||
"ignore_eos_token": false,
|
||||
"spaces_between_special_tokens": true,
|
||||
"type": "ooba",
|
||||
"legacy_api": false,
|
||||
"sampler_order": [
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
"negative_prompt": "",
|
||||
"grammar_string": "",
|
||||
"banned_tokens": "",
|
||||
"ignore_eos_token_aphrodite": false,
|
||||
"spaces_between_special_tokens_aphrodite": true,
|
||||
"ignore_eos_token": false,
|
||||
"spaces_between_special_tokens": true,
|
||||
"type": "ooba",
|
||||
"legacy_api": false,
|
||||
"sampler_order": [
|
||||
|
|
|
@ -33,8 +33,8 @@
|
|||
"negative_prompt": "",
|
||||
"grammar_string": "",
|
||||
"banned_tokens": "",
|
||||
"ignore_eos_token_aphrodite": false,
|
||||
"spaces_between_special_tokens_aphrodite": true,
|
||||
"ignore_eos_token": false,
|
||||
"spaces_between_special_tokens": true,
|
||||
"type": "ooba",
|
||||
"legacy_api": false,
|
||||
"sampler_order": [
|
||||
|
|
|
@ -1125,7 +1125,7 @@
|
|||
<div class="fa-solid fa-circle-info opacity50p" title="Set all samplers to their neutral/disabled state." data-i18n="[title]Set all samplers to their neutral/disabled state."></div>
|
||||
</small>
|
||||
</div>
|
||||
<div data-newbie-hidden data-tg-type="mancer, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
|
||||
<div data-newbie-hidden data-tg-type="mancer, vllm, aphrodite" class="flex-container flexFlowColumn alignitemscenter flexBasis100p flexGrow flexShrink gap0">
|
||||
<small data-i18n="Multiple swipes per generation">Multiple swipes per generation</small>
|
||||
<input type="number" id="n_textgenerationwebui" class="text_pole textAlignCenter" min="1" value="1" step="1" />
|
||||
</div>
|
||||
|
@ -1399,8 +1399,8 @@
|
|||
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Ban the eos_token. This forces the model to never end the generation prematurely" title="Ban the eos_token. This forces the model to never end the generation prematurely."></div>
|
||||
</label>
|
||||
</label>
|
||||
<label data-tg-type="aphrodite" class="checkbox_label" for="ignore_eos_token_aphrodite_textgenerationwebui">
|
||||
<input type="checkbox" id="ignore_eos_token_aphrodite_textgenerationwebui" />
|
||||
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="ignore_eos_token_textgenerationwebui">
|
||||
<input type="checkbox" id="ignore_eos_token_textgenerationwebui" />
|
||||
<small data-i18n="Ignore EOS Token">Ignore EOS Token
|
||||
<!-- Tooltip icon: the [title] prefix makes i18n translate the title attribute, matching the other info icons in this panel. -->
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Ignore the EOS Token even if it generates." title="Ignore the EOS Token even if it generates."></div>
|
||||
</small>
|
||||
|
@ -1417,8 +1417,8 @@
|
|||
</label>
|
||||
</label>
|
||||
|
||||
<label data-tg-type="aphrodite" class="checkbox_label" for="spaces_between_special_tokens_aphrodite_textgenerationwebui">
|
||||
<input type="checkbox" id="spaces_between_special_tokens_aphrodite_textgenerationwebui" />
|
||||
<label data-tg-type="vllm, aphrodite" class="checkbox_label" for="spaces_between_special_tokens_textgenerationwebui">
|
||||
<input type="checkbox" id="spaces_between_special_tokens_textgenerationwebui" />
|
||||
<small data-i18n="Spaces Between Special Tokens">Spaces Between Special Tokens</small>
|
||||
</label>
|
||||
</div>
|
||||
|
@ -1948,7 +1948,8 @@
|
|||
<div>
|
||||
<h4 data-i18n="API Type">API Type</h4>
|
||||
<select id="textgen_type">
|
||||
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, vLLM, LM Studio, etc.]</option>
|
||||
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, LM Studio, etc.]</option>
|
||||
<option value="vllm">vLLM</option>
|
||||
<option value="aphrodite">Aphrodite</option>
|
||||
<option value="dreamgen">DreamGen</option>
|
||||
<option value="infermaticai">InfermaticAI</option>
|
||||
|
@ -2098,6 +2099,36 @@
|
|||
</div>
|
||||
<input id="custom_model_textgenerationwebui" class="text_pole wide100p" maxlength="500" placeholder="Custom model (optional)" data-i18n="[placeholder]Custom model (optional)" type="text">
|
||||
</div>
|
||||
<div data-tg-type="vllm">
|
||||
<div class="flex-container flexFlowColumn">
|
||||
<a href="https://github.com/vllm-project/vllm" target="_blank" data-i18n="vllm-project/vllm">
|
||||
vllm-project/vllm (OpenAI API wrapper mode)
|
||||
</a>
|
||||
</div>
|
||||
<h4 data-i18n="vLLM API key">vLLM API key</h4>
|
||||
<div class="flex-container">
|
||||
<input id="api_key_vllm" name="api_key_vllm" class="text_pole flex1 wide100p" maxlength="500" size="35" type="text" autocomplete="off">
|
||||
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_vllm">
|
||||
</div>
|
||||
</div>
|
||||
<div data-for="api_key_vllm" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
|
||||
For privacy reasons, your API key will be hidden after you reload the page.
|
||||
</div>
|
||||
<div class="flex1">
|
||||
<h4 data-i18n="API url">API URL</h4>
|
||||
<small data-i18n="Example: 127.0.0.1:8000">Example: http://127.0.0.1:8000</small>
|
||||
<input id="vllm_api_url_text" class="text_pole wide100p" maxlength="500" value="" autocomplete="off" data-server-history="vllm">
|
||||
</div>
|
||||
<div>
|
||||
<h4 data-i18n="vLLM Model">vLLM Model</h4>
|
||||
<select id="vllm_model">
|
||||
<option data-i18n="-- Connect to the API --">
|
||||
-- Connect to the API --
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div data-tg-type="aphrodite">
|
||||
<div class="flex-container flexFlowColumn">
|
||||
<a href="https://github.com/PygmalionAI/aphrodite-engine" target="_blank" data-i18n="PygmalionAI/aphrodite-engine">
|
||||
|
@ -2218,7 +2249,7 @@
|
|||
</div>
|
||||
</div>
|
||||
<div class="flex-container">
|
||||
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,aphrodite,tabby,koboldcpp">Connect</div>
|
||||
<div id="api_button_textgenerationwebui" class="api_button menu_button" type="submit" data-i18n="Connect" data-server-connect="ooba_blocking,vllm,aphrodite,tabby,koboldcpp">Connect</div>
|
||||
<div data-tg-type="openrouter" class="menu_button menu_button_icon openrouter_authorize" title="Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai" data-i18n="Authorize;[title]Get your OpenRouter API token using OAuth flow. You will be redirected to openrouter.ai">Authorize</div>
|
||||
<div class="api_loading menu_button" data-i18n="Cancel">Cancel</div>
|
||||
</div>
|
||||
|
|
|
@ -22,7 +22,7 @@ import {
|
|||
parseTabbyLogprobs,
|
||||
} from './scripts/textgen-settings.js';
|
||||
|
||||
const { MANCER, TOGETHERAI, OOBA, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
|
||||
const { MANCER, TOGETHERAI, OOBA, VLLM, APHRODITE, OLLAMA, INFERMATICAI, DREAMGEN, OPENROUTER } = textgen_types;
|
||||
|
||||
import {
|
||||
world_info,
|
||||
|
@ -218,7 +218,7 @@ import {
|
|||
import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js';
|
||||
import { hideLoader, showLoader } from './scripts/loader.js';
|
||||
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
|
||||
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
|
||||
import { loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels } from './scripts/textgen-models.js';
|
||||
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId } from './scripts/chats.js';
|
||||
import { initPresetManager } from './scripts/preset-manager.js';
|
||||
import { evaluateMacros } from './scripts/macros.js';
|
||||
|
@ -1071,6 +1071,9 @@ async function getStatusTextgen() {
|
|||
} else if (textgen_settings.type === OPENROUTER) {
|
||||
loadOpenRouterModels(data?.data);
|
||||
online_status = textgen_settings.openrouter_model;
|
||||
} else if (textgen_settings.type === VLLM) {
|
||||
loadVllmModels(data?.data);
|
||||
online_status = textgen_settings.vllm_model;
|
||||
} else if (textgen_settings.type === APHRODITE) {
|
||||
loadAphroditeModels(data?.data);
|
||||
online_status = textgen_settings.aphrodite_model;
|
||||
|
@ -4832,6 +4835,7 @@ function parseAndSaveLogprobs(data, continueFrom) {
|
|||
case textgen_types.LLAMACPP: {
|
||||
logprobs = data?.completion_probabilities?.map(x => parseTextgenLogprobs(x.content, [x])) || null;
|
||||
} break;
|
||||
case textgen_types.VLLM:
|
||||
case textgen_types.APHRODITE:
|
||||
case textgen_types.MANCER:
|
||||
case textgen_types.TABBY: {
|
||||
|
@ -4888,7 +4892,7 @@ function extractMultiSwipes(data, type) {
|
|||
return swipes;
|
||||
}
|
||||
|
||||
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, APHRODITE].includes(textgen_settings.type))) {
|
||||
if (main_api === 'openai' || (main_api === 'textgenerationwebui' && [MANCER, VLLM, APHRODITE].includes(textgen_settings.type))) {
|
||||
if (!Array.isArray(data.choices)) {
|
||||
return swipes;
|
||||
}
|
||||
|
@ -7922,6 +7926,11 @@ const CONNECT_API_MAP = {
|
|||
button: '#api_button_textgenerationwebui',
|
||||
type: textgen_types.MANCER,
|
||||
},
|
||||
'vllm': {
|
||||
selected: 'textgenerationwebui',
|
||||
button: '#api_button_textgenerationwebui',
|
||||
type: textgen_types.VLLM,
|
||||
},
|
||||
'aphrodite': {
|
||||
selected: 'textgenerationwebui',
|
||||
button: '#api_button_textgenerationwebui',
|
||||
|
@ -8896,6 +8905,7 @@ jQuery(async function () {
|
|||
$('#api_button_textgenerationwebui').on('click', async function (e) {
|
||||
const keys = [
|
||||
{ id: 'api_key_mancer', secret: SECRET_KEYS.MANCER },
|
||||
{ id: 'api_key_vllm', secret: SECRET_KEYS.VLLM },
|
||||
{ id: 'api_key_aphrodite', secret: SECRET_KEYS.APHRODITE },
|
||||
{ id: 'api_key_tabby', secret: SECRET_KEYS.TABBY },
|
||||
{ id: 'api_key_togetherai', secret: SECRET_KEYS.TOGETHERAI },
|
||||
|
|
|
@ -309,6 +309,7 @@ class PresetManager {
|
|||
'mancer_model',
|
||||
'togetherai_model',
|
||||
'ollama_model',
|
||||
'vllm_model',
|
||||
'aphrodite_model',
|
||||
'server_urls',
|
||||
'type',
|
||||
|
|
|
@ -3,6 +3,7 @@ import { callPopup, getRequestHeaders } from '../script.js';
|
|||
export const SECRET_KEYS = {
|
||||
HORDE: 'api_key_horde',
|
||||
MANCER: 'api_key_mancer',
|
||||
VLLM: 'api_key_vllm',
|
||||
APHRODITE: 'api_key_aphrodite',
|
||||
TABBY: 'api_key_tabby',
|
||||
OPENAI: 'api_key_openai',
|
||||
|
@ -38,6 +39,7 @@ const INPUT_MAP = {
|
|||
[SECRET_KEYS.AI21]: '#api_key_ai21',
|
||||
[SECRET_KEYS.SCALE_COOKIE]: '#scale_cookie',
|
||||
[SECRET_KEYS.MAKERSUITE]: '#api_key_makersuite',
|
||||
[SECRET_KEYS.VLLM]: '#api_key_vllm',
|
||||
[SECRET_KEYS.APHRODITE]: '#api_key_aphrodite',
|
||||
[SECRET_KEYS.TABBY]: '#api_key_tabby',
|
||||
[SECRET_KEYS.MISTRALAI]: '#api_key_mistralai',
|
||||
|
|
|
@ -1665,6 +1665,7 @@ function modelCallback(_, model) {
|
|||
{ id: 'model_infermaticai_select', api: 'textgenerationwebui', type: textgen_types.INFERMATICAI },
|
||||
{ id: 'model_dreamgen_select', api: 'textgenerationwebui', type: textgen_types.DREAMGEN },
|
||||
{ id: 'mancer_model', api: 'textgenerationwebui', type: textgen_types.MANCER },
|
||||
{ id: 'vllm_model', api: 'textgenerationwebui', type: textgen_types.VLLM },
|
||||
{ id: 'aphrodite_model', api: 'textgenerationwebui', type: textgen_types.APHRODITE },
|
||||
{ id: 'ollama_model', api: 'textgenerationwebui', type: textgen_types.OLLAMA },
|
||||
{ id: 'model_openai_select', api: 'openai', type: chat_completion_sources.OPENAI },
|
||||
|
|
|
@ -7,6 +7,7 @@ let mancerModels = [];
|
|||
let togetherModels = [];
|
||||
let infermaticAIModels = [];
|
||||
let dreamGenModels = [];
|
||||
let vllmModels = [];
|
||||
let aphroditeModels = [];
|
||||
export let openRouterModels = [];
|
||||
|
||||
|
@ -156,6 +157,28 @@ export async function loadOpenRouterModels(data) {
|
|||
calculateOpenRouterCost();
|
||||
}
|
||||
|
||||
/**
 * Stores the model list reported for a vLLM backend and rebuilds the
 * `#vllm_model` dropdown from it.
 *
 * When the model currently saved in `textgen_settings.vllm_model` is not in
 * the list, the setting falls back to the first listed model id, or to an
 * empty string when the list is empty.
 *
 * @param {Array<{id: string}>} data Model entries; only the `id` field is read here.
 * @returns {Promise<void>} Resolves once the dropdown has been rebuilt. Non-array input is logged and ignored.
 */
export async function loadVllmModels(data) {
    if (!Array.isArray(data)) {
        console.error('Invalid vLLM models data', data);
        return;
    }

    vllmModels = data;

    // Only existence matters here, so `some` is enough.
    const isKnownModel = data.some(x => x.id === textgen_settings.vllm_model);
    if (!isKnownModel) {
        textgen_settings.vllm_model = data[0]?.id || '';
    }

    // Look the dropdown up once instead of once per model.
    const select = $('#vllm_model');
    select.empty();

    for (const model of data) {
        const option = document.createElement('option');
        option.value = model.id;
        option.text = model.id;
        option.selected = model.id === textgen_settings.vllm_model;
        select.append(option);
    }
}
|
||||
|
||||
export async function loadAphroditeModels(data) {
|
||||
if (!Array.isArray(data)) {
|
||||
console.error('Invalid Aphrodite models data', data);
|
||||
|
@ -224,6 +247,12 @@ function onOpenRouterModelSelect() {
|
|||
setGenerationParamsFromPreset({ max_length: model.context_length });
|
||||
}
|
||||
|
||||
/**
 * Change handler for the `#vllm_model` dropdown.
 * Saves the selected model id into `textgen_settings.vllm_model` and then
 * clicks the text-generation connect button.
 */
function onVllmModelSelect() {
    const selected = $('#vllm_model').val();
    // String(null) / String(undefined) would store the literal text
    // "null" / "undefined" as the model id, so map "nothing selected" to ''.
    textgen_settings.vllm_model = selected == null ? '' : String(selected);
    $('#api_button_textgenerationwebui').trigger('click');
}
|
||||
|
||||
function onAphroditeModelSelect() {
|
||||
const modelId = String($('#aphrodite_model').val());
|
||||
textgen_settings.aphrodite_model = modelId;
|
||||
|
@ -310,6 +339,20 @@ function getOpenRouterModelTemplate(option) {
|
|||
`));
|
||||
}
|
||||
|
||||
/**
 * Renders one entry of the vLLM model dropdown (used as the select2
 * `templateResult` callback for `#vllm_model`).
 *
 * @param {object} option Entry being rendered; `id`, `text` and `element.value` are read.
 * @returns {JQuery<HTMLElement>|string|undefined} Markup with the sanitized model id when the
 * entry maps to a loaded vLLM model, otherwise the entry's plain text.
 */
function getVllmModelTemplate(option) {
    const model = vllmModels.find(x => x.id === option?.element?.value);

    // Entries without an id or without a matching model keep their plain text.
    // `option` is accessed null-safely here to match the lookup above.
    if (!option?.id || !model) {
        return option?.text;
    }

    return $((`
        <div class="flex-container flexFlowColumn">
            <div><strong>${DOMPurify.sanitize(model.id)}</strong></div>
        </div>
    `));
}
|
||||
|
||||
function getAphroditeModelTemplate(option) {
|
||||
const model = aphroditeModels.find(x => x.id === option?.element?.value);
|
||||
|
||||
|
@ -426,6 +469,7 @@ jQuery(function () {
|
|||
$('#ollama_model').on('change', onOllamaModelSelect);
|
||||
$('#openrouter_model').on('change', onOpenRouterModelSelect);
|
||||
$('#ollama_download_model').on('click', downloadOllamaModel);
|
||||
$('#vllm_model').on('change', onVllmModelSelect);
|
||||
$('#aphrodite_model').on('change', onAphroditeModelSelect);
|
||||
|
||||
if (!isMobile()) {
|
||||
|
@ -470,6 +514,13 @@ jQuery(function () {
|
|||
width: '100%',
|
||||
templateResult: getOpenRouterModelTemplate,
|
||||
});
|
||||
$('#vllm_model').select2({
|
||||
placeholder: 'Select a model',
|
||||
searchInputPlaceholder: 'Search models...',
|
||||
searchInputCssClass: 'text_pole',
|
||||
width: '100%',
|
||||
templateResult: getVllmModelTemplate,
|
||||
});
|
||||
$('#aphrodite_model').select2({
|
||||
placeholder: 'Select a model',
|
||||
searchInputPlaceholder: 'Search models...',
|
||||
|
|
|
@ -28,6 +28,7 @@ export {
|
|||
export const textgen_types = {
|
||||
OOBA: 'ooba',
|
||||
MANCER: 'mancer',
|
||||
VLLM: 'vllm',
|
||||
APHRODITE: 'aphrodite',
|
||||
TABBY: 'tabby',
|
||||
KOBOLDCPP: 'koboldcpp',
|
||||
|
@ -39,7 +40,7 @@ export const textgen_types = {
|
|||
OPENROUTER: 'openrouter',
|
||||
};
|
||||
|
||||
const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
|
||||
const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
|
||||
|
||||
const LLAMACPP_DEFAULT_ORDER = [
|
||||
'top_k',
|
||||
|
@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api';
|
|||
|
||||
const SERVER_INPUTS = {
|
||||
[textgen_types.OOBA]: '#textgenerationwebui_api_url_text',
|
||||
[textgen_types.VLLM]: '#vllm_api_url_text',
|
||||
[textgen_types.APHRODITE]: '#aphrodite_api_url_text',
|
||||
[textgen_types.TABBY]: '#tabby_api_url_text',
|
||||
[textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text',
|
||||
|
@ -135,8 +137,8 @@ const settings = {
|
|||
samplers: LLAMACPP_DEFAULT_ORDER,
|
||||
//n_aphrodite: 1,
|
||||
//best_of_aphrodite: 1,
|
||||
ignore_eos_token_aphrodite: false,
|
||||
spaces_between_special_tokens_aphrodite: true,
|
||||
ignore_eos_token: false,
|
||||
spaces_between_special_tokens: true,
|
||||
//logits_processors_aphrodite: [],
|
||||
//log_probs_aphrodite: 0,
|
||||
//prompt_log_probs_aphrodite: 0,
|
||||
|
@ -146,6 +148,7 @@ const settings = {
|
|||
infermaticai_model: '',
|
||||
ollama_model: '',
|
||||
openrouter_model: 'openrouter/auto',
|
||||
vllm_model: '',
|
||||
aphrodite_model: '',
|
||||
dreamgen_model: 'opus-v1-xl/text',
|
||||
legacy_api: false,
|
||||
|
@ -208,8 +211,8 @@ const setting_names = [
|
|||
'legacy_api',
|
||||
//'n_aphrodite',
|
||||
//'best_of_aphrodite',
|
||||
'ignore_eos_token_aphrodite',
|
||||
'spaces_between_special_tokens_aphrodite',
|
||||
'ignore_eos_token',
|
||||
'spaces_between_special_tokens',
|
||||
//'logits_processors_aphrodite',
|
||||
//'log_probs_aphrodite',
|
||||
//'prompt_log_probs_aphrodite'
|
||||
|
@ -587,15 +590,22 @@ jQuery(function () {
|
|||
const type = String($(this).val());
|
||||
settings.type = type;
|
||||
|
||||
if (settings.type === textgen_types.VLLM || settings.type === textgen_types.APHRODITE) {
|
||||
if (settings.type === textgen_types.APHRODITE) {
|
||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||
$('[data-forAphro="False"]').each(function () {
|
||||
$(this).hide();
|
||||
});
|
||||
} else {
|
||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||
$('[data-forAphro="False"]').each(function () {
|
||||
$(this).show();
|
||||
});
|
||||
}
|
||||
$('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1
|
||||
$('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample'
|
||||
$('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list do to this'
|
||||
//special handling for Aphrodite topK -1 disable state
|
||||
//special handling for vLLM/Aphrodite topK -1 disable state
|
||||
$('#top_k_textgenerationwebui').attr('min', -1);
|
||||
if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) {
|
||||
settings['top_k'] = -1;
|
||||
|
@ -607,7 +617,7 @@ jQuery(function () {
|
|||
$(this).show();
|
||||
});
|
||||
$('#mirostat_mode_textgenerationwebui').attr('step', 1);
|
||||
//undo special Aphrodite setup for topK
|
||||
//undo special vLLM/Aphrodite setup for topK
|
||||
$('#top_k_textgenerationwebui').attr('min', 0);
|
||||
if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) {
|
||||
settings['top_k'] = 0;
|
||||
|
@ -711,9 +721,10 @@ jQuery(function () {
|
|||
const value = Number($(this).val());
|
||||
$(`#${id}_counter_textgenerationwebui`).val(value);
|
||||
settings[id] = value;
|
||||
//special handling for aphrodite using -1 as disabled instead of 0
|
||||
//special handling for vLLM/Aphrodite using -1 as disabled instead of 0
|
||||
if ($(this).attr('id') === 'top_k_textgenerationwebui' &&
|
||||
settings.type === textgen_types.APHRODITE &&
|
||||
(settings.type === textgen_types.VLLM ||
|
||||
settings.type === textgen_types.APHRODITE) &&
|
||||
value === 0) {
|
||||
settings[id] = -1;
|
||||
$(this).val(-1);
|
||||
|
@ -869,6 +880,7 @@ export function parseTextgenLogprobs(token, logprobs) {
|
|||
|
||||
switch (settings.type) {
|
||||
case TABBY:
|
||||
case VLLM:
|
||||
case APHRODITE:
|
||||
case MANCER:
|
||||
case OOBA: {
|
||||
|
@ -964,6 +976,8 @@ function getModel() {
|
|||
return settings.dreamgen_model;
|
||||
case OPENROUTER:
|
||||
return settings.openrouter_model;
|
||||
case VLLM:
|
||||
return settings.vllm_model;
|
||||
case APHRODITE:
|
||||
return settings.aphrodite_model;
|
||||
case OLLAMA:
|
||||
|
@ -1061,11 +1075,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||
'ignore_eos': settings.ban_eos_token,
|
||||
'n_probs': power_user.request_token_probabilities ? 10 : undefined,
|
||||
};
|
||||
const vllmParams = {
|
||||
'n': canMultiSwipe ? settings.n : 1,
|
||||
'best_of': canMultiSwipe ? settings.n : 1,
|
||||
'ignore_eos': settings.ignore_eos_token,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
|
||||
'seed': settings.seed,
|
||||
};
|
||||
const aphroditeParams = {
|
||||
'n': canMultiSwipe ? settings.n : 1,
|
||||
'best_of': canMultiSwipe ? settings.n : 1,
|
||||
'ignore_eos': settings.ignore_eos_token_aphrodite,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite,
|
||||
'ignore_eos': settings.ignore_eos_token,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
|
||||
'grammar': settings.grammar_string,
|
||||
//'logits_processors': settings.logits_processors_aphrodite,
|
||||
//'logprobs': settings.log_probs_aphrodite,
|
||||
|
@ -1087,10 +1108,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||
delete params.dynatemp_high;
|
||||
}
|
||||
|
||||
if (settings.type === APHRODITE) {
|
||||
switch (settings.type) {
|
||||
case VLLM:
|
||||
params = Object.assign(params, vllmParams);
|
||||
break;
|
||||
|
||||
case APHRODITE:
|
||||
params = Object.assign(params, aphroditeParams);
|
||||
} else {
|
||||
break;
|
||||
|
||||
default:
|
||||
params = Object.assign(params, nonAphroditeParams);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
|
||||
|
@ -1119,4 +1148,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
|||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
|
|
@ -67,6 +67,19 @@ function getOpenRouterHeaders(directories) {
|
|||
return apiKey ? Object.assign(baseHeaders, { 'Authorization': `Bearer ${apiKey}` }) : baseHeaders;
|
||||
}
|
||||
|
||||
/**
 * Builds the extra request headers for the vLLM API.
 * @param {import('./users').UserDirectoryList} directories User directories
 * @returns {object} A Bearer `Authorization` header when a vLLM secret is stored, otherwise an empty object
 */
function getVllmHeaders(directories) {
    const token = readSecret(directories, SECRET_KEYS.VLLM);

    // No stored key: send no extra headers.
    if (!token) {
        return {};
    }

    return { 'Authorization': `Bearer ${token}` };
}
|
||||
|
||||
/**
|
||||
* Gets the headers for the Aphrodite API.
|
||||
* @param {import('./users').UserDirectoryList} directories User directories
|
||||
|
@ -153,6 +166,7 @@ function getOverrideHeaders(urlHost) {
|
|||
function setAdditionalHeaders(request, args, server) {
|
||||
const headerGetters = {
|
||||
[TEXTGEN_TYPES.MANCER]: getMancerHeaders,
|
||||
[TEXTGEN_TYPES.VLLM]: getVllmHeaders,
|
||||
[TEXTGEN_TYPES.APHRODITE]: getAphroditeHeaders,
|
||||
[TEXTGEN_TYPES.TABBY]: getTabbyHeaders,
|
||||
[TEXTGEN_TYPES.TOGETHERAI]: getTogetherAIHeaders,
|
||||
|
|
|
@ -200,6 +200,7 @@ const UPLOADS_PATH = './uploads';
|
|||
const TEXTGEN_TYPES = {
|
||||
OOBA: 'ooba',
|
||||
MANCER: 'mancer',
|
||||
VLLM: 'vllm',
|
||||
APHRODITE: 'aphrodite',
|
||||
TABBY: 'tabby',
|
||||
KOBOLDCPP: 'koboldcpp',
|
||||
|
@ -298,6 +299,49 @@ const OPENROUTER_KEYS = [
|
|||
'stop',
|
||||
];
|
||||
|
||||
// Request body keys accepted for vLLM completions.
// https://github.com/vllm-project/vllm/blob/0f8a91401c89ac0a8018def3756829611b57727f/vllm/entrypoints/openai/protocol.py#L220
const VLLM_KEYS = [
    // First group of the referenced request definition
    'model', 'prompt', 'best_of', 'echo', 'frequency_penalty', 'logit_bias',
    'logprobs', 'max_tokens', 'n', 'presence_penalty', 'seed', 'stop',
    'stream', 'suffix', 'temperature', 'top_p', 'user',

    // Second group: additional sampling controls
    'use_beam_search', 'top_k', 'min_p', 'repetition_penalty', 'length_penalty',
    'early_stopping', 'stop_token_ids', 'ignore_eos', 'min_tokens',
    'skip_special_tokens', 'spaces_between_special_tokens', 'truncate_prompt_tokens',

    // Third group: output format and guided decoding
    'include_stop_str_in_output', 'response_format', 'guided_json', 'guided_regex',
    'guided_choice', 'guided_grammar', 'guided_decoding_backend', 'guided_whitespace_pattern',
];
|
||||
|
||||
module.exports = {
|
||||
DEFAULT_USER,
|
||||
DEFAULT_AVATAR,
|
||||
|
@ -318,4 +362,5 @@ module.exports = {
|
|||
DREAMGEN_KEYS,
|
||||
OPENROUTER_HEADERS,
|
||||
OPENROUTER_KEYS,
|
||||
VLLM_KEYS,
|
||||
};
|
||||
|
|
|
@ -4,7 +4,7 @@ const _ = require('lodash');
|
|||
const Readable = require('stream').Readable;
|
||||
|
||||
const { jsonParser } = require('../../express-common');
|
||||
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, DREAMGEN_KEYS } = require('../../constants');
|
||||
const { TEXTGEN_TYPES, TOGETHERAI_KEYS, OLLAMA_KEYS, INFERMATICAI_KEYS, OPENROUTER_KEYS, VLLM_KEYS, DREAMGEN_KEYS } = require('../../constants');
|
||||
const { forwardFetchResponse, trimV1 } = require('../../util');
|
||||
const { setAdditionalHeaders } = require('../../additional-headers');
|
||||
|
||||
|
@ -103,6 +103,7 @@ router.post('/status', jsonParser, async function (request, response) {
|
|||
} else {
|
||||
switch (request.body.api_type) {
|
||||
case TEXTGEN_TYPES.OOBA:
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
case TEXTGEN_TYPES.KOBOLDCPP:
|
||||
case TEXTGEN_TYPES.LLAMACPP:
|
||||
|
@ -233,6 +234,7 @@ router.post('/generate', jsonParser, async function (request, response) {
|
|||
url += '/v1/generate';
|
||||
} else {
|
||||
switch (request.body.api_type) {
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
case TEXTGEN_TYPES.OOBA:
|
||||
case TEXTGEN_TYPES.TABBY:
|
||||
|
@ -291,6 +293,11 @@ router.post('/generate', jsonParser, async function (request, response) {
|
|||
args.body = JSON.stringify(request.body);
|
||||
}
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.VLLM) {
|
||||
request.body = _.pickBy(request.body, (_, key) => VLLM_KEYS.includes(key));
|
||||
args.body = JSON.stringify(request.body);
|
||||
}
|
||||
|
||||
if (request.body.api_type === TEXTGEN_TYPES.OLLAMA) {
|
||||
args.body = JSON.stringify({
|
||||
model: request.body.model,
|
||||
|
|
|
@ -9,6 +9,7 @@ const SECRETS_FILE = 'secrets.json';
|
|||
const SECRET_KEYS = {
|
||||
HORDE: 'api_key_horde',
|
||||
MANCER: 'api_key_mancer',
|
||||
VLLM: 'api_key_vllm',
|
||||
APHRODITE: 'api_key_aphrodite',
|
||||
TABBY: 'api_key_tabby',
|
||||
OPENAI: 'api_key_openai',
|
||||
|
|
|
@ -720,6 +720,8 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
|
|||
url += '/tokenize';
|
||||
args.body = JSON.stringify({ 'content': text });
|
||||
break;
|
||||
case TEXTGEN_TYPES.VLLM:
|
||||
return response.send({ error: true });
|
||||
case TEXTGEN_TYPES.APHRODITE:
|
||||
url += '/v1/tokenize';
|
||||
args.body = JSON.stringify({ 'prompt': text });
|
||||
|
|
Loading…
Reference in New Issue