mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Initial vLLM support
This commit is contained in:
@@ -28,6 +28,7 @@ export {
|
||||
export const textgen_types = {
|
||||
OOBA: 'ooba',
|
||||
MANCER: 'mancer',
|
||||
VLLM: 'vllm',
|
||||
APHRODITE: 'aphrodite',
|
||||
TABBY: 'tabby',
|
||||
KOBOLDCPP: 'koboldcpp',
|
||||
@@ -39,7 +40,7 @@ export const textgen_types = {
|
||||
OPENROUTER: 'openrouter',
|
||||
};
|
||||
|
||||
const { MANCER, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
|
||||
const { MANCER, VLLM, APHRODITE, TABBY, TOGETHERAI, OOBA, OLLAMA, LLAMACPP, INFERMATICAI, DREAMGEN, OPENROUTER, KOBOLDCPP } = textgen_types;
|
||||
|
||||
const LLAMACPP_DEFAULT_ORDER = [
|
||||
'top_k',
|
||||
@@ -77,6 +78,7 @@ let OPENROUTER_SERVER = 'https://openrouter.ai/api';
|
||||
|
||||
const SERVER_INPUTS = {
|
||||
[textgen_types.OOBA]: '#textgenerationwebui_api_url_text',
|
||||
[textgen_types.VLLM]: '#vllm_api_url_text',
|
||||
[textgen_types.APHRODITE]: '#aphrodite_api_url_text',
|
||||
[textgen_types.TABBY]: '#tabby_api_url_text',
|
||||
[textgen_types.KOBOLDCPP]: '#koboldcpp_api_url_text',
|
||||
@@ -135,8 +137,8 @@ const settings = {
|
||||
samplers: LLAMACPP_DEFAULT_ORDER,
|
||||
//n_aphrodite: 1,
|
||||
//best_of_aphrodite: 1,
|
||||
ignore_eos_token_aphrodite: false,
|
||||
spaces_between_special_tokens_aphrodite: true,
|
||||
ignore_eos_token: false,
|
||||
spaces_between_special_tokens: true,
|
||||
//logits_processors_aphrodite: [],
|
||||
//log_probs_aphrodite: 0,
|
||||
//prompt_log_probs_aphrodite: 0,
|
||||
@@ -146,6 +148,7 @@ const settings = {
|
||||
infermaticai_model: '',
|
||||
ollama_model: '',
|
||||
openrouter_model: 'openrouter/auto',
|
||||
vllm_model: '',
|
||||
aphrodite_model: '',
|
||||
dreamgen_model: 'opus-v1-xl/text',
|
||||
legacy_api: false,
|
||||
@@ -208,8 +211,8 @@ const setting_names = [
|
||||
'legacy_api',
|
||||
//'n_aphrodite',
|
||||
//'best_of_aphrodite',
|
||||
'ignore_eos_token_aphrodite',
|
||||
'spaces_between_special_tokens_aphrodite',
|
||||
'ignore_eos_token',
|
||||
'spaces_between_special_tokens',
|
||||
//'logits_processors_aphrodite',
|
||||
//'log_probs_aphrodite',
|
||||
//'prompt_log_probs_aphrodite'
|
||||
@@ -587,15 +590,22 @@ jQuery(function () {
|
||||
const type = String($(this).val());
|
||||
settings.type = type;
|
||||
|
||||
if (settings.type === textgen_types.APHRODITE) {
|
||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||
$('[data-forAphro="False"]').each(function () {
|
||||
$(this).hide();
|
||||
});
|
||||
if (settings.type === textgen_types.VLLM || settings.type === textgen_types.APHRODITE) {
|
||||
if (settings.type === textgen_types.APHRODITE) {
|
||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||
$('[data-forAphro="False"]').each(function () {
|
||||
$(this).hide();
|
||||
});
|
||||
} else {
|
||||
//this is needed because showTypeSpecificControls() does not handle NOT declarations
|
||||
$('[data-forAphro="False"]').each(function () {
|
||||
$(this).show();
|
||||
});
|
||||
}
|
||||
$('#mirostat_mode_textgenerationwebui').attr('step', 2); //Aphro disallows mode 1
|
||||
$('#do_sample_textgenerationwebui').prop('checked', true); //Aphro should always do sample; 'otherwise set temp to 0 to mimic no sample'
|
||||
$('#ban_eos_token_textgenerationwebui').prop('checked', false); //Aphro should not ban EOS, just ignore it; 'add token '2' to ban list do to this'
|
||||
//special handling for Aphrodite topK -1 disable state
|
||||
//special handling for vLLM/Aphrodite topK -1 disable state
|
||||
$('#top_k_textgenerationwebui').attr('min', -1);
|
||||
if ($('#top_k_textgenerationwebui').val() === '0' || settings['top_k'] === 0) {
|
||||
settings['top_k'] = -1;
|
||||
@@ -607,7 +617,7 @@ jQuery(function () {
|
||||
$(this).show();
|
||||
});
|
||||
$('#mirostat_mode_textgenerationwebui').attr('step', 1);
|
||||
//undo special Aphrodite setup for topK
|
||||
//undo special vLLM/Aphrodite setup for topK
|
||||
$('#top_k_textgenerationwebui').attr('min', 0);
|
||||
if ($('#top_k_textgenerationwebui').val() === '-1' || settings['top_k'] === -1) {
|
||||
settings['top_k'] = 0;
|
||||
@@ -711,9 +721,10 @@ jQuery(function () {
|
||||
const value = Number($(this).val());
|
||||
$(`#${id}_counter_textgenerationwebui`).val(value);
|
||||
settings[id] = value;
|
||||
//special handling for aphrodite using -1 as disabled instead of 0
|
||||
//special handling for vLLM/Aphrodite using -1 as disabled instead of 0
|
||||
if ($(this).attr('id') === 'top_k_textgenerationwebui' &&
|
||||
settings.type === textgen_types.APHRODITE &&
|
||||
(settings.type === textgen_types.VLLM ||
|
||||
settings.type === textgen_types.APHRODITE) &&
|
||||
value === 0) {
|
||||
settings[id] = -1;
|
||||
$(this).val(-1);
|
||||
@@ -869,6 +880,7 @@ export function parseTextgenLogprobs(token, logprobs) {
|
||||
|
||||
switch (settings.type) {
|
||||
case TABBY:
|
||||
case VLLM:
|
||||
case APHRODITE:
|
||||
case MANCER:
|
||||
case OOBA: {
|
||||
@@ -964,6 +976,8 @@ function getModel() {
|
||||
return settings.dreamgen_model;
|
||||
case OPENROUTER:
|
||||
return settings.openrouter_model;
|
||||
case VLLM:
|
||||
return settings.vllm_model;
|
||||
case APHRODITE:
|
||||
return settings.aphrodite_model;
|
||||
case OLLAMA:
|
||||
@@ -1061,11 +1075,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
||||
'ignore_eos': settings.ban_eos_token,
|
||||
'n_probs': power_user.request_token_probabilities ? 10 : undefined,
|
||||
};
|
||||
const vllmParams = {
|
||||
'n': canMultiSwipe ? settings.n : 1,
|
||||
'best_of': canMultiSwipe ? settings.n : 1,
|
||||
'ignore_eos': settings.ignore_eos_token,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
|
||||
'seed': settings.seed,
|
||||
};
|
||||
const aphroditeParams = {
|
||||
'n': canMultiSwipe ? settings.n : 1,
|
||||
'best_of': canMultiSwipe ? settings.n : 1,
|
||||
'ignore_eos': settings.ignore_eos_token_aphrodite,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens_aphrodite,
|
||||
'ignore_eos': settings.ignore_eos_token,
|
||||
'spaces_between_special_tokens': settings.spaces_between_special_tokens,
|
||||
'grammar': settings.grammar_string,
|
||||
//'logits_processors': settings.logits_processors_aphrodite,
|
||||
//'logprobs': settings.log_probs_aphrodite,
|
||||
@@ -1087,10 +1108,18 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
||||
delete params.dynatemp_high;
|
||||
}
|
||||
|
||||
if (settings.type === APHRODITE) {
|
||||
params = Object.assign(params, aphroditeParams);
|
||||
} else {
|
||||
params = Object.assign(params, nonAphroditeParams);
|
||||
switch (settings.type) {
|
||||
case VLLM:
|
||||
params = Object.assign(params, vllmParams);
|
||||
break;
|
||||
|
||||
case APHRODITE:
|
||||
params = Object.assign(params, aphroditeParams);
|
||||
break;
|
||||
|
||||
default:
|
||||
params = Object.assign(params, nonAphroditeParams);
|
||||
break;
|
||||
}
|
||||
|
||||
if (Array.isArray(settings.logit_bias) && settings.logit_bias.length) {
|
||||
@@ -1119,4 +1148,3 @@ export function getTextGenGenerationData(finalPrompt, maxTokens, isImpersonate,
|
||||
|
||||
return params;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user