Merge pull request #3181 from SillyTavern/tc-split-generic

Add generic text completion API type (100% OAI compatible)
This commit is contained in:
Cohee 2024-12-14 16:05:44 +02:00 committed by GitHub
commit 8811010c73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 117 additions and 12 deletions

View File

@ -1235,7 +1235,8 @@
<input class="neo-range-slider" type="range" id="temp_textgenerationwebui" name="volume" min="0.0" max="5.0" step="0.01" x-setting-id="temp">
<input class="neo-range-input" type="number" min="0.0" max="5.0" step="0.01" data-for="temp_textgenerationwebui" id="temp_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<!-- Note: "except" mode = show for all BUT types in data-tg-type -->
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="Top K">Top K</span>
<div class="fa-solid fa-circle-info opacity50p" title="Top K sets a maximum amount of top tokens that can be chosen from.&#13;E.g Top K is 20, this means only the 20 highest ranking tokens will be kept (regardless of their probabilities being diverse or limited).&#13;Set to 0 (or -1, depending on your backend) to disable." data-i18n="[title]Top_K_desc"></div>
@ -1251,7 +1252,7 @@
<input class="neo-range-slider" type="range" id="top_p_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
<input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="top_p_textgenerationwebui" id="top_p_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="Typical P">Typical P</span>
<div class="fa-solid fa-circle-info opacity50p" title="Typical P Sampling prioritizes tokens based on their deviation from the average entropy of the set.&#13;It maintains tokens whose cumulative probability is close to a predefined threshold (e.g., 0.5), emphasizing those with average information content.&#13;Set to 1.0 to disable." data-i18n="[title]Typical_P_desc"></div>
@ -1259,7 +1260,7 @@
<input class="neo-range-slider" type="range" id="typical_p_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
<input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="typical_p_textgenerationwebui" id="typical_p_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="Min P">Min P</span>
<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Min_P_desc" title="Min P sets a base minimum probability. This is scaled according to the top token's probability.&#13;E.g If Top token is 80% probability, and Min P is 0.1, only tokens higher than 8% would be considered.&#13;Set to 0 to disable."></div>
@ -1267,7 +1268,7 @@
<input class="neo-range-slider" type="range" id="min_p_textgenerationwebui" name="volume" min="0" max="1" step="0.001">
<input class="neo-range-input" type="number" min="0" max="1" step="0.001" data-for="min_p_textgenerationwebui" id="min_p_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="Top A">Top A</span>
<div class="fa-solid fa-circle-info opacity50p" title="Top A sets a threshold for token selection based on the square of the highest token probability.&#13;E.g if the Top-A value is 0.2 and the top token's probability is 50%, tokens with probabilities below 5% (0.2 * 0.5^2) are excluded.&#13;Set to 0 to disable." data-i18n="[title]Top_A_desc"></div>
@ -1275,7 +1276,7 @@
<input class="neo-range-slider" type="range" id="top_a_textgenerationwebui" name="volume" min="0" max="1" step="0.01">
<input class="neo-range-input" type="number" min="0" max="1" step="0.01" data-for="top_a_textgenerationwebui" id="top_a_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small>
<span data-i18n="TFS">TFS</span>
<div class="fa-solid fa-circle-info opacity50p" data-i18n="[title]Tail_Free_Sampling_desc" title="Tail-Free Sampling (TFS) searches for a tail of low-probability tokens in the distribution,&#13;by analyzing the rate of change in token probabilities using derivatives. It retains tokens up to a threshold (e.g., 0.3) based on the normalized second derivative.&#13;The closer to 0, the more discarded tokens. Set to 1.0 to disable."></div>
@ -1307,7 +1308,7 @@
<input class="neo-range-slider" type="range" id="eta_cutoff_textgenerationwebui" name="volume" min="0" max="20" step="0.01">
<input class="neo-range-input" type="number" min="0" max="20" step="0.01" data-for="eta_cutoff_textgenerationwebui" id="eta_cutoff_counter_textgenerationwebui">
</div>
<div class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<div data-tg-type-mode="except" data-tg-type="generic" class="alignitemscenter flex-container flexFlowColumn flexBasis30p flexGrow flexShrink gap0">
<small data-i18n="rep.pen">Repetition Penalty</small>
<input class="neo-range-slider" type="range" id="rep_pen_textgenerationwebui" name="volume" min="1" max="3" step="0.01">
<input class="neo-range-input" type="number" min="1" max="3" step="0.01" data-for="rep_pen_textgenerationwebui" id="rep_pen_counter_textgenerationwebui">
@ -1569,7 +1570,7 @@
<small data-i18n="Ignore EOS Token">Ignore EOS Token</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Ignore the EOS Token even if it generates." title="Ignore the EOS Token even if it generates."></div>
</label>
<label class="checkbox_label flexGrow flexShrink" for="skip_special_tokens_textgenerationwebui">
<label data-tg-type-mode="except" data-tg-type="generic" class="checkbox_label flexGrow flexShrink" for="skip_special_tokens_textgenerationwebui">
<input type="checkbox" id="skip_special_tokens_textgenerationwebui" />
<small data-i18n="Skip Special Tokens">Skip Special Tokens</small>
</label>
@ -1594,14 +1595,14 @@
</label>
</div>
</div>
<div data-tg-type="mancer, ooba, koboldcpp, vllm, aphrodite, llamacpp, ollama, infermaticai, huggingface" class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<div data-tg-type="mancer, ooba, koboldcpp, vllm, aphrodite, llamacpp, ollama, infermaticai, huggingface, generic" class="flex-container flexFlowColumn alignitemscenter flexBasis48p flexGrow flexShrink gap0">
<label>
<small data-i18n="Seed">Seed</small>
<div class="fa-solid fa-circle-info opacity50p " data-i18n="[title]Seed_desc" title="A random seed to use for deterministic and reproducible outputs. Set to -1 to use a random seed."></div>
</label>
<input type="number" id="seed_textgenerationwebui" class="text_pole textAlignCenter" min="-1" value="-1" />
</div>
<div id="banned_tokens_block_ooba" class="wide100p">
<div data-tg-type-mode="except" data-tg-type="generic" id="banned_tokens_block_ooba" class="wide100p">
<hr class="width100p">
<h4 class="range-block-title justifyCenter">
<span data-i18n="Banned Tokens">Banned Tokens/Strings</span>
@ -2187,10 +2188,10 @@
<div>
<h4 data-i18n="API Type">API Type</h4>
<select id="textgen_type">
<option value="ooba" data-i18n="Default (completions compatible)">Default [OpenAI /completions compatible: oobabooga, LM Studio, etc.]</option>
<option value="aphrodite">Aphrodite</option>
<option value="dreamgen">DreamGen</option>
<option value="featherless">Featherless</option>
<option value="generic" data-i18n="Generic (OpenAI-compatible) [LM Studio, LiteLLM, etc.]">Generic (OpenAI-compatible) [LM Studio, LiteLLM, etc.]</option>
<option value="huggingface">HuggingFace (Inference Endpoint)</option>
<option value="infermaticai">InfermaticAI</option>
<option value="koboldcpp">KoboldCpp</option>
@ -2199,6 +2200,7 @@
<option value="ollama">Ollama</option>
<option value="openrouter">OpenRouter</option>
<option value="tabby">TabbyAPI</option>
<option value="ooba">Text Generation WebUI (oobabooga)</option>
<option value="togetherai">TogetherAI</option>
<option value="vllm">vLLM</option>
</select>
@ -2321,6 +2323,24 @@
</select>
</div>
</div>
<div data-tg-type="generic" class="flex-container flexFlowColumn">
<h4 data-i18n="API key (optional)">API key (optional)</h4>
<div class="flex-container">
<input id="api_key_generic" name="api_key_generic" class="text_pole flex1 wide100p" type="text" autocomplete="off">
<div title="Clear your API key" data-i18n="[title]Clear your API key" class="menu_button fa-solid fa-circle-xmark clear-api-key" data-key="api_key_generic">
</div>
</div>
<div data-for="api_key_generic" class="neutral_warning" data-i18n="For privacy reasons, your API key will be hidden after you reload the page.">
For privacy reasons, your API key will be hidden after you reload the page.
</div>
<div class="flex1">
<h4 data-i18n="Server url">Server URL</h4>
<small data-i18n="Example: 127.0.0.1:5000">Example: http://127.0.0.1:5000</small>
<input id="generic_api_url_text" name="generic_api_url" class="text_pole wide100p" value="" autocomplete="off" data-server-history="generic">
</div>
<datalist id="generic_model_fill"></datalist>
<input id="generic_model_textgenerationwebui" list="generic_model_fill" class="text_pole wide100p" placeholder="Model ID (optional)" data-i18n="[placeholder]Model ID (optional)" type="text">
</div>
<div data-tg-type="ooba" class="flex-container flexFlowColumn">
<div class="flex-container flexFlowColumn">
<a href="https://github.com/oobabooga/text-generation-webui" target="_blank">

View File

@ -234,7 +234,7 @@ import {
import { getBackgrounds, initBackgrounds, loadBackgroundSettings, background_settings } from './scripts/backgrounds.js';
import { hideLoader, showLoader } from './scripts/loader.js';
import { BulkEditOverlay, CharacterContextMenu } from './scripts/BulkEditOverlay.js';
import { loadFeatherlessModels, loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels, initTextGenModels, loadTabbyModels } from './scripts/textgen-models.js';
import { loadFeatherlessModels, loadMancerModels, loadOllamaModels, loadTogetherAIModels, loadInfermaticAIModels, loadOpenRouterModels, loadVllmModels, loadAphroditeModels, loadDreamGenModels, initTextGenModels, loadTabbyModels, loadGenericModels } from './scripts/textgen-models.js';
import { appendFileContent, hasPendingFileAttachment, populateFileAttachment, decodeStyleTags, encodeStyleTags, isExternalMediaAllowed, getCurrentEntityId, preserveNeutralChat, restoreNeutralChat } from './scripts/chats.js';
import { getPresetManager, initPresetManager } from './scripts/preset-manager.js';
import { evaluateMacros, getLastMessageId, initMacros } from './scripts/macros.js';
@ -1221,6 +1221,9 @@ async function getStatusTextgen() {
} else if (textgen_settings.type === textgen_types.TABBY) {
loadTabbyModels(data?.data);
setOnlineStatus(textgen_settings.tabby_model || data?.result);
} else if (textgen_settings.type === textgen_types.GENERIC) {
loadGenericModels(data?.data);
setOnlineStatus(textgen_settings.generic_model || 'Connected');
} else {
setOnlineStatus(data?.result);
}
@ -10042,6 +10045,7 @@ jQuery(async function () {
{ id: 'api_key_llamacpp', secret: SECRET_KEYS.LLAMACPP },
{ id: 'api_key_featherless', secret: SECRET_KEYS.FEATHERLESS },
{ id: 'api_key_huggingface', secret: SECRET_KEYS.HUGGINGFACE },
{ id: 'api_key_generic', secret: SECRET_KEYS.GENERIC },
];
for (const key of keys) {

View File

@ -585,6 +585,7 @@ class PresetManager {
'openrouter_allow_fallbacks',
'tabby_model',
'derived',
'generic_model',
];
const settings = Object.assign({}, getSettingsByApiId(this.apiId));

View File

@ -38,6 +38,7 @@ export const SECRET_KEYS = {
NANOGPT: 'api_key_nanogpt',
TAVILY: 'api_key_tavily',
BFL: 'api_key_bfl',
GENERIC: 'api_key_generic',
};
const INPUT_MAP = {
@ -71,6 +72,7 @@ const INPUT_MAP = {
[SECRET_KEYS.HUGGINGFACE]: '#api_key_huggingface',
[SECRET_KEYS.BLOCKENTROPY]: '#api_key_blockentropy',
[SECRET_KEYS.NANOGPT]: '#api_key_nanogpt',
[SECRET_KEYS.GENERIC]: '#api_key_generic',
};
async function clearSecret() {

View File

@ -3743,6 +3743,7 @@ function setBackgroundCallback(_, bg) {
function getModelOptions(quiet) {
const nullResult = { control: null, options: null };
const modelSelectMap = [
{ id: 'generic_model_textgenerationwebui', api: 'textgenerationwebui', type: textgen_types.GENERIC },
{ id: 'custom_model_textgenerationwebui', api: 'textgenerationwebui', type: textgen_types.OOBA },
{ id: 'model_togetherai_select', api: 'textgenerationwebui', type: textgen_types.TOGETHERAI },
{ id: 'openrouter_model', api: 'textgenerationwebui', type: textgen_types.OPENROUTER },

View File

@ -160,6 +160,24 @@ export async function loadInfermaticAIModels(data) {
}
}
/**
 * Populates the `#generic_model_fill` datalist with model IDs for the
 * Generic (OpenAI-compatible) text completion API type.
 * @param {object[]} data - Array of model objects returned by the backend's
 *     models endpoint; each entry is expected to have a string `id`.
 * @returns {void}
 */
export function loadGenericModels(data) {
    if (!Array.isArray(data)) {
        console.error('Invalid Generic models data', data);
        return;
    }

    // Sort a copy so the caller's array is not mutated as a side effect.
    const models = [...data].sort((a, b) => a.id.localeCompare(b.id));

    const dataList = $('#generic_model_fill');
    dataList.empty();

    for (const model of models) {
        const option = document.createElement('option');
        option.value = model.id;
        option.text = model.id;
        dataList.append(option);
    }
}
export async function loadDreamGenModels(data) {
if (!Array.isArray(data)) {
console.error('Invalid DreamGen models data', data);

View File

@ -33,9 +33,11 @@ export const textgen_types = {
OPENROUTER: 'openrouter',
FEATHERLESS: 'featherless',
HUGGINGFACE: 'huggingface',
GENERIC: 'generic',
};
const {
GENERIC,
MANCER,
VLLM,
APHRODITE,
@ -120,6 +122,7 @@ export const SERVER_INPUTS = {
[textgen_types.LLAMACPP]: '#llamacpp_api_url_text',
[textgen_types.OLLAMA]: '#ollama_api_url_text',
[textgen_types.HUGGINGFACE]: '#huggingface_api_url_text',
[textgen_types.GENERIC]: '#generic_api_url_text',
};
const KOBOLDCPP_ORDER = [6, 0, 1, 3, 4, 2, 5];
@ -205,6 +208,7 @@ const settings = {
xtc_probability: 0,
nsigma: 0.0,
featherless_model: '',
generic_model: '',
};
export {
@ -282,6 +286,7 @@ export const setting_names = [
'xtc_threshold',
'xtc_probability',
'nsigma',
'generic_model',
];
const DYNATEMP_BLOCK = document.getElementById('dynatemp_block_ooba');
@ -833,7 +838,14 @@ jQuery(function () {
function showTypeSpecificControls(type) {
$('[data-tg-type]').each(function () {
const mode = String($(this).attr('data-tg-type-mode') ?? '').toLowerCase().trim();
const tgTypes = $(this).attr('data-tg-type').split(',').map(x => x.trim());
if (mode === 'except') {
$(this)[tgTypes.includes(type) ? 'hide' : 'show']();
return;
}
for (const tgType of tgTypes) {
if (tgType === type || tgType == 'all') {
$(this).show();
@ -1100,6 +1112,11 @@ export function getTextGenModel() {
return settings.custom_model;
}
break;
case GENERIC:
if (settings.generic_model) {
return settings.generic_model;
}
break;
case MANCER:
return settings.mancer_model;
case TOGETHERAI:

View File

@ -172,6 +172,19 @@ function getHuggingFaceHeaders(directories) {
}) : {};
}
/**
 * Builds the extra request headers for the Generic text completion API.
 * Produces a Bearer authorization header when the user has a stored API key;
 * the key is optional for this API type, so an empty object is returned
 * when no secret is available.
 * @param {import('./users.js').UserDirectoryList} directories
 * @returns {object} Headers for the request
 */
function getGenericHeaders(directories) {
    const secret = readSecret(directories, SECRET_KEYS.GENERIC);

    // No stored key: send no authorization header at all.
    if (!secret) {
        return {};
    }

    return { 'Authorization': `Bearer ${secret}` };
}
export function getOverrideHeaders(urlHost) {
const requestOverrides = getConfigValue('requestOverrides', []);
const overrideHeaders = requestOverrides?.find((e) => e.hosts?.includes(urlHost))?.headers;
@ -214,6 +227,7 @@ export function setAdditionalHeadersByType(requestHeaders, type, server, directo
[TEXTGEN_TYPES.LLAMACPP]: getLlamaCppHeaders,
[TEXTGEN_TYPES.FEATHERLESS]: getFeatherlessHeaders,
[TEXTGEN_TYPES.HUGGINGFACE]: getHuggingFaceHeaders,
[TEXTGEN_TYPES.GENERIC]: getGenericHeaders,
};
const getHeaders = headerGetters[type];

View File

@ -225,6 +225,7 @@ export const TEXTGEN_TYPES = {
OPENROUTER: 'openrouter',
FEATHERLESS: 'featherless',
HUGGINGFACE: 'huggingface',
GENERIC: 'generic',
};
export const INFERMATICAI_KEYS = [
@ -346,6 +347,24 @@ export const OLLAMA_KEYS = [
'min_p',
];
// https://platform.openai.com/docs/api-reference/completions
export const OPENAI_KEYS = [
'model',
'prompt',
'stream',
'temperature',
'top_p',
'frequency_penalty',
'presence_penalty',
'stop',
'seed',
'logit_bias',
'logprobs',
'max_tokens',
'n',
'best_of',
];
export const AVATAR_WIDTH = 512;
export const AVATAR_HEIGHT = 768;

View File

@ -13,6 +13,7 @@ import {
VLLM_KEYS,
DREAMGEN_KEYS,
FEATHERLESS_KEYS,
OPENAI_KEYS,
} from '../../constants.js';
import { forwardFetchResponse, trimV1, getConfigValue } from '../../util.js';
import { setAdditionalHeaders } from '../../additional-headers.js';
@ -113,8 +114,8 @@ router.post('/status', jsonParser, async function (request, response) {
let url = baseUrl;
let result = '';
switch (apiType) {
case TEXTGEN_TYPES.GENERIC:
case TEXTGEN_TYPES.OOBA:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.APHRODITE:
@ -287,6 +288,7 @@ router.post('/generate', jsonParser, async function (request, response) {
let url = trimV1(baseUrl);
switch (request.body.api_type) {
case TEXTGEN_TYPES.GENERIC:
case TEXTGEN_TYPES.VLLM:
case TEXTGEN_TYPES.FEATHERLESS:
case TEXTGEN_TYPES.APHRODITE:
@ -347,6 +349,12 @@ router.post('/generate', jsonParser, async function (request, response) {
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.GENERIC) {
request.body = _.pickBy(request.body, (_, key) => OPENAI_KEYS.includes(key));
if (Array.isArray(request.body.stop)) { request.body.stop = request.body.stop.slice(0, 4); }
args.body = JSON.stringify(request.body);
}
if (request.body.api_type === TEXTGEN_TYPES.OPENROUTER) {
if (Array.isArray(request.body.provider) && request.body.provider.length > 0) {
request.body.provider = {

View File

@ -50,6 +50,7 @@ export const SECRET_KEYS = {
TAVILY: 'api_key_tavily',
NANOGPT: 'api_key_nanogpt',
BFL: 'api_key_bfl',
GENERIC: 'api_key_generic',
};
// These are the keys that are safe to expose, even if allowKeysExposure is false