Add a token counting endpoint for vLLM
commit 6c87dfe10d
parent 584d0e6222
@@ -7,7 +7,7 @@ import { kai_flags } from './kai-settings.js';
 import { textgen_types, textgenerationwebui_settings as textgen_settings, getTextGenServer, getTextGenModel } from './textgen-settings.js';
 import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, openRouterModels } from './textgen-models.js';
 
-const { OOBA, TABBY, KOBOLDCPP, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
+const { OOBA, TABBY, KOBOLDCPP, VLLM, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
 
 export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
 const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';
@@ -39,7 +39,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
     //tokenizers.NERD2,
 ];
 
-export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, APHRODITE];
+export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, VLLM, APHRODITE];
 
 const TOKENIZER_URLS = {
     [tokenizers.GPT2]: {
@@ -765,6 +765,7 @@ function getTextgenAPITokenizationParams(str) {
         api_type: textgen_settings.type,
         url: getTextGenServer(),
         legacy_api: textgen_settings.legacy_api && (textgen_settings.type === OOBA || textgen_settings.type === APHRODITE),
+        vllm_model: textgen_settings.vllm_model,
     };
 }
 
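For reference, a sketch of the tokenization payload the client ends up sending once the vLLM backend is selected. Only `api_type`, `url`, `legacy_api`, and `vllm_model` appear in the hunk above; the `text` field and all concrete values here are assumptions for illustration.

// Illustrative sketch only, not part of the diff.
const exampleTokenizationParams = {
    text: 'Hello world',                 // assumed: the string passed to getTextgenAPITokenizationParams
    api_type: 'vllm',                    // textgen_settings.type
    url: 'http://127.0.0.1:8000',        // placeholder for getTextGenServer()
    legacy_api: false,
    vllm_model: 'meta-llama/Llama-3-8B', // placeholder for textgen_settings.vllm_model
};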
@@ -784,6 +784,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
     const text = String(request.body.text) || '';
     const baseUrl = String(request.body.url);
     const legacyApi = Boolean(request.body.legacy_api);
+    const vllmModel = String(request.body.vllm_model) || '';
 
     try {
         const args = {
@@ -814,7 +815,9 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
                 args.body = JSON.stringify({ 'content': text });
                 break;
             case TEXTGEN_TYPES.VLLM:
-                return response.send({ error: true });
+                url += '/tokenize';
+                args.body = JSON.stringify({ 'model': vllmModel, 'prompt': text });
+                break;
             case TEXTGEN_TYPES.APHRODITE:
                 url += '/v1/tokenize';
                 args.body = JSON.stringify({ 'prompt': text });
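As a minimal sketch of what the new VLLM branch forwards to a vLLM OpenAI-compatible server: the base URL and model name below are placeholders, and the response fields shown are assumptions consistent with the `data?.count` handling in the next hunk.

// Illustrative sketch only, not part of the diff.
const tokenizeUrl = 'http://127.0.0.1:8000/tokenize';   // placeholder server + the new '/tokenize' suffix
const result = await fetch(tokenizeUrl, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'meta-llama/Llama-3-8B', prompt: 'Hello world' }),
});
const data = await result.json();
// Assumed response shape: { count: <number>, tokens: [<token ids>], ... }
console.log(data.count, data.tokens);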
@@ -834,7 +837,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
     }
 
     const data = await result.json();
-    const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value ?? data?.tokens?.length);
+    const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.count ?? data?.value ?? data?.tokens?.length);
     const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
 
     return response.send({ count, ids });
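The extra `data?.count` fallback is there presumably because vLLM's tokenize response reports the token total under a `count` key rather than as a bare array or `value`. A condensed, standalone sketch of the same normalization (hypothetical helper name, logic copied from the hunk above):

// Illustrative helper, not part of the diff: collapse the different backend
// tokenize responses into the { count, ids } shape the endpoint returns.
function normalizeTokenizeResponse(data, legacyApi) {
    const count = legacyApi
        ? data?.results?.[0]?.tokens
        : (data?.length ?? data?.count ?? data?.value ?? data?.tokens?.length);
    const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
    return { count, ids };
}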