Add a token counting endpoint for vLLM

This commit is contained in:
sasha0552 2024-06-20 01:13:32 +00:00 committed by GitHub
parent 584d0e6222
commit 6c87dfe10d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 4 deletions

View File

@@ -7,7 +7,7 @@ import { kai_flags } from './kai-settings.js';
 import { textgen_types, textgenerationwebui_settings as textgen_settings, getTextGenServer, getTextGenModel } from './textgen-settings.js';
 import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, openRouterModels } from './textgen-models.js';
-const { OOBA, TABBY, KOBOLDCPP, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
+const { OOBA, TABBY, KOBOLDCPP, VLLM, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
 export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
 const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';
@@ -39,7 +39,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
     //tokenizers.NERD2,
 ];
-export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, APHRODITE];
+export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, VLLM, APHRODITE];
 const TOKENIZER_URLS = {
     [tokenizers.GPT2]: {
@@ -765,6 +765,7 @@ function getTextgenAPITokenizationParams(str) {
         api_type: textgen_settings.type,
         url: getTextGenServer(),
         legacy_api: textgen_settings.legacy_api && (textgen_settings.type === OOBA || textgen_settings.type === APHRODITE),
+        vllm_model: textgen_settings.vllm_model,
     };
 }

View File

@@ -784,6 +784,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
     const text = String(request.body.text) || '';
     const baseUrl = String(request.body.url);
     const legacyApi = Boolean(request.body.legacy_api);
+    const vllmModel = String(request.body.vllm_model) || '';
     try {
         const args = {
@@ -814,7 +815,9 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
                 args.body = JSON.stringify({ 'content': text });
                 break;
             case TEXTGEN_TYPES.VLLM:
-                return response.send({ error: true });
+                url += '/tokenize';
+                args.body = JSON.stringify({ 'model': vllmModel, 'prompt': text });
+                break;
             case TEXTGEN_TYPES.APHRODITE:
                 url += '/v1/tokenize';
                 args.body = JSON.stringify({ 'prompt': text });
@@ -834,7 +837,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
     }
     const data = await result.json();
-    const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value ?? data?.tokens?.length);
+    const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.count ?? data?.value ?? data?.tokens?.length);
     const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
     return response.send({ count, ids });