From 6c87dfe10d98a1597c27309211c9471e7a4d8b53 Mon Sep 17 00:00:00 2001
From: sasha0552
Date: Thu, 20 Jun 2024 01:13:32 +0000
Subject: [PATCH] Add a token counting endpoint for vLLM

---
 public/scripts/tokenizers.js | 5 +++--
 src/endpoints/tokenizers.js  | 7 +++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
index 39a19d0ca..5c017ad83 100644
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -7,7 +7,7 @@ import { kai_flags } from './kai-settings.js';
 import { textgen_types, textgenerationwebui_settings as textgen_settings, getTextGenServer, getTextGenModel } from './textgen-settings.js';
 import { getCurrentDreamGenModelTokenizer, getCurrentOpenRouterModelTokenizer, openRouterModels } from './textgen-models.js';
 
-const { OOBA, TABBY, KOBOLDCPP, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
+const { OOBA, TABBY, KOBOLDCPP, VLLM, APHRODITE, LLAMACPP, OPENROUTER, DREAMGEN } = textgen_types;
 
 export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
 const TOKENIZER_WARNING_KEY = 'tokenizationWarningShown';
@@ -39,7 +39,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
     //tokenizers.NERD2,
 ];
 
-export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, APHRODITE];
+export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP, VLLM, APHRODITE];
 
 const TOKENIZER_URLS = {
     [tokenizers.GPT2]: {
@@ -765,6 +765,7 @@ function getTextgenAPITokenizationParams(str) {
         api_type: textgen_settings.type,
         url: getTextGenServer(),
         legacy_api: textgen_settings.legacy_api && (textgen_settings.type === OOBA || textgen_settings.type === APHRODITE),
+        vllm_model: textgen_settings.vllm_model,
     };
 }
 
diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index 0c8c99034..6b150e32b 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -784,6 +784,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
     const text = String(request.body.text) || '';
     const baseUrl = String(request.body.url);
     const legacyApi = Boolean(request.body.legacy_api);
+    const vllmModel = String(request.body.vllm_model) || '';
 
     try {
         const args = {
@@ -814,7 +815,9 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
                 args.body = JSON.stringify({ 'content': text });
                 break;
             case TEXTGEN_TYPES.VLLM:
-                return response.send({ error: true });
+                url += '/tokenize';
+                args.body = JSON.stringify({ 'model': vllmModel, 'prompt': text });
+                break;
             case TEXTGEN_TYPES.APHRODITE:
                 url += '/v1/tokenize';
                 args.body = JSON.stringify({ 'prompt': text });
@@ -834,7 +837,7 @@ router.post('/remote/textgenerationwebui/encode', jsonParser, async function (re
         }
 
         const data = await result.json();
-        const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value ?? data?.tokens?.length);
+        const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.count ?? data?.value ?? data?.tokens?.length);
         const ids = legacyApi ? [] : (data?.tokens ?? data?.ids ?? []);
 
         return response.send({ count, ids });