From 81fe9aa699b7187b7b670662a6d6c8000b62d285 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:39:08 +0200 Subject: [PATCH] Fix updated tokenization via ooba API --- public/scripts/tokenizers.js | 20 +++++++++++++------- server.js | 15 +++++++++------ 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 994c45630..8430c36ab 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -363,6 +363,15 @@ function getTokenCacheObject() { return tokenCache[String(chatId)]; } +function getRemoteTokenizationParams(str) { + return { + text: str, + api: main_api, + url: getAPIServerUrl(), + legacy_api: main_api === 'textgenerationwebui' && textgenerationwebui_settings.legacy_api && !isMancer(), + }; +} + /** * Counts token using the remote server API. * @param {string} endpoint API endpoint. @@ -377,12 +386,7 @@ function countTokensRemote(endpoint, str, padding) { async: false, type: 'POST', url: endpoint, - data: JSON.stringify({ - text: str, - api: main_api, - url: getAPIServerUrl(), - legacy_api: main_api === 'textgenerationwebui' && textgenerationwebui_settings.legacy_api && !isMancer() , - }), + data: JSON.stringify(getRemoteTokenizationParams(str)), dataType: "json", contentType: "application/json", success: function (data) { @@ -425,7 +429,7 @@ function getTextTokensRemote(endpoint, str, model = '') { async: false, type: 'POST', url: endpoint, - data: JSON.stringify({ text: str }), + data: JSON.stringify(getRemoteTokenizationParams(str)), dataType: "json", contentType: "application/json", success: function (data) { @@ -482,6 +486,8 @@ export function getTextTokens(tokenizerType, str) { case tokenizers.OPENAI: const model = getTokenizerModel(); return getTextTokensRemote('/api/tokenize/openai-encode', str, model); + case tokenizers.API: + return getTextTokensRemote('/tokenize_via_api', str); default: console.warn("Calling getTextTokens with unsupported tokenizer type", tokenizerType); return []; diff --git a/server.js b/server.js index 19b088fe4..119b60536 100644 --- a/server.js +++ b/server.js @@ -3384,8 +3384,7 @@ app.post("/tokenize_via_api", jsonParser, async function (request, response) { if (api == 'textgenerationwebui') { const args = { method: 'POST', - body: JSON.stringify({ "prompt": text }), - headers: { "Content-Type": "application/json" } + headers: { "Content-Type": "application/json" }, }; setAdditionalHeaders(request, args, null); @@ -3395,9 +3394,10 @@ app.post("/tokenize_via_api", jsonParser, async function (request, response) { if (legacyApi) { url += '/v1/token-count'; - + args.body = JSON.stringify({ "prompt": text}); } else { - url += '/api/v1/token-count'; + url += '/v1/internal/encode'; + args.body = JSON.stringify({ "text": text }); } const result = await fetch(url, args); @@ -3408,7 +3408,10 @@ app.post("/tokenize_via_api", jsonParser, async function (request, response) { } const data = await result.json(); - return response.send({ count: data['results'][0]['tokens'] }); + const count = legacyApi ? data?.results[0]?.tokens : data?.length; + const ids = legacyApi ? [] : data?.tokens; + + return response.send({ count, ids }); } else if (api == 'kobold') { @@ -3430,7 +3433,7 @@ app.post("/tokenize_via_api", jsonParser, async function (request, response) { const data = await result.json(); const count = data['value']; - return response.send({ count: count }); + return response.send({ count: count, ids: [] }); } else {