From 8bad059a62d9bd491f1e78fb4881ec8667003502 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:29:24 -0500 Subject: [PATCH 01/15] Rename /tokenize_via_api endpoint No redirect for this since I don't expect any extensions to be calling this directly. --- public/scripts/tokenizers.js | 4 ++-- server.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index ecab34705..e9a1b905f 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -173,7 +173,7 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensRemote('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensRemote('/tokenize_via_api', str, padding); + return countTokensRemote('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -525,7 +525,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensRemote('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensRemote('/tokenize_via_api', str); + return getTextTokensRemote('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; diff --git a/server.js b/server.js index 613510d5f..6b4d7199d 100644 --- a/server.js +++ b/server.js @@ -1774,7 +1774,7 @@ async function sendAI21Request(request, response) { } -app.post('/tokenize_via_api', jsonParser, async function (request, response) { +app.post('/api/tokenizers/remote/encode', jsonParser, async function (request, response) { if (!request.body) { return response.sendStatus(400); } From 04e92efe298353df7b139a2a61e403f8e49aba23 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:35:49 -0500 Subject: [PATCH 02/15] Move API tokenization endpoint into /tokenizers Requires extracting some more functions out of server.js. --- server.js | 152 +----------------------------------- src/additional-headers.js | 72 +++++++++++++++++ src/endpoints/tokenizers.js | 89 +++++++++++++++++++++ 3 files changed, 162 insertions(+), 151 deletions(-) create mode 100644 src/additional-headers.js diff --git a/server.js b/server.js index 6b4d7199d..b44533d3e 100644 --- a/server.js +++ b/server.js @@ -49,6 +49,7 @@ const { delay, getVersion, getConfigValue, color, uuidv4, tryParse, clientRelati const { ensureThumbnailCache } = require('./src/endpoints/thumbnails'); const { getTokenizerModel, getTiktokenTokenizer, loadTokenizers, TEXT_COMPLETION_MODELS, getSentencepiceTokenizer, sentencepieceTokenizers } = require('./src/endpoints/tokenizers'); const { convertClaudePrompt } = require('./src/chat-completion'); +const { getOverrideHeaders, setAdditionalHeaders } = require('./src/additional-headers'); // Work around a node v20.0.0, v20.1.0, and v20.2.0 bug. The issue was fixed in v20.3.0. // https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 @@ -119,70 +120,6 @@ const listen = getConfigValue('listen', false); const API_OPENAI = 'https://api.openai.com/v1'; const API_CLAUDE = 'https://api.anthropic.com/v1'; -function getMancerHeaders() { - const apiKey = readSecret(SECRET_KEYS.MANCER); - - return apiKey ? ({ - 'X-API-KEY': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getAphroditeHeaders() { - const apiKey = readSecret(SECRET_KEYS.APHRODITE); - - return apiKey ? ({ - 'X-API-KEY': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getTabbyHeaders() { - const apiKey = readSecret(SECRET_KEYS.TABBY); - - return apiKey ? ({ - 'x-api-key': apiKey, - 'Authorization': `Bearer ${apiKey}`, - }) : {}; -} - -function getOverrideHeaders(urlHost) { - const requestOverrides = getConfigValue('requestOverrides', []); - const overrideHeaders = requestOverrides?.find((e) => e.hosts?.includes(urlHost))?.headers; - if (overrideHeaders && urlHost) { - return overrideHeaders; - } else { - return {}; - } -} - -/** - * Sets additional headers for the request. - * @param {object} request Original request body - * @param {object} args New request arguments - * @param {string|null} server API server for new request - */ -function setAdditionalHeaders(request, args, server) { - let headers; - - switch (request.body.api_type) { - case TEXTGEN_TYPES.MANCER: - headers = getMancerHeaders(); - break; - case TEXTGEN_TYPES.APHRODITE: - headers = getAphroditeHeaders(); - break; - case TEXTGEN_TYPES.TABBY: - headers = getTabbyHeaders(); - break; - default: - headers = server ? getOverrideHeaders((new URL(server))?.host) : {}; - break; - } - - Object.assign(args.headers, headers); -} - const SETTINGS_FILE = './public/settings.json'; const { DIRECTORIES, UPLOADS_PATH, PALM_SAFETY, TEXTGEN_TYPES, CHAT_COMPLETION_SOURCES, AVATAR_WIDTH, AVATAR_HEIGHT } = require('./src/constants'); @@ -1774,93 +1711,6 @@ async function sendAI21Request(request, response) { } -app.post('/api/tokenizers/remote/encode', jsonParser, async function (request, response) { - if (!request.body) { - return response.sendStatus(400); - } - const text = String(request.body.text) || ''; - const api = String(request.body.main_api); - const baseUrl = String(request.body.url); - const legacyApi = Boolean(request.body.legacy_api); - - try { - if (api == 'textgenerationwebui') { - const args = { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }; - - setAdditionalHeaders(request, args, null); - - // Convert to string + remove trailing slash + /v1 suffix - let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); - - if (legacyApi) { - url += '/v1/token-count'; - args.body = JSON.stringify({ 'prompt': text }); - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.TABBY: - url += '/v1/token/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/api/extra/tokencount'; - args.body = JSON.stringify({ 'prompt': text }); - break; - default: - url += '/v1/internal/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - } - } - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); - const ids = legacyApi ? [] : (data?.tokens ?? []); - - return response.send({ count, ids }); - } - - else if (api == 'kobold') { - const args = { - method: 'POST', - body: JSON.stringify({ 'prompt': text }), - headers: { 'Content-Type': 'application/json' }, - }; - - let url = String(baseUrl).replace(/\/$/, ''); - url += '/extra/tokencount'; - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = data['value']; - return response.send({ count: count, ids: [] }); - } - - else { - console.log('Unknown API', api); - return response.send({ error: true }); - } - } catch (error) { - console.log(error); - return response.send({ error: true }); - } -}); - /** * Redirect a deprecated API endpoint URL to its replacement. Because fetch, form submissions, and $.ajax follow * redirects, this is transparent to client-side code. diff --git a/src/additional-headers.js b/src/additional-headers.js new file mode 100644 index 000000000..61ea1790d --- /dev/null +++ b/src/additional-headers.js @@ -0,0 +1,72 @@ +const { TEXTGEN_TYPES } = require('./constants'); +const { SECRET_KEYS, readSecret } = require('./endpoints/secrets'); +const { getConfigValue } = require('./util'); + +function getMancerHeaders() { + const apiKey = readSecret(SECRET_KEYS.MANCER); + + return apiKey ? ({ + 'X-API-KEY': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getAphroditeHeaders() { + const apiKey = readSecret(SECRET_KEYS.APHRODITE); + + return apiKey ? ({ + 'X-API-KEY': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getTabbyHeaders() { + const apiKey = readSecret(SECRET_KEYS.TABBY); + + return apiKey ? ({ + 'x-api-key': apiKey, + 'Authorization': `Bearer ${apiKey}`, + }) : {}; +} + +function getOverrideHeaders(urlHost) { + const requestOverrides = getConfigValue('requestOverrides', []); + const overrideHeaders = requestOverrides?.find((e) => e.hosts?.includes(urlHost))?.headers; + if (overrideHeaders && urlHost) { + return overrideHeaders; + } else { + return {}; + } +} + +/** + * Sets additional headers for the request. + * @param {object} request Original request body + * @param {object} args New request arguments + * @param {string|null} server API server for new request + */ +function setAdditionalHeaders(request, args, server) { + let headers; + + switch (request.body.api_type) { + case TEXTGEN_TYPES.MANCER: + headers = getMancerHeaders(); + break; + case TEXTGEN_TYPES.APHRODITE: + headers = getAphroditeHeaders(); + break; + case TEXTGEN_TYPES.TABBY: + headers = getTabbyHeaders(); + break; + default: + headers = server ? getOverrideHeaders((new URL(server))?.host) : {}; + break; + } + + Object.assign(args.headers, headers); +} + +module.exports = { + getOverrideHeaders, + setAdditionalHeaders, +}; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 57abc6b8f..8a4db7728 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -6,7 +6,9 @@ const tiktoken = require('@dqbd/tiktoken'); const { Tokenizer } = require('@agnai/web-tokenizers'); const { convertClaudePrompt } = require('../chat-completion'); const { readSecret, SECRET_KEYS } = require('./secrets'); +const { TEXTGEN_TYPES } = require('../constants'); const { jsonParser } = require('../express-common'); +const { setAdditionalHeaders } = require('../additional-headers'); /** * @type {{[key: string]: import("@dqbd/tiktoken").Tiktoken}} Tokenizers cache @@ -534,6 +536,93 @@ router.post('/openai/count', jsonParser, async function (req, res) { } }); +router.post('/remote/encode', jsonParser, async function (request, response) { + if (!request.body) { + return response.sendStatus(400); + } + const text = String(request.body.text) || ''; + const api = String(request.body.main_api); + const baseUrl = String(request.body.url); + const legacyApi = Boolean(request.body.legacy_api); + + try { + if (api == 'textgenerationwebui') { + const args = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }; + + setAdditionalHeaders(request, args, null); + + // Convert to string + remove trailing slash + /v1 suffix + let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); + + if (legacyApi) { + url += '/v1/token-count'; + args.body = JSON.stringify({ 'prompt': text }); + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.TABBY: + url += '/v1/token/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/api/extra/tokencount'; + args.body = JSON.stringify({ 'prompt': text }); + break; + default: + url += '/v1/internal/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + } + } + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); + const ids = legacyApi ? [] : (data?.tokens ?? []); + + return response.send({ count, ids }); + } + + else if (api == 'kobold') { + const args = { + method: 'POST', + body: JSON.stringify({ 'prompt': text }), + headers: { 'Content-Type': 'application/json' }, + }; + + let url = String(baseUrl).replace(/\/$/, ''); + url += '/extra/tokencount'; + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = data['value']; + return response.send({ count: count, ids: [] }); + } + + else { + console.log('Unknown API', api); + return response.send({ error: true }); + } + } catch (error) { + console.log(error); + return response.send({ error: true }); + } +}); + module.exports = { TEXT_COMPLETION_MODELS, getTokenizerModel, From ddd73a204a00bd7f2e91cb1dc3594b673e1f0b59 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 19:43:33 -0500 Subject: [PATCH 03/15] Remove "remote" language from tokenizer functions We'll be making a distinction between tokenizing *on* the server itself, and tokenizing via the server having the AI service do it. It makes more sense to use the term "remote" for the latter. --- public/scripts/tokenizers.js | 62 ++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index e9a1b905f..b72b672a8 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -161,19 +161,19 @@ function callTokenizer(type, str, padding) { case tokenizers.NONE: return guesstimate(str) + padding; case tokenizers.GPT2: - return countTokensRemote('/api/tokenizers/gpt2/encode', str, padding); + return countTokensFromServer('/api/tokenizers/gpt2/encode', str, padding); case tokenizers.LLAMA: - return countTokensRemote('/api/tokenizers/llama/encode', str, padding); + return countTokensFromServer('/api/tokenizers/llama/encode', str, padding); case tokenizers.NERD: - return countTokensRemote('/api/tokenizers/nerdstash/encode', str, padding); + return countTokensFromServer('/api/tokenizers/nerdstash/encode', str, padding); case tokenizers.NERD2: - return countTokensRemote('/api/tokenizers/nerdstash_v2/encode', str, padding); + return countTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str, padding); case tokenizers.MISTRAL: - return countTokensRemote('/api/tokenizers/mistral/encode', str, padding); + return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); case tokenizers.YI: - return countTokensRemote('/api/tokenizers/yi/encode', str, padding); + return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensRemote('/api/tokenizers/remote/encode', str, padding); + return countTokensFromServer('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -391,7 +391,7 @@ function getTokenCacheObject() { return tokenCache[String(chatId)]; } -function getRemoteTokenizationParams(str) { +function getServerTokenizationParams(str) { return { text: str, main_api, @@ -404,20 +404,20 @@ function getRemoteTokenizationParams(str) { } /** - * Counts token using the remote server API. + * Counts token using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensRemote(endpoint, str, padding) { +function countTokensFromServer(endpoint, str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteTokenizationParams(str)), + data: JSON.stringify(getServerTokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -450,7 +450,7 @@ function countTokensRemote(endpoint, str, padding) { * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. */ -function getTextTokensRemote(endpoint, str, model = '') { +function getTextTokensFromServer(endpoint, str, model = '') { if (model) { endpoint += `?model=${model}`; } @@ -460,7 +460,7 @@ function getTextTokensRemote(endpoint, str, model = '') { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteTokenizationParams(str)), + data: JSON.stringify(getServerTokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -480,7 +480,7 @@ function getTextTokensRemote(endpoint, str, model = '') { * @param {string} endpoint API endpoint. * @param {number[]} ids Array of token ids */ -function decodeTextTokensRemote(endpoint, ids, model = '') { +function decodeTextTokensFromServer(endpoint, ids, model = '') { if (model) { endpoint += `?model=${model}`; } @@ -501,7 +501,7 @@ function decodeTextTokensRemote(endpoint, ids, model = '') { } /** - * Encodes a string to tokens using the remote server API. + * Encodes a string to tokens using the server API. * @param {number} tokenizerType Tokenizer type. * @param {string} str String to tokenize. * @returns {number[]} Array of token ids. @@ -509,23 +509,23 @@ function decodeTextTokensRemote(endpoint, ids, model = '') { export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { case tokenizers.GPT2: - return getTextTokensRemote('/api/tokenizers/gpt2/encode', str); + return getTextTokensFromServer('/api/tokenizers/gpt2/encode', str); case tokenizers.LLAMA: - return getTextTokensRemote('/api/tokenizers/llama/encode', str); + return getTextTokensFromServer('/api/tokenizers/llama/encode', str); case tokenizers.NERD: - return getTextTokensRemote('/api/tokenizers/nerdstash/encode', str); + return getTextTokensFromServer('/api/tokenizers/nerdstash/encode', str); case tokenizers.NERD2: - return getTextTokensRemote('/api/tokenizers/nerdstash_v2/encode', str); + return getTextTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str); case tokenizers.MISTRAL: - return getTextTokensRemote('/api/tokenizers/mistral/encode', str); + return getTextTokensFromServer('/api/tokenizers/mistral/encode', str); case tokenizers.YI: - return getTextTokensRemote('/api/tokenizers/yi/encode', str); + return getTextTokensFromServer('/api/tokenizers/yi/encode', str); case tokenizers.OPENAI: { const model = getTokenizerModel(); - return getTextTokensRemote('/api/tokenizers/openai/encode', str, model); + return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensRemote('/api/tokenizers/remote/encode', str); + return getTextTokensFromServer('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; @@ -533,27 +533,27 @@ export function getTextTokens(tokenizerType, str) { } /** - * Decodes token ids to text using the remote server API. + * Decodes token ids to text using the server API. * @param {number} tokenizerType Tokenizer type. * @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { switch (tokenizerType) { case tokenizers.GPT2: - return decodeTextTokensRemote('/api/tokenizers/gpt2/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/gpt2/decode', ids); case tokenizers.LLAMA: - return decodeTextTokensRemote('/api/tokenizers/llama/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/llama/decode', ids); case tokenizers.NERD: - return decodeTextTokensRemote('/api/tokenizers/nerdstash/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/nerdstash/decode', ids); case tokenizers.NERD2: - return decodeTextTokensRemote('/api/tokenizers/nerdstash_v2/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/nerdstash_v2/decode', ids); case tokenizers.MISTRAL: - return decodeTextTokensRemote('/api/tokenizers/mistral/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/mistral/decode', ids); case tokenizers.YI: - return decodeTextTokensRemote('/api/tokenizers/yi/decode', ids); + return decodeTextTokensFromServer('/api/tokenizers/yi/decode', ids); case tokenizers.OPENAI: { const model = getTokenizerModel(); - return decodeTextTokensRemote('/api/tokenizers/openai/decode', ids, model); + return decodeTextTokensFromServer('/api/tokenizers/openai/decode', ids, model); } default: console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType); From 18177c147d6530f095657c181e668bea18c51be7 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:08:48 -0500 Subject: [PATCH 04/15] Separate remote and server tokenization code paths This lets us remove extraneous API params from paths where they aren't needed. --- public/scripts/tokenizers.js | 93 ++++++++++++++++++++++++++++++------ 1 file changed, 78 insertions(+), 15 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index b72b672a8..c67e531a5 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -173,7 +173,7 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API: - return countTokensFromServer('/api/tokenizers/remote/encode', str, padding); + return countTokensFromRemoteAPI('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -392,6 +392,12 @@ function getTokenCacheObject() { } function getServerTokenizationParams(str) { + return { + text: str, + }; +} + +function getRemoteAPITokenizationParams(str) { return { text: str, main_api, @@ -404,7 +410,7 @@ function getServerTokenizationParams(str) { } /** - * Counts token using the server API. + * Count tokens using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. @@ -424,18 +430,7 @@ function countTokensFromServer(endpoint, str, padding) { if (typeof data.count === 'number') { tokenCount = data.count; } else { - tokenCount = guesstimate(str); - console.error('Error counting tokens'); - - if (!sessionStorage.getItem(TOKENIZER_WARNING_KEY)) { - toastr.warning( - 'Your selected API doesn\'t support the tokenization endpoint. Using estimated counts.', - 'Error counting tokens', - { timeOut: 10000, preventDuplicates: true }, - ); - - sessionStorage.setItem(TOKENIZER_WARNING_KEY, String(true)); - } + tokenCount = apiFailureTokenCount(str); } }, }); @@ -443,6 +438,51 @@ function countTokensFromServer(endpoint, str, padding) { return tokenCount + padding; } +/** + * Count tokens using the AI provider's API. + * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. + * @param {number} padding Number of padding tokens. + * @returns {number} Token count with padding. + */ +function countTokensFromRemoteAPI(endpoint, str, padding) { + let tokenCount = 0; + + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getRemoteAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + if (typeof data.count === 'number') { + tokenCount = data.count; + } else { + tokenCount = apiFailureTokenCount(str); + } + }, + }); + + return tokenCount + padding; +} + +function apiFailureTokenCount(str) { + console.error('Error counting tokens'); + + if (!sessionStorage.getItem(TOKENIZER_WARNING_KEY)) { + toastr.warning( + 'Your selected API doesn\'t support the tokenization endpoint. Using estimated counts.', + 'Error counting tokens', + { timeOut: 10000, preventDuplicates: true }, + ); + + sessionStorage.setItem(TOKENIZER_WARNING_KEY, String(true)); + } + + return guesstimate(str); +} + /** * Calls the underlying tokenizer model to encode a string to tokens. * @param {string} endpoint API endpoint. @@ -475,6 +515,29 @@ function getTextTokensFromServer(endpoint, str, model = '') { return ids; } +/** + * Calls the AI provider's tokenize API to encode a string to tokens. + * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. + * @param {string} model Tokenizer model. + * @returns {number[]} Array of token ids. + */ +function getTextTokensFromRemoteAPI(endpoint, str, model = '') { + let ids = []; + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getRemoteAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + ids = data.ids; + }, + }); + return ids; +} + /** * Calls the underlying tokenizer model to decode token ids to text. * @param {string} endpoint API endpoint. @@ -525,7 +588,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API: - return getTextTokensFromServer('/api/tokenizers/remote/encode', str); + return getTextTokensFromRemoteAPI('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; From 7486ab3886fd3f9bb27756fe5901170e7cb580f8 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:20:53 -0500 Subject: [PATCH 05/15] Separate textgen and Kobold tokenization APIs They function differently and have different logic and API parameters, so it makes sense to count them as two different APIs. Kobold's API doesn't return tokens, so it can only be used to count them. There's still a lot of duplicate code which I will clean up in the following commits. --- public/scripts/tokenizers.js | 69 ++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index c67e531a5..cf0fd3481 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -18,9 +18,10 @@ export const tokenizers = { LLAMA: 3, NERD: 4, NERD2: 5, - API: 6, + API_KOBOLD: 6, MISTRAL: 7, YI: 8, + API_TEXTGENERATIONWEBUI: 9, BEST_MATCH: 99, }; @@ -135,11 +136,11 @@ export function getTokenizerBestMatch(forApi) { if (!hasTokenizerError && isConnected) { if (forApi === 'kobold' && kai_flags.can_use_tokenization) { - return tokenizers.API; + return tokenizers.API_KOBOLD; } if (forApi === 'textgenerationwebui' && isTokenizerSupported) { - return tokenizers.API; + return tokenizers.API_TEXTGENERATIONWEBUI; } } @@ -172,8 +173,10 @@ function callTokenizer(type, str, padding) { return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); case tokenizers.YI: return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); - case tokenizers.API: - return countTokensFromRemoteAPI('/api/tokenizers/remote/encode', str, padding); + case tokenizers.API_KOBOLD: + return countTokensFromKoboldAPI('/api/tokenizers/remote/encode', str, padding); + case tokenizers.API_TEXTGENERATIONWEBUI: + return countTokensFromTextgenAPI('/api/tokenizers/remote/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -397,13 +400,21 @@ function getServerTokenizationParams(str) { }; } -function getRemoteAPITokenizationParams(str) { +function getKoboldAPITokenizationParams(str) { return { text: str, - main_api, + main_api: 'kobold', + url: getAPIServerUrl(), + }; +} + +function getTextgenAPITokenizationParams(str) { + return { + text: str, + main_api: 'textgenerationwebui', api_type: textgen_settings.type, url: getAPIServerUrl(), - legacy_api: main_api === 'textgenerationwebui' && + legacy_api: textgen_settings.legacy_api && textgen_settings.type !== MANCER, }; @@ -445,14 +456,43 @@ function countTokensFromServer(endpoint, str, padding) { * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromRemoteAPI(endpoint, str, padding) { +function countTokensFromKoboldAPI(endpoint, str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteAPITokenizationParams(str)), + data: JSON.stringify(getKoboldAPITokenizationParams(str)), + dataType: 'json', + contentType: 'application/json', + success: function (data) { + if (typeof data.count === 'number') { + tokenCount = data.count; + } else { + tokenCount = apiFailureTokenCount(str); + } + }, + }); + + return tokenCount + padding; +} + +/** + * Count tokens using the AI provider's API. + * @param {string} endpoint API endpoint. + * @param {string} str String to tokenize. + * @param {number} padding Number of padding tokens. + * @returns {number} Token count with padding. + */ +function countTokensFromTextgenAPI(endpoint, str, padding) { + let tokenCount = 0; + + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -519,16 +559,15 @@ function getTextTokensFromServer(endpoint, str, model = '') { * Calls the AI provider's tokenize API to encode a string to tokens. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. */ -function getTextTokensFromRemoteAPI(endpoint, str, model = '') { +function getTextTokensFromTextgenAPI(endpoint, str) { let ids = []; jQuery.ajax({ async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getRemoteAPITokenizationParams(str)), + data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -587,8 +626,8 @@ export function getTextTokens(tokenizerType, str) { const model = getTokenizerModel(); return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } - case tokenizers.API: - return getTextTokensFromRemoteAPI('/api/tokenizers/remote/encode', str); + case tokenizers.API_TEXTGENERATIONWEBUI: + return getTextTokensFromTextgenAPI('/api/tokenizers/remote/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; From 30502ac94958f71ac040c8e0c84a2b31e5e57f94 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:26:24 -0500 Subject: [PATCH 06/15] Split up Kobold and textgenerationwebui endpoints The endpoint was one big if/else statement that did two entirely different things depending on the value of main_api. It makes more sense for those to be two separate endpoints. --- public/scripts/tokenizers.js | 14 ++-- src/endpoints/tokenizers.js | 131 ++++++++++++++++++----------------- 2 files changed, 73 insertions(+), 72 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index cf0fd3481..e0d37ddca 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -1,4 +1,4 @@ -import { characters, getAPIServerUrl, main_api, nai_settings, online_status, this_chid } from '../script.js'; +import { characters, main_api, api_server, api_server_textgenerationwebui, nai_settings, online_status, this_chid } from '../script.js'; import { power_user, registerDebugFunction } from './power-user.js'; import { chat_completion_sources, model_list, oai_settings } from './openai.js'; import { groups, selected_group } from './group-chats.js'; @@ -174,9 +174,9 @@ function callTokenizer(type, str, padding) { case tokenizers.YI: return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI('/api/tokenizers/remote/encode', str, padding); + return countTokensFromKoboldAPI('/api/tokenizers/remote/kobold/count', str, padding); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI('/api/tokenizers/remote/encode', str, padding); + return countTokensFromTextgenAPI('/api/tokenizers/remote/textgenerationwebui/encode', str, padding); default: console.warn('Unknown tokenizer type', type); return callTokenizer(tokenizers.NONE, str, padding); @@ -403,17 +403,15 @@ function getServerTokenizationParams(str) { function getKoboldAPITokenizationParams(str) { return { text: str, - main_api: 'kobold', - url: getAPIServerUrl(), + url: api_server, }; } function getTextgenAPITokenizationParams(str) { return { text: str, - main_api: 'textgenerationwebui', api_type: textgen_settings.type, - url: getAPIServerUrl(), + url: api_server_textgenerationwebui, legacy_api: textgen_settings.legacy_api && textgen_settings.type !== MANCER, @@ -627,7 +625,7 @@ export function getTextTokens(tokenizerType, str) { return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); } case tokenizers.API_TEXTGENERATIONWEBUI: - return getTextTokensFromTextgenAPI('/api/tokenizers/remote/encode', str); + return getTextTokensFromTextgenAPI('/api/tokenizers/textgenerationwebui/encode', str); default: console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); return []; diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js index 8a4db7728..27ef4faf3 100644 --- a/src/endpoints/tokenizers.js +++ b/src/endpoints/tokenizers.js @@ -536,87 +536,90 @@ router.post('/openai/count', jsonParser, async function (req, res) { } }); -router.post('/remote/encode', jsonParser, async function (request, response) { +router.post('/remote/kobold/count', jsonParser, async function (request, response) { + if (!request.body) { + return response.sendStatus(400); + } + const text = String(request.body.text) || ''; + const baseUrl = String(request.body.url); + + try { + const args = { + method: 'POST', + body: JSON.stringify({ 'prompt': text }), + headers: { 'Content-Type': 'application/json' }, + }; + + let url = String(baseUrl).replace(/\/$/, ''); + url += '/extra/tokencount'; + + const result = await fetch(url, args); + + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); + return response.send({ error: true }); + } + + const data = await result.json(); + const count = data['value']; + return response.send({ count, ids: [] }); + } catch (error) { + console.log(error); + return response.send({ error: true }); + } +}); + +router.post('/remote/textgenerationwebui/encode', jsonParser, async function (request, response) { if (!request.body) { return response.sendStatus(400); } const text = String(request.body.text) || ''; - const api = String(request.body.main_api); const baseUrl = String(request.body.url); const legacyApi = Boolean(request.body.legacy_api); try { - if (api == 'textgenerationwebui') { - const args = { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - }; + const args = { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + }; - setAdditionalHeaders(request, args, null); + setAdditionalHeaders(request, args, null); - // Convert to string + remove trailing slash + /v1 suffix - let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); + // Convert to string + remove trailing slash + /v1 suffix + let url = String(baseUrl).replace(/\/$/, '').replace(/\/v1$/, ''); - if (legacyApi) { - url += '/v1/token-count'; - args.body = JSON.stringify({ 'prompt': text }); - } else { - switch (request.body.api_type) { - case TEXTGEN_TYPES.TABBY: - url += '/v1/token/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - case TEXTGEN_TYPES.KOBOLDCPP: - url += '/api/extra/tokencount'; - args.body = JSON.stringify({ 'prompt': text }); - break; - default: - url += '/v1/internal/encode'; - args.body = JSON.stringify({ 'text': text }); - break; - } + if (legacyApi) { + url += '/v1/token-count'; + args.body = JSON.stringify({ 'prompt': text }); + } else { + switch (request.body.api_type) { + case TEXTGEN_TYPES.TABBY: + url += '/v1/token/encode'; + args.body = JSON.stringify({ 'text': text }); + break; + case TEXTGEN_TYPES.KOBOLDCPP: + url += '/api/extra/tokencount'; + args.body = JSON.stringify({ 'prompt': text }); + break; + default: + url += '/v1/internal/encode'; + args.body = JSON.stringify({ 'text': text }); + break; } - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); - const ids = legacyApi ? [] : (data?.tokens ?? []); - - return response.send({ count, ids }); } - else if (api == 'kobold') { - const args = { - method: 'POST', - body: JSON.stringify({ 'prompt': text }), - headers: { 'Content-Type': 'application/json' }, - }; + const result = await fetch(url, args); - let url = String(baseUrl).replace(/\/$/, ''); - url += '/extra/tokencount'; - - const result = await fetch(url, args); - - if (!result.ok) { - console.log(`API returned error: ${result.status} ${result.statusText}`); - return response.send({ error: true }); - } - - const data = await result.json(); - const count = data['value']; - return response.send({ count: count, ids: [] }); - } - - else { - console.log('Unknown API', api); + if (!result.ok) { + console.log(`API returned error: ${result.status} ${result.statusText}`); return response.send({ error: true }); } + + const data = await result.json(); + const count = legacyApi ? data?.results[0]?.tokens : (data?.length ?? data?.value); + const ids = legacyApi ? [] : (data?.tokens ?? []); + + return response.send({ count, ids }); } catch (error) { console.log(error); return response.send({ error: true }); From 09465fbb972233bb290989b0eb1b9e400000a3ba Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:35:11 -0500 Subject: [PATCH 07/15] Inline most get(...)TokenizerParams calls For everything except textgenerationwebui, these params are now simple enough that it doesn't make sense for them to be in a separate function. --- public/scripts/tokenizers.js | 44 ++++++++++++++---------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index e0d37ddca..d21c1abb0 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -394,30 +394,6 @@ function getTokenCacheObject() { return tokenCache[String(chatId)]; } -function getServerTokenizationParams(str) { - return { - text: str, - }; -} - -function getKoboldAPITokenizationParams(str) { - return { - text: str, - url: api_server, - }; -} - -function getTextgenAPITokenizationParams(str) { - return { - text: str, - api_type: textgen_settings.type, - url: api_server_textgenerationwebui, - legacy_api: - textgen_settings.legacy_api && - textgen_settings.type !== MANCER, - }; -} - /** * Count tokens using the server API. * @param {string} endpoint API endpoint. @@ -432,7 +408,7 @@ function countTokensFromServer(endpoint, str, padding) { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getServerTokenizationParams(str)), + data: JSON.stringify({ text: str }), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -461,7 +437,10 @@ function countTokensFromKoboldAPI(endpoint, str, padding) { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getKoboldAPITokenizationParams(str)), + data: JSON.stringify({ + text: str, + url: api_server, + }), dataType: 'json', contentType: 'application/json', success: function (data) { @@ -476,6 +455,17 @@ function countTokensFromKoboldAPI(endpoint, str, padding) { return tokenCount + padding; } +function getTextgenAPITokenizationParams(str) { + return { + text: str, + api_type: textgen_settings.type, + url: api_server_textgenerationwebui, + legacy_api: + textgen_settings.legacy_api && + textgen_settings.type !== MANCER, + }; +} + /** * Count tokens using the AI provider's API. * @param {string} endpoint API endpoint. @@ -538,7 +528,7 @@ function getTextTokensFromServer(endpoint, str, model = '') { async: false, type: 'POST', url: endpoint, - data: JSON.stringify(getServerTokenizationParams(str)), + data: JSON.stringify({ text: str }), dataType: 'json', contentType: 'application/json', success: function (data) { From 2f2cd197cc5648db9fe28b416252a9009a4a9090 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:48:41 -0500 Subject: [PATCH 08/15] Clean up tokenizer API code Store the URLs for each tokenizer's action in one place at the top of the file, instead of in a bunch of switch-cases. The URLs for the textgen and Kobold APIs don't change and hence don't need to be function arguments. --- public/scripts/tokenizers.js | 174 +++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 79 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index d21c1abb0..1c8420616 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -34,6 +34,51 @@ export const SENTENCEPIECE_TOKENIZERS = [ //tokenizers.NERD2, ]; +const TOKENIZER_URLS = { + [tokenizers.GPT2]: { + encode: '/api/tokenizers/gpt2/encode', + decode: '/api/tokenizers/gpt2/decode', + count: '/api/tokenizers/gpt2/encode', + }, + [tokenizers.OPENAI]: { + encode: '/api/tokenizers/openai/encode', + decode: '/api/tokenizers/openai/decode', + count: '/api/tokenizers/openai/encode', + }, + [tokenizers.LLAMA]: { + encode: '/api/tokenizers/llama/encode', + decode: '/api/tokenizers/llama/decode', + count: '/api/tokenizers/llama/encode', + }, + [tokenizers.NERD]: { + encode: '/api/tokenizers/nerdstash/encode', + decode: '/api/tokenizers/nerdstash/decode', + count: '/api/tokenizers/nerdstash/encode', + }, + [tokenizers.NERD2]: { + encode: '/api/tokenizers/nerdstash_v2/encode', + decode: '/api/tokenizers/nerdstash_v2/decode', + count: '/api/tokenizers/nerdstash_v2/encode', + }, + [tokenizers.API_KOBOLD]: { + count: '/api/tokenizers/remote/kobold/count', + }, + [tokenizers.MISTRAL]: { + encode: '/api/tokenizers/mistral/encode', + decode: '/api/tokenizers/mistral/decode', + count: '/api/tokenizers/mistral/encode', + }, + [tokenizers.YI]: { + encode: '/api/tokenizers/yi/encode', + decode: '/api/tokenizers/yi/decode', + count: '/api/tokenizers/yi/encode', + }, + [tokenizers.API_TEXTGENERATIONWEBUI]: { + encode: '/api/tokenizers/remote/textgenerationwebui/encode', + count: '/api/tokenizers/remote/textgenerationwebui/encode', + }, +}; + const objectStore = new localforage.createInstance({ name: 'SillyTavern_ChatCompletions' }); let tokenCache = {}; @@ -158,28 +203,21 @@ export function getTokenizerBestMatch(forApi) { * @returns {number} Token count. */ function callTokenizer(type, str, padding) { + if (type === tokenizers.NONE) return guesstimate(str) + padding; + switch (type) { - case tokenizers.NONE: - return guesstimate(str) + padding; - case tokenizers.GPT2: - return countTokensFromServer('/api/tokenizers/gpt2/encode', str, padding); - case tokenizers.LLAMA: - return countTokensFromServer('/api/tokenizers/llama/encode', str, padding); - case tokenizers.NERD: - return countTokensFromServer('/api/tokenizers/nerdstash/encode', str, padding); - case tokenizers.NERD2: - return countTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str, padding); - case tokenizers.MISTRAL: - return countTokensFromServer('/api/tokenizers/mistral/encode', str, padding); - case tokenizers.YI: - return countTokensFromServer('/api/tokenizers/yi/encode', str, padding); case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI('/api/tokenizers/remote/kobold/count', str, padding); + return countTokensFromKoboldAPI(str, padding); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI('/api/tokenizers/remote/textgenerationwebui/encode', str, padding); - default: - console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str, padding); + return countTokensFromTextgenAPI(str, padding); + default: { + const endpointUrl = TOKENIZER_URLS[type]?.count; + if (!endpointUrl) { + console.warn('Unknown tokenizer type', type); + return callTokenizer(tokenizers.NONE, str, padding); + } + return countTokensFromServer(endpointUrl, str, padding); + } } } @@ -425,18 +463,17 @@ function countTokensFromServer(endpoint, str, padding) { /** * Count tokens using the AI provider's API. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromKoboldAPI(endpoint, str, padding) { +function countTokensFromKoboldAPI(str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_KOBOLD].count, data: JSON.stringify({ text: str, url: api_server, @@ -468,18 +505,17 @@ function getTextgenAPITokenizationParams(str) { /** * Count tokens using the AI provider's API. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @param {number} padding Number of padding tokens. * @returns {number} Token count with padding. */ -function countTokensFromTextgenAPI(endpoint, str, padding) { +function countTokensFromTextgenAPI(str, padding) { let tokenCount = 0; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_TEXTGENERATIONWEBUI].count, data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', @@ -515,14 +551,9 @@ function apiFailureTokenCount(str) { * Calls the underlying tokenizer model to encode a string to tokens. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {string} model Tokenizer model. * @returns {number[]} Array of token ids. */ -function getTextTokensFromServer(endpoint, str, model = '') { - if (model) { - endpoint += `?model=${model}`; - } - +function getTextTokensFromServer(endpoint, str) { let ids = []; jQuery.ajax({ async: false, @@ -545,16 +576,15 @@ function getTextTokensFromServer(endpoint, str, model = '') { /** * Calls the AI provider's tokenize API to encode a string to tokens. - * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. * @returns {number[]} Array of token ids. */ -function getTextTokensFromTextgenAPI(endpoint, str) { +function getTextTokensFromTextgenAPI(str) { let ids = []; jQuery.ajax({ async: false, type: 'POST', - url: endpoint, + url: TOKENIZER_URLS[tokenizers.API_TEXTGENERATIONWEBUI].encode, data: JSON.stringify(getTextgenAPITokenizationParams(str)), dataType: 'json', contentType: 'application/json', @@ -570,11 +600,7 @@ function getTextTokensFromTextgenAPI(endpoint, str) { * @param {string} endpoint API endpoint. * @param {number[]} ids Array of token ids */ -function decodeTextTokensFromServer(endpoint, ids, model = '') { - if (model) { - endpoint += `?model=${model}`; - } - +function decodeTextTokensFromServer(endpoint, ids) { let text = ''; jQuery.ajax({ async: false, @@ -598,27 +624,24 @@ function decodeTextTokensFromServer(endpoint, ids, model = '') { */ export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { - case tokenizers.GPT2: - return getTextTokensFromServer('/api/tokenizers/gpt2/encode', str); - case tokenizers.LLAMA: - return getTextTokensFromServer('/api/tokenizers/llama/encode', str); - case tokenizers.NERD: - return getTextTokensFromServer('/api/tokenizers/nerdstash/encode', str); - case tokenizers.NERD2: - return getTextTokensFromServer('/api/tokenizers/nerdstash_v2/encode', str); - case tokenizers.MISTRAL: - return getTextTokensFromServer('/api/tokenizers/mistral/encode', str); - case tokenizers.YI: - return getTextTokensFromServer('/api/tokenizers/yi/encode', str); - case tokenizers.OPENAI: { - const model = getTokenizerModel(); - return getTextTokensFromServer('/api/tokenizers/openai/encode', str, model); - } case tokenizers.API_TEXTGENERATIONWEBUI: - return getTextTokensFromTextgenAPI('/api/tokenizers/textgenerationwebui/encode', str); - default: - console.warn('Calling getTextTokens with unsupported tokenizer type', tokenizerType); - return []; + return getTextTokensFromTextgenAPI(str); + default: { + const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; + if (!tokenizerEndpoints) { + console.warn('Unknown tokenizer type', tokenizerType); + return []; + } + let endpointUrl = tokenizerEndpoints.encode; + if (!endpointUrl) { + console.warn('This tokenizer type does not support encoding', tokenizerType); + return []; + } + if (tokenizerType === tokenizers.OPENAI) { + endpointUrl += `?model=${getTokenizerModel()}`; + } + return getTextTokensFromServer(endpointUrl, str); + } } } @@ -628,27 +651,20 @@ export function getTextTokens(tokenizerType, str) { * @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { - switch (tokenizerType) { - case tokenizers.GPT2: - return decodeTextTokensFromServer('/api/tokenizers/gpt2/decode', ids); - case tokenizers.LLAMA: - return decodeTextTokensFromServer('/api/tokenizers/llama/decode', ids); - case tokenizers.NERD: - return decodeTextTokensFromServer('/api/tokenizers/nerdstash/decode', ids); - case tokenizers.NERD2: - return decodeTextTokensFromServer('/api/tokenizers/nerdstash_v2/decode', ids); - case tokenizers.MISTRAL: - return decodeTextTokensFromServer('/api/tokenizers/mistral/decode', ids); - case tokenizers.YI: - return decodeTextTokensFromServer('/api/tokenizers/yi/decode', ids); - case tokenizers.OPENAI: { - const model = getTokenizerModel(); - return decodeTextTokensFromServer('/api/tokenizers/openai/decode', ids, model); - } - default: - console.warn('Calling decodeTextTokens with unsupported tokenizer type', tokenizerType); - return ''; + const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; + if (!tokenizerEndpoints) { + console.warn('Unknown tokenizer type', tokenizerType); + return []; } + let endpointUrl = tokenizerEndpoints.decode; + if (!endpointUrl) { + console.warn('This tokenizer type does not support decoding', tokenizerType); + return []; + } + if (tokenizerType === tokenizers.OPENAI) { + endpointUrl += `?model=${getTokenizerModel()}`; + } + return decodeTextTokensFromServer(endpointUrl, ids); } export async function initTokenizers() { From 014416546ce0ef01c9fd8ba43ac9ab7cedff09bf Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:53:16 -0500 Subject: [PATCH 09/15] Add padding once in getTokenCount This means we don't have to pass the "padding" parameter into every function so they can add the padding themselves--we can do it in just one place instead. --- public/scripts/tokenizers.js | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 1c8420616..6c406531f 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -199,24 +199,23 @@ export function getTokenizerBestMatch(forApi) { * Calls the underlying tokenizer model to the token count for a string. * @param {number} type Tokenizer type. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. * @returns {number} Token count. */ -function callTokenizer(type, str, padding) { - if (type === tokenizers.NONE) return guesstimate(str) + padding; +function callTokenizer(type, str) { + if (type === tokenizers.NONE) return guesstimate(str); switch (type) { case tokenizers.API_KOBOLD: - return countTokensFromKoboldAPI(str, padding); + return countTokensFromKoboldAPI(str); case tokenizers.API_TEXTGENERATIONWEBUI: - return countTokensFromTextgenAPI(str, padding); + return countTokensFromTextgenAPI(str); default: { const endpointUrl = TOKENIZER_URLS[type]?.count; if (!endpointUrl) { console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str, padding); + return callTokenizer(tokenizers.NONE, str); } - return countTokensFromServer(endpointUrl, str, padding); + return countTokensFromServer(endpointUrl, str); } } } @@ -260,7 +259,7 @@ export function getTokenCount(str, padding = undefined) { return cacheObject[cacheKey]; } - const result = callTokenizer(tokenizerType, str, padding); + const result = callTokenizer(tokenizerType, str) + padding; if (isNaN(result)) { console.warn('Token count calculation returned NaN'); @@ -436,10 +435,9 @@ function getTokenCacheObject() { * Count tokens using the server API. * @param {string} endpoint API endpoint. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. */ -function countTokensFromServer(endpoint, str, padding) { +function countTokensFromServer(endpoint, str) { let tokenCount = 0; jQuery.ajax({ @@ -458,16 +456,15 @@ function countTokensFromServer(endpoint, str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } /** * Count tokens using the AI provider's API. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. */ -function countTokensFromKoboldAPI(str, padding) { +function countTokensFromKoboldAPI(str) { let tokenCount = 0; jQuery.ajax({ @@ -489,7 +486,7 @@ function countTokensFromKoboldAPI(str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } function getTextgenAPITokenizationParams(str) { @@ -506,10 +503,9 @@ function getTextgenAPITokenizationParams(str) { /** * Count tokens using the AI provider's API. * @param {string} str String to tokenize. - * @param {number} padding Number of padding tokens. - * @returns {number} Token count with padding. + * @returns {number} Token count. */ -function countTokensFromTextgenAPI(str, padding) { +function countTokensFromTextgenAPI(str) { let tokenCount = 0; jQuery.ajax({ @@ -528,7 +524,7 @@ function countTokensFromTextgenAPI(str, padding) { }, }); - return tokenCount + padding; + return tokenCount; } function apiFailureTokenCount(str) { From 499d158c11136c8649c7bb3eb75dcdd31de2f54c Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 20:55:34 -0500 Subject: [PATCH 10/15] Remove last usage of getAPIServerUrl Now that we're not using this in the tokenizers code, we can remove it. --- public/script.js | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/public/script.js b/public/script.js index 81bed0af0..da754aa5f 100644 --- a/public/script.js +++ b/public/script.js @@ -871,7 +871,7 @@ async function getStatusKobold() { const url = '/getstatus'; - let endpoint = getAPIServerUrl(); + let endpoint = api_server; if (!endpoint) { console.warn('No endpoint for status check'); @@ -919,7 +919,9 @@ async function getStatusKobold() { async function getStatusTextgen() { const url = '/api/textgenerationwebui/status'; - let endpoint = getAPIServerUrl(); + let endpoint = textgen_settings.type === MANCER ? + MANCER_SERVER : + api_server_textgenerationwebui; if (!endpoint) { console.warn('No endpoint for status check'); @@ -999,23 +1001,6 @@ export function resultCheckStatus() { stopStatusLoading(); } -// TODO(valadaptive): remove the usage of this function in the tokenizers code, then remove the function entirely -export function getAPIServerUrl() { - if (main_api == 'textgenerationwebui') { - if (textgen_settings.type === MANCER) { - return MANCER_SERVER; - } - - return api_server_textgenerationwebui; - } - - if (main_api == 'kobold') { - return api_server; - } - - return ''; -} - export async function selectCharacterById(id) { if (characters[id] == undefined) { return; From 55976e61a3b150067f90de4aaa05b2b7fc097077 Mon Sep 17 00:00:00 2001 From: valadaptive Date: Sat, 9 Dec 2023 23:57:21 -0500 Subject: [PATCH 11/15] Fix tokenizer override I searched for all users of tokenizers.API, but missed that the menu converts the numerical select values directly to enum values. I've used the special tokenizer value 98 to represent "the tokenizer API for whichever backend we're currently using". --- public/index.html | 2 +- public/scripts/tokenizers.js | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/public/index.html b/public/index.html index da5b6e0cb..8549d6194 100644 --- a/public/index.html +++ b/public/index.html @@ -2438,7 +2438,7 @@ - +
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 6c406531f..5f7fdc01a 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -22,6 +22,7 @@ export const tokenizers = { MISTRAL: 7, YI: 8, API_TEXTGENERATIONWEBUI: 9, + API_CURRENT: 98, BEST_MATCH: 99, }; @@ -195,6 +196,19 @@ export function getTokenizerBestMatch(forApi) { return tokenizers.NONE; } +// Get the current remote tokenizer API based on the current text generation API. +function currentRemoteTokenizerAPI() { + switch (main_api) { + case 'kobold': + case 'koboldhorde': + return tokenizers.API_KOBOLD; + case 'textgenerationwebui': + return tokenizers.API_TEXTGENERATIONWEBUI; + default: + return tokenizers.NONE; + } +} + /** * Calls the underlying tokenizer model to the token count for a string. * @param {number} type Tokenizer type. @@ -205,6 +219,8 @@ function callTokenizer(type, str) { if (type === tokenizers.NONE) return guesstimate(str); switch (type) { + case tokenizers.API_CURRENT: + return callTokenizer(currentRemoteTokenizerAPI(), str); case tokenizers.API_KOBOLD: return countTokensFromKoboldAPI(str); case tokenizers.API_TEXTGENERATIONWEBUI: @@ -620,6 +636,8 @@ function decodeTextTokensFromServer(endpoint, ids) { */ export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { + case tokenizers.API_CURRENT: + return callTokenizer(currentRemoteTokenizerAPI(), str); case tokenizers.API_TEXTGENERATIONWEBUI: return getTextTokensFromTextgenAPI(str); default: { @@ -647,6 +665,10 @@ export function getTextTokens(tokenizerType, str) { * @param {number[]} ids Array of token ids */ export function decodeTextTokens(tokenizerType, ids) { + // Currently, neither remote API can decode, but this may change in the future. Put this guard here to be safe + if (tokenizerType === tokenizers.API_CURRENT) { + return decodeTextTokens(tokenizers.NONE); + } const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { console.warn('Unknown tokenizer type', tokenizerType); From 6e5eea5dba99d1046c473fb341a9dfc659458992 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 15:56:38 +0200 Subject: [PATCH 12/15] Unbreak previously selected API tokenizer in dropdown --- public/index.html | 2 +- public/scripts/tokenizers.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/index.html b/public/index.html index 8549d6194..da5b6e0cb 100644 --- a/public/index.html +++ b/public/index.html @@ -2438,7 +2438,7 @@ - +
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 5f7fdc01a..87d72c1a5 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -18,11 +18,11 @@ export const tokenizers = { LLAMA: 3, NERD: 4, NERD2: 5, - API_KOBOLD: 6, + API_CURRENT: 6, MISTRAL: 7, YI: 8, API_TEXTGENERATIONWEBUI: 9, - API_CURRENT: 98, + API_KOBOLD: 10, BEST_MATCH: 99, }; From 6957d9e7cf73e07ed5c59722abb0c61b9484057e Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:03:25 +0200 Subject: [PATCH 13/15] Fix display names of Best match tokenizers --- public/scripts/tokenizers.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 87d72c1a5..3d8357e13 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -139,7 +139,18 @@ export function getFriendlyTokenizerName(forApi) { if (forApi !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) { tokenizerId = getTokenizerBestMatch(forApi); - tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text(); + + switch (tokenizerId) { + case tokenizers.API_KOBOLD: + tokenizerName = 'API (KoboldAI Classic)'; + break; + case tokenizers.API_TEXTGENERATIONWEBUI: + tokenizerName = 'API (Text Completion)'; + break; + default: + tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text(); + break; + } } tokenizerName = forApi == 'openai' From f54bf99006e49625cbedd60c1386d472b2b92632 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:09:00 +0200 Subject: [PATCH 14/15] Fix token ids not displaying in "API_CURRENT" mode for TextGen --- public/scripts/tokenizers.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 3d8357e13..7db5d0887 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -648,7 +648,7 @@ function decodeTextTokensFromServer(endpoint, ids) { export function getTextTokens(tokenizerType, str) { switch (tokenizerType) { case tokenizers.API_CURRENT: - return callTokenizer(currentRemoteTokenizerAPI(), str); + return getTextTokens(currentRemoteTokenizerAPI(), str); case tokenizers.API_TEXTGENERATIONWEBUI: return getTextTokensFromTextgenAPI(str); default: { @@ -678,7 +678,7 @@ export function getTextTokens(tokenizerType, str) { export function decodeTextTokens(tokenizerType, ids) { // Currently, neither remote API can decode, but this may change in the future. Put this guard here to be safe if (tokenizerType === tokenizers.API_CURRENT) { - return decodeTextTokens(tokenizers.NONE); + return decodeTextTokens(tokenizers.NONE, ids); } const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { From 9acef0fae615214991453cc4028c2601227c8854 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 10 Dec 2023 16:21:06 +0200 Subject: [PATCH 15/15] Horde doesn't support API tokenizers --- public/scripts/tokenizers.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index 7db5d0887..bef54b791 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -211,7 +211,6 @@ export function getTokenizerBestMatch(forApi) { function currentRemoteTokenizerAPI() { switch (main_api) { case 'kobold': - case 'koboldhorde': return tokenizers.API_KOBOLD; case 'textgenerationwebui': return tokenizers.API_TEXTGENERATIONWEBUI; @@ -240,7 +239,7 @@ function callTokenizer(type, str) { const endpointUrl = TOKENIZER_URLS[type]?.count; if (!endpointUrl) { console.warn('Unknown tokenizer type', type); - return callTokenizer(tokenizers.NONE, str); + return apiFailureTokenCount(str); } return countTokensFromServer(endpointUrl, str); } @@ -654,11 +653,13 @@ export function getTextTokens(tokenizerType, str) { default: { const tokenizerEndpoints = TOKENIZER_URLS[tokenizerType]; if (!tokenizerEndpoints) { + apiFailureTokenCount(str); console.warn('Unknown tokenizer type', tokenizerType); return []; } let endpointUrl = tokenizerEndpoints.encode; if (!endpointUrl) { + apiFailureTokenCount(str); console.warn('This tokenizer type does not support encoding', tokenizerType); return []; }