diff --git a/public/jsconfig.json b/public/jsconfig.json index 1fe7ce877..0751d9915 100644 --- a/public/jsconfig.json +++ b/public/jsconfig.json @@ -21,7 +21,8 @@ "showdown-katex", "droll", "handlebars", - "highlight.js" + "highlight.js", + "localforage" ] } } diff --git a/public/script.js b/public/script.js index 7ffcef682..3389cc096 100644 --- a/public/script.js +++ b/public/script.js @@ -1,7 +1,5 @@ import { humanizedDateTime, favsToHotswap, getMessageTimeStamp, dragElement, isMobile, } from "./scripts/RossAscends-mods.js"; import { userStatsHandler, statMesProcess } from './scripts/stats.js'; -import { encode } from "../lib/gpt-2-3-tokenizer/mod.js"; -import { GPT3BrowserTokenizer } from "../lib/gpt-3-tokenizer/gpt3-tokenizer.js"; import { generateKoboldWithStreaming, kai_settings, @@ -65,7 +63,6 @@ import { fixMarkdown, power_user, pygmalion_options, - tokenizers, persona_description_positions, loadMovingUIState, getCustomStoppingStrings, @@ -86,9 +83,7 @@ import { oai_settings, is_get_status_openai, openai_messages_count, - getTokenCountOpenAI, chat_completion_sources, - getTokenizerModel, getChatCompletionModel, } from "./scripts/openai.js"; @@ -172,6 +167,7 @@ import { autoSelectInstructPreset, } from "./scripts/instruct-mode.js"; import { applyLocale } from "./scripts/i18n.js"; +import { getTokenCount, getTokenizerModel, saveTokenCache } from "./scripts/tokenizers.js"; //exporting functions and vars for mods export { @@ -208,7 +204,6 @@ export { setGenerationProgress, updateChatMetadata, scrollChatToBottom, - getTokenCount, isStreamingEnabled, getThumbnailUrl, getStoppingStrings, @@ -292,7 +287,6 @@ eventSource.on(event_types.CHAT_CHANGED, setChatLockedPersona); eventSource.on(event_types.MESSAGE_RECEIVED, processExtensionHelpers); eventSource.on(event_types.MESSAGE_SENT, processExtensionHelpers); -const gpt3 = new GPT3BrowserTokenizer({ type: 'gpt3' }); hljs.addPlugin({ "before:highlightElement": ({ el }) => { el.textContent = el.innerText } }); // Markdown converter @@ -535,123 +529,6 @@ async function getClientVersion() { } } -function getTokenizerBestMatch() { - if (main_api === 'novel') { - if (nai_settings.model_novel.includes('krake') || nai_settings.model_novel.includes('euterpe')) { - return tokenizers.CLASSIC; - } - if (nai_settings.model_novel.includes('clio')) { - return tokenizers.NERD; - } - if (nai_settings.model_novel.includes('kayra')) { - return tokenizers.NERD2; - } - } - if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') { - return tokenizers.LLAMA; - } - - return power_user.NONE; -} - -/** - * Gets the token count for a string using the current model tokenizer. - * @param {string} str String to tokenize - * @param {number | undefined} padding Optional padding tokens. Defaults to 0. - * @returns {number} Token count. 
- */ -function getTokenCount(str, padding = undefined) { - if (typeof str !== 'string' || !str?.length) { - return 0; - } - - let tokenizerType = power_user.tokenizer; - - if (main_api === 'openai') { - if (padding === power_user.token_padding) { - // For main "shadow" prompt building - tokenizerType = tokenizers.NONE; - } else { - // For extensions and WI - return getTokenCountOpenAI(str); - } - } - - if (tokenizerType === tokenizers.BEST_MATCH) { - tokenizerType = getTokenizerBestMatch(); - } - - if (padding === undefined) { - padding = 0; - } - - switch (tokenizerType) { - case tokenizers.NONE: - return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding; - case tokenizers.GPT3: - return gpt3.encode(str).bpe.length + padding; - case tokenizers.CLASSIC: - return encode(str).length + padding; - case tokenizers.LLAMA: - return countTokensRemote('/tokenize_llama', str, padding); - case tokenizers.NERD: - return countTokensRemote('/tokenize_nerdstash', str, padding); - case tokenizers.NERD2: - return countTokensRemote('/tokenize_nerdstash_v2', str, padding); - case tokenizers.API: - return countTokensRemote('/tokenize_via_api', str, padding); - default: - console.warn("Unknown tokenizer type", tokenizerType); - return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding; - } -} - -function countTokensRemote(endpoint, str, padding) { - let tokenCount = 0; - jQuery.ajax({ - async: false, - type: 'POST', - url: endpoint, - data: JSON.stringify({ text: str }), - dataType: "json", - contentType: "application/json", - success: function (data) { - tokenCount = data.count; - } - }); - return tokenCount + padding; -} - -function getTextTokensRemote(endpoint, str) { - let ids = []; - jQuery.ajax({ - async: false, - type: 'POST', - url: endpoint, - data: JSON.stringify({ text: str }), - dataType: "json", - contentType: "application/json", - success: function (data) { - ids = data.ids; - } - }); - return ids; -} - -export function getTextTokens(tokenizerType, str) { - switch (tokenizerType) { - case tokenizers.LLAMA: - return getTextTokensRemote('/tokenize_llama', str); - case tokenizers.NERD: - return getTextTokensRemote('/tokenize_nerdstash', str); - case tokenizers.NERD2: - return getTextTokensRemote('/tokenize_nerdstash_v2', str); - default: - console.warn("Calling getTextTokens with unsupported tokenizer type", tokenizerType); - return []; - } -} - function reloadMarkdownProcessor(render_formulas = false) { if (render_formulas) { converter = new showdown.Converter({ @@ -699,7 +576,6 @@ function getCurrentChatId() { } } -export const CHARACTERS_PER_TOKEN_RATIO = 3.35; const talkativeness_default = 0.5; var is_advanced_char_open = false; @@ -6132,6 +6008,9 @@ export async function saveChatConditional() { else { await saveChat(); } + + // Save token cache to IndexedDB storage + await saveTokenCache(); } async function importCharacterChat(formData) { diff --git a/public/scripts/RossAscends-mods.js b/public/scripts/RossAscends-mods.js index 222451067..1f7cdcfe9 100644 --- a/public/scripts/RossAscends-mods.js +++ b/public/scripts/RossAscends-mods.js @@ -2,15 +2,12 @@ esversion: 6 import { Generate, - this_chid, characters, online_status, main_api, api_server, api_server_textgenerationwebui, is_send_press, - getTokenCount, - menu_type, max_context, saveSettingsDebounced, active_group, @@ -35,6 +32,7 @@ import { } from "./secrets.js"; import { debounce, delay, getStringHash, waitUntilCondition } from "./utils.js"; import { chat_completion_sources, oai_settings } from "./openai.js"; +import 
{ getTokenCount } from "./tokenizers.js"; var RPanelPin = document.getElementById("rm_button_panel_pin"); var LPanelPin = document.getElementById("lm_button_panel_pin"); diff --git a/public/scripts/authors-note.js b/public/scripts/authors-note.js index b58d6f947..03a2d1d1c 100644 --- a/public/scripts/authors-note.js +++ b/public/scripts/authors-note.js @@ -2,7 +2,6 @@ import { chat_metadata, eventSource, event_types, - getTokenCount, saveSettingsDebounced, this_chid, } from "../script.js"; @@ -10,6 +9,7 @@ import { selected_group } from "./group-chats.js"; import { extension_settings, getContext, saveMetadataDebounced } from "./extensions.js"; import { registerSlashCommand } from "./slash-commands.js"; import { getCharaFilename, debounce, waitUntilCondition, delay } from "./utils.js"; +import { getTokenCount } from "./tokenizers.js"; export { MODULE_NAME as NOTE_MODULE_NAME }; const MODULE_NAME = '2_floating_prompt'; // <= Deliberate, for sorting lower than memory diff --git a/public/scripts/extensions/infinity-context/index.js b/public/scripts/extensions/infinity-context/index.js index 7f3aba368..b06309e5b 100644 --- a/public/scripts/extensions/infinity-context/index.js +++ b/public/scripts/extensions/infinity-context/index.js @@ -1,6 +1,7 @@ -import { saveSettingsDebounced, getCurrentChatId, system_message_types, extension_prompt_types, eventSource, event_types, getRequestHeaders, CHARACTERS_PER_TOKEN_RATIO, substituteParams, max_context, } from "../../../script.js"; +import { saveSettingsDebounced, getCurrentChatId, system_message_types, extension_prompt_types, eventSource, event_types, getRequestHeaders, substituteParams, } from "../../../script.js"; import { humanizedDateTime } from "../../RossAscends-mods.js"; import { getApiUrl, extension_settings, getContext, doExtrasFetch } from "../../extensions.js"; +import { CHARACTERS_PER_TOKEN_RATIO } from "../../tokenizers.js"; import { getFileText, onlyUnique, splitRecursive } from "../../utils.js"; export { MODULE_NAME }; diff --git a/public/scripts/extensions/token-counter/index.js b/public/scripts/extensions/token-counter/index.js index 430fb5771..bdc8e19f0 100644 --- a/public/scripts/extensions/token-counter/index.js +++ b/public/scripts/extensions/token-counter/index.js @@ -1,6 +1,6 @@ import { callPopup, main_api } from "../../../script.js"; import { getContext } from "../../extensions.js"; -import { getTokenizerModel } from "../../openai.js"; +import { getTokenizerModel } from "../../tokenizers.js"; async function doTokenCounter() { const selectedTokenizer = main_api == 'openai' diff --git a/public/scripts/nai-settings.js b/public/scripts/nai-settings.js index 6d5a9d0bc..41fc87791 100644 --- a/public/scripts/nai-settings.js +++ b/public/scripts/nai-settings.js @@ -1,14 +1,14 @@ import { getRequestHeaders, getStoppingStrings, - getTextTokens, max_context, novelai_setting_names, saveSettingsDebounced, setGenerationParamsFromPreset } from "../script.js"; import { getCfgPrompt } from "./extensions/cfg/util.js"; -import { MAX_CONTEXT_DEFAULT, tokenizers } from "./power-user.js"; +import { MAX_CONTEXT_DEFAULT } from "./power-user.js"; +import { getTextTokens, tokenizers } from "./tokenizers.js"; import { getSortableDelay, getStringHash, diff --git a/public/scripts/openai.js b/public/scripts/openai.js index c01b1cf26..aef41beb8 100644 --- a/public/scripts/openai.js +++ b/public/scripts/openai.js @@ -48,10 +48,10 @@ import { delay, download, getFileText, getSortableDelay, - getStringHash, parseJsonFile, stringFormat, } from "./utils.js"; 
+import { countTokensOpenAI } from "./tokenizers.js"; export { is_get_status_openai, @@ -67,7 +67,6 @@ export { sendOpenAIRequest, setOpenAIOnlineStatus, getChatCompletionModel, - countTokens, TokenHandler, IdentifierNotFoundError, Message, @@ -124,40 +123,6 @@ const openrouter_website_model = 'OR_Website'; let biasCache = undefined; let model_list = []; -const objectStore = new localforage.createInstance({ name: "SillyTavern_ChatCompletions" }); - -let tokenCache = {}; - -async function loadTokenCache() { - try { - console.debug('Chat Completions: loading token cache') - tokenCache = await objectStore.getItem('tokenCache') || {}; - } catch (e) { - console.log('Chat Completions: unable to load token cache, using default value', e); - tokenCache = {}; - } -} - -async function saveTokenCache() { - try { - console.debug('Chat Completions: saving token cache') - await objectStore.setItem('tokenCache', tokenCache); - } catch (e) { - console.log('Chat Completions: unable to save token cache', e); - } -} - -async function resetTokenCache() { - try { - console.debug('Chat Completions: resetting token cache'); - Object.keys(tokenCache).forEach(key => delete tokenCache[key]); - await objectStore.removeItem('tokenCache'); - } catch (e) { - console.log('Chat Completions: unable to reset token cache', e); - } -} - -window['resetTokenCache'] = resetTokenCache; export const chat_completion_sources = { OPENAI: 'openai', @@ -268,10 +233,6 @@ const oai_settings = { let openai_setting_names; let openai_settings; -export function getTokenCountOpenAI(text) { - const message = { role: 'system', content: text }; - return countTokens(message, true); -} let promptManager = null; @@ -871,8 +832,6 @@ function prepareOpenAIMessages({ const chat = chatCompletion.getChat(); openai_messages_count = chat.filter(x => x?.role === "user" || x?.role === "assistant")?.length || 0; - // Save token cache to IndexedDB storage (async, no need to await) - saveTokenCache(); return [chat, promptManager.tokenHandler.counts]; } @@ -1410,68 +1369,8 @@ class TokenHandler { } } -function countTokens(messages, full = false) { - let shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer; - let chatId = 'undefined'; - try { - if (selected_group) { - chatId = groups.find(x => x.id == selected_group)?.chat_id; - } - else if (this_chid) { - chatId = characters[this_chid].chat; - } - } catch { - console.log('No character / group selected. Using default cache item'); - } - - if (typeof tokenCache[chatId] !== 'object') { - tokenCache[chatId] = {}; - } - - if (!Array.isArray(messages)) { - messages = [messages]; - } - - let token_count = -1; - - for (const message of messages) { - const model = getTokenizerModel(); - - if (model === 'claude' || shouldTokenizeAI21) { - full = true; - } - - const hash = getStringHash(JSON.stringify(message)); - const cacheKey = `${model}-${hash}`; - const cachedCount = tokenCache[chatId][cacheKey]; - - if (typeof cachedCount === 'number') { - token_count += cachedCount; - } - - else { - jQuery.ajax({ - async: false, - type: 'POST', // - url: shouldTokenizeAI21 ? 
'/tokenize_ai21' : `/tokenize_openai?model=${model}`, - data: JSON.stringify([message]), - dataType: "json", - contentType: "application/json", - success: function (data) { - token_count += Number(data.token_count); - tokenCache[chatId][cacheKey] = Number(data.token_count); - } - }); - } - } - - if (!full) token_count -= 2; - - return token_count; -} - -const tokenHandler = new TokenHandler(countTokens); +const tokenHandler = new TokenHandler(countTokensOpenAI); // Thrown by ChatCompletion when a requested prompt couldn't be found. class IdentifierNotFoundError extends Error { @@ -1908,62 +1807,6 @@ class ChatCompletion { } } -export function getTokenizerModel() { - // OpenAI models always provide their own tokenizer - if (oai_settings.chat_completion_source == chat_completion_sources.OPENAI) { - return oai_settings.openai_model; - } - - const turboTokenizer = 'gpt-3.5-turbo'; - const gpt4Tokenizer = 'gpt-4'; - const gpt2Tokenizer = 'gpt2'; - const claudeTokenizer = 'claude'; - - // Assuming no one would use it for different models.. right? - if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) { - return gpt4Tokenizer; - } - - // Select correct tokenizer for WindowAI proxies - if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI && oai_settings.windowai_model) { - if (oai_settings.windowai_model.includes('gpt-4')) { - return gpt4Tokenizer; - } - else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) { - return turboTokenizer; - } - else if (oai_settings.windowai_model.includes('claude')) { - return claudeTokenizer; - } - else if (oai_settings.windowai_model.includes('GPT-NeoXT')) { - return gpt2Tokenizer; - } - } - - // And for OpenRouter (if not a site model, then it's impossible to determine the tokenizer) - if (oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER && oai_settings.openrouter_model) { - if (oai_settings.openrouter_model.includes('gpt-4')) { - return gpt4Tokenizer; - } - else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) { - return turboTokenizer; - } - else if (oai_settings.openrouter_model.includes('claude')) { - return claudeTokenizer; - } - else if (oai_settings.openrouter_model.includes('GPT-NeoXT')) { - return gpt2Tokenizer; - } - } - - if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) { - return claudeTokenizer; - } - - // Default to Turbo 3.5 - return turboTokenizer; -} - function loadOpenAISettings(data, settings) { openai_setting_names = data.openai_setting_names; openai_settings = data.openai_settings; @@ -3036,8 +2879,6 @@ function updateScaleForm() { } $(document).ready(async function () { - await loadTokenCache(); - $('#test_api_button').on('click', testApiConnection); $('#scale-alt').on('change', function () { diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js index 9ccbd7db9..a0365cc0f 100644 --- a/public/scripts/power-user.js +++ b/public/scripts/power-user.js @@ -23,6 +23,7 @@ import { import { loadInstructMode } from "./instruct-mode.js"; import { registerSlashCommand } from "./slash-commands.js"; +import { tokenizers } from "./tokenizers.js"; import { delay } from "./utils.js"; @@ -35,7 +36,6 @@ export { fixMarkdown, power_user, pygmalion_options, - tokenizers, send_on_enter_options, }; @@ -63,17 +63,6 @@ const pygmalion_options = { ENABLED: 1, } -const tokenizers = { - NONE: 0, - GPT3: 1, - CLASSIC: 2, - LLAMA: 3, - NERD: 4, - NERD2: 5, - API: 6, - BEST_MATCH: 99, -} - const send_on_enter_options = { DISABLED: -1, 
     AUTO: 0,
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
new file mode 100644
index 000000000..2714c12ef
--- /dev/null
+++ b/public/scripts/tokenizers.js
@@ -0,0 +1,342 @@
+import { characters, main_api, nai_settings, this_chid } from "../script.js";
+import { power_user } from "./power-user.js";
+import { encode } from "../lib/gpt-2-3-tokenizer/mod.js";
+import { GPT3BrowserTokenizer } from "../lib/gpt-3-tokenizer/gpt3-tokenizer.js";
+import { chat_completion_sources, oai_settings } from "./openai.js";
+import { groups, selected_group } from "./group-chats.js";
+import { getStringHash } from "./utils.js";
+
+export const CHARACTERS_PER_TOKEN_RATIO = 3.35;
+
+export const tokenizers = {
+    NONE: 0,
+    GPT3: 1,
+    CLASSIC: 2,
+    LLAMA: 3,
+    NERD: 4,
+    NERD2: 5,
+    API: 6,
+    BEST_MATCH: 99,
+};
+
+const objectStore = localforage.createInstance({ name: "SillyTavern_ChatCompletions" });
+const gpt3 = new GPT3BrowserTokenizer({ type: 'gpt3' });
+
+let tokenCache = {};
+
+async function loadTokenCache() {
+    try {
+        console.debug('Chat Completions: loading token cache');
+        tokenCache = await objectStore.getItem('tokenCache') || {};
+    } catch (e) {
+        console.log('Chat Completions: unable to load token cache, using default value', e);
+        tokenCache = {};
+    }
+}
+
+export async function saveTokenCache() {
+    try {
+        console.debug('Chat Completions: saving token cache');
+        await objectStore.setItem('tokenCache', tokenCache);
+    } catch (e) {
+        console.log('Chat Completions: unable to save token cache', e);
+    }
+}
+
+async function resetTokenCache() {
+    try {
+        console.debug('Chat Completions: resetting token cache');
+        Object.keys(tokenCache).forEach(key => delete tokenCache[key]);
+        await objectStore.removeItem('tokenCache');
+    } catch (e) {
+        console.log('Chat Completions: unable to reset token cache', e);
+    }
+}
+
+window['resetTokenCache'] = resetTokenCache;
+
+function getTokenizerBestMatch() {
+    if (main_api === 'novel') {
+        if (nai_settings.model_novel.includes('krake') || nai_settings.model_novel.includes('euterpe')) {
+            return tokenizers.CLASSIC;
+        }
+        if (nai_settings.model_novel.includes('clio')) {
+            return tokenizers.NERD;
+        }
+        if (nai_settings.model_novel.includes('kayra')) {
+            return tokenizers.NERD2;
+        }
+    }
+    if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') {
+        return tokenizers.LLAMA;
+    }
+
+    return tokenizers.NONE;
+}
+
+/**
+ * Gets the token count for a string using the current model tokenizer.
+ * @param {string} str String to tokenize
+ * @param {number | undefined} padding Optional padding tokens. Defaults to 0.
+ * @returns {number} Token count.
+ */ + function calculate(type) { + switch (type) { + case tokenizers.NONE: + return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding; + case tokenizers.GPT3: + return gpt3.encode(str).bpe.length + padding; + case tokenizers.CLASSIC: + return encode(str).length + padding; + case tokenizers.LLAMA: + return countTokensRemote('/tokenize_llama', str, padding); + case tokenizers.NERD: + return countTokensRemote('/tokenize_nerdstash', str, padding); + case tokenizers.NERD2: + return countTokensRemote('/tokenize_nerdstash_v2', str, padding); + case tokenizers.API: + return countTokensRemote('/tokenize_via_api', str, padding); + default: + console.warn("Unknown tokenizer type", type); + return calculate(tokenizers.NONE); + } + } + + if (typeof str !== 'string' || !str?.length) { + return 0; + } + + let tokenizerType = power_user.tokenizer; + + if (main_api === 'openai') { + if (padding === power_user.token_padding) { + // For main "shadow" prompt building + tokenizerType = tokenizers.NONE; + } else { + // For extensions and WI + return counterWrapperOpenAI(str); + } + } + + if (tokenizerType === tokenizers.BEST_MATCH) { + tokenizerType = getTokenizerBestMatch(); + } + + if (padding === undefined) { + padding = 0; + } + + const cacheObject = getTokenCacheObject(); + const hash = getStringHash(str); + const cacheKey = `${tokenizerType}-${hash}`; + + if (typeof cacheObject[cacheKey] === 'number') { + return cacheObject[cacheKey]; + } + + const result = calculate(tokenizerType); + + if (isNaN(result)) { + console.warn("Token count calculation returned NaN"); + return 0; + } + + cacheObject[cacheKey] = result; + return result; +} + +/** + * Gets the token count for a string using the OpenAI tokenizer. + * @param {string} text Text to tokenize. + * @returns {number} Token count. + */ +function counterWrapperOpenAI(text) { + const message = { role: 'system', content: text }; + return countTokensOpenAI(message, true); +} + +export function getTokenizerModel() { + // OpenAI models always provide their own tokenizer + if (oai_settings.chat_completion_source == chat_completion_sources.OPENAI) { + return oai_settings.openai_model; + } + + const turboTokenizer = 'gpt-3.5-turbo'; + const gpt4Tokenizer = 'gpt-4'; + const gpt2Tokenizer = 'gpt2'; + const claudeTokenizer = 'claude'; + + // Assuming no one would use it for different models.. right? 
+    if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
+        return gpt4Tokenizer;
+    }
+
+    // Select correct tokenizer for WindowAI proxies
+    if (oai_settings.chat_completion_source == chat_completion_sources.WINDOWAI && oai_settings.windowai_model) {
+        if (oai_settings.windowai_model.includes('gpt-4')) {
+            return gpt4Tokenizer;
+        }
+        else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) {
+            return turboTokenizer;
+        }
+        else if (oai_settings.windowai_model.includes('claude')) {
+            return claudeTokenizer;
+        }
+        else if (oai_settings.windowai_model.includes('GPT-NeoXT')) {
+            return gpt2Tokenizer;
+        }
+    }
+
+    // And for OpenRouter (if not a site model, then it's impossible to determine the tokenizer)
+    if (oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER && oai_settings.openrouter_model) {
+        if (oai_settings.openrouter_model.includes('gpt-4')) {
+            return gpt4Tokenizer;
+        }
+        else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) {
+            return turboTokenizer;
+        }
+        else if (oai_settings.openrouter_model.includes('claude')) {
+            return claudeTokenizer;
+        }
+        else if (oai_settings.openrouter_model.includes('GPT-NeoXT')) {
+            return gpt2Tokenizer;
+        }
+    }
+
+    if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
+        return claudeTokenizer;
+    }
+
+    // Default to Turbo 3.5
+    return turboTokenizer;
+}
+
+/**
+ * Counts the tokens of chat completion messages using the remote server tokenizer.
+ * @param {any[] | Object} messages Messages (or a single message) to tokenize.
+ * @param {boolean} full Count the full message overhead; when false, 2 tokens are subtracted.
+ * @returns {number} Token count.
+ */
+export function countTokensOpenAI(messages, full = false) {
+    const shouldTokenizeAI21 = oai_settings.chat_completion_source === chat_completion_sources.AI21 && oai_settings.use_ai21_tokenizer;
+    const cacheObject = getTokenCacheObject();
+
+    if (!Array.isArray(messages)) {
+        messages = [messages];
+    }
+
+    let token_count = -1;
+
+    for (const message of messages) {
+        const model = getTokenizerModel();
+
+        if (model === 'claude' || shouldTokenizeAI21) {
+            full = true;
+        }
+
+        const hash = getStringHash(JSON.stringify(message));
+        const cacheKey = `${model}-${hash}`;
+        const cachedCount = cacheObject[cacheKey];
+
+        if (typeof cachedCount === 'number') {
+            token_count += cachedCount;
+        }
+
+        else {
+            jQuery.ajax({
+                async: false,
+                type: 'POST',
+                url: shouldTokenizeAI21 ? '/tokenize_ai21' : `/tokenize_openai?model=${model}`,
+                data: JSON.stringify([message]),
+                dataType: "json",
+                contentType: "application/json",
+                success: function (data) {
+                    token_count += Number(data.token_count);
+                    cacheObject[cacheKey] = Number(data.token_count);
+                }
+            });
+        }
+    }
+
+    if (!full) token_count -= 2;
+
+    return token_count;
+}
+
+/**
+ * Gets the token cache object for the current chat.
+ * @returns {Object} Token cache object for the current chat.
+ */
+function getTokenCacheObject() {
+    let chatId = 'undefined';
+
+    try {
+        if (selected_group) {
+            chatId = groups.find(x => x.id == selected_group)?.chat_id;
+        }
+        else if (this_chid !== undefined) {
+            chatId = characters[this_chid].chat;
+        }
+    } catch {
+        console.log('No character / group selected. Using default cache item');
+    }
+
+    if (typeof tokenCache[chatId] !== 'object') {
+        tokenCache[chatId] = {};
+    }
+
+    return tokenCache[chatId];
+}
+
+function countTokensRemote(endpoint, str, padding) {
+    let tokenCount = 0;
+    jQuery.ajax({
+        async: false,
+        type: 'POST',
+        url: endpoint,
+        data: JSON.stringify({ text: str }),
+        dataType: "json",
+        contentType: "application/json",
+        success: function (data) {
+            tokenCount = data.count;
+        }
+    });
+    return tokenCount + padding;
+}
+
+function getTextTokensRemote(endpoint, str) {
+    let ids = [];
+    jQuery.ajax({
+        async: false,
+        type: 'POST',
+        url: endpoint,
+        data: JSON.stringify({ text: str }),
+        dataType: "json",
+        contentType: "application/json",
+        success: function (data) {
+            ids = data.ids;
+        }
+    });
+    return ids;
+}
+
+export function getTextTokens(tokenizerType, str) {
+    switch (tokenizerType) {
+        case tokenizers.LLAMA:
+            return getTextTokensRemote('/tokenize_llama', str);
+        case tokenizers.NERD:
+            return getTextTokensRemote('/tokenize_nerdstash', str);
+        case tokenizers.NERD2:
+            return getTextTokensRemote('/tokenize_nerdstash_v2', str);
+        default:
+            console.warn("Calling getTextTokens with unsupported tokenizer type", tokenizerType);
+            return [];
+    }
+}
+
+jQuery(async () => {
+    await loadTokenCache();
+});
diff --git a/public/scripts/world-info.js b/public/scripts/world-info.js
index 630dc735b..9674914f6 100644
--- a/public/scripts/world-info.js
+++ b/public/scripts/world-info.js
@@ -1,10 +1,11 @@
-import { saveSettings, callPopup, substituteParams, getTokenCount, getRequestHeaders, chat_metadata, this_chid, characters, saveCharacterDebounced, menu_type, eventSource, event_types } from "../script.js";
+import { saveSettings, callPopup, substituteParams, getRequestHeaders, chat_metadata, this_chid, characters, saveCharacterDebounced, menu_type, eventSource, event_types } from "../script.js";
 import { download, debounce, initScrollHeight, resetScrollHeight, parseJsonFile, extractDataFromPng, getFileBuffer, getCharaFilename, deepClone, getSortableDelay, escapeRegex, PAGINATION_TEMPLATE, navigation_option } from "./utils.js";
 import { getContext } from "./extensions.js";
 import { NOTE_MODULE_NAME, metadata_keys, shouldWIAddPrompt } from "./authors-note.js";
 import { registerSlashCommand } from "./slash-commands.js";
 import { deviceInfo } from "./RossAscends-mods.js";
 import { FILTER_TYPES, FilterHelper } from "./filters.js";
+import { getTokenCount } from "./tokenizers.js";
 export {
     world_info,
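
For orientation after this refactor: call sites keep the same function signatures and only swap their import path to the new module. A minimal consumer sketch under that assumption (browser ES-module context where `localforage` and `jQuery` are global, the `/tokenize_*` endpoints from the diff are reachable, and the sample strings are purely illustrative):

    // Import from the new module instead of script.js / openai.js / power-user.js.
    import { getTokenCount, getTextTokens, tokenizers, saveTokenCache } from "./scripts/tokenizers.js";

    // Count tokens with the currently configured tokenizer, adding 64 padding
    // tokens; results are cached per chat, keyed by tokenizer type and string hash.
    const count = getTokenCount("Hello there, General Kenobi.", 64);

    // Raw token ids from the remote LLaMA tokenizer endpoint.
    const ids = getTextTokens(tokenizers.LLAMA, "Hello there.");

    // Persist the per-chat token cache to IndexedDB. After this change,
    // script.js awaits this in saveChatConditional() instead of firing it
    // on every prompt build in prepareOpenAIMessages().
    await saveTokenCache();

Moving the cache flush from `prepareOpenAIMessages()` to `saveChatConditional()` trades write frequency for durability: the cache is persisted once per chat save rather than on every prompt, and a crash between saves only loses counts that can be recomputed.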