import { callPopup, main_api } from "../../../script.js";
import { getContext } from "../../extensions.js";
import { registerSlashCommand } from "../../slash-commands.js";
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js";

/** Largest value String.fromCodePoint accepts; anything above it throws a RangeError. */
const MAX_CODE_POINT = 0x10FFFF;

/**
 * Opens the Token Counter popup.
 *
 * Reads the tokenizer currently selected in `#tokenizer`. When the main API is
 * not 'openai' and the selection is `tokenizers.BEST_MATCH`, the concrete
 * tokenizer is resolved through `getTokenizerBestMatch()`. Every `input` event
 * on the popup textarea re-tokenizes the text and refreshes the token count,
 * the token ID list and the colored chunk display.
 */
async function doTokenCounter() {
    const tokenizerOption = $("#tokenizer").find(':selected');
    let tokenizerId = Number(tokenizerOption.val());
    let tokenizerName = tokenizerOption.text();

    if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
        tokenizerId = getTokenizerBestMatch();
        tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
    }

    const selectedTokenizer = main_api === 'openai' ? getTokenizerModel() : tokenizerName;

    // The element IDs below are required: the input handler and drawChunks()
    // look them up by ID.
    // NOTE(review): the CSS class names are assumed to exist in the host
    // application's stylesheet — confirm against it.
    const html = `
    <div class="wide100p">
        <h3>Token Counter</h3>
        <div class="justifyLeft flex-container flexFlowColumn">
            <h4>Type / paste in the box below to see the number of tokens in the text.</h4>
            <p>Selected tokenizer: <span class="token_counter_tokenizer"></span></p>
            <div>Input:</div>
            <textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
            <div>Tokens: <span id="token_counter_result">0</span></div>
            <hr>
            <div>Tokenized text:</div>
            <div id="tokenized_chunks_display" class="wide100p">—</div>
            <hr>
            <div>Token IDs:</div>
            <textarea id="token_counter_ids" class="wide100p textarea_compact" disabled rows="1">—</textarea>
        </div>
    </div>`;

    const dialog = $(html);

    // Set the tokenizer name as text rather than interpolating it into markup,
    // so a name containing HTML-special characters is displayed literally.
    dialog.find('.token_counter_tokenizer').text(selectedTokenizer);

    dialog.find('#token_counter_textarea').on('input', () => {
        const text = String($('#token_counter_textarea').val());
        const ids = main_api === 'openai'
            ? getTextTokens(tokenizers.OPENAI, text)
            : getTextTokens(tokenizerId, text);

        if (Array.isArray(ids) && ids.length > 0) {
            $('#token_counter_ids').text(`[${ids.join(', ')}]`);
            $('#token_counter_result').text(ids.length);

            // Some tokenizers attach the decoded text pieces to the ID array
            // as an own `chunks` property; draw them only when present.
            if (Object.hasOwnProperty.call(ids, 'chunks')) {
                drawChunks(Object.getOwnPropertyDescriptor(ids, 'chunks').value, ids);
            }
        } else {
            // No token IDs available: fall back to a plain count.
            const context = getContext();
            const count = context.getTokenCount(text);
            $('#token_counter_ids').text('—');
            $('#token_counter_result').text(count);
            $('#tokenized_chunks_display').text('—');
        }
    });

    $('#dialogue_popup').addClass('wide_dialogue_popup');
    callPopup(dialog, 'text', '', { wide: true, large: true });
}

/**
 * Draws the tokenized chunks in the UI.
 *
 * Replaces the content of `#tokenized_chunks_display` with one colored element
 * per chunk (colors cycle through a fixed pastel palette) and a line break for
 * each newline chunk. Each element's tooltip is the token ID at the same index.
 * @param {string[]} chunks Text pieces, one per token
 * @param {number[]} ids Token IDs, index-aligned with `chunks`
 */
function drawChunks(chunks, ids) {
    const pastelRainbow = [
        '#FFB3BA',
        '#FFDFBA',
        '#FFFFBA',
        '#BFFFBF',
        '#BAE1FF',
        '#FFBAF3',
    ];

    const display = $('#tokenized_chunks_display');
    display.empty();

    for (let i = 0; i < chunks.length; i++) {
        // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
        let chunk = chunks[i].replace(/▁/g, ' ');

        // If <0xHEX>, decode it
        if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
            const code = parseInt(chunk.substring(3, chunk.length - 1), 16);
            // Leave the literal text in place when the value is not a valid
            // code point instead of letting fromCodePoint throw.
            if (code <= MAX_CODE_POINT) {
                chunk = String.fromCodePoint(code);
            }
        }

        // If newline - insert a line break
        if (chunk === '\n') {
            display.append('<br>');
            continue;
        }

        const color = pastelRainbow[i % pastelRainbow.length];
        // Build the element explicitly and set the chunk as text: passing the
        // raw chunk to $() would make jQuery parse it as a selector or HTML.
        const chunkHtml = $('<code></code>');
        chunkHtml.css('background-color', color);
        chunkHtml.text(chunk);
        chunkHtml.attr('title', ids[i]);
        display.append(chunkHtml);
    }
}

/**
 * Counts the tokens in the current chat and shows the result in a toast.
 *
 * Takes every chat message that has text (`mes`) and is not a system message,
 * joins the texts with single spaces and passes the result to `getTokenCount`.
 */
function doCount() {
    // get all of the messages in the chat
    const context = getContext();
    const messages = context.chat.filter(x => x.mes && !x.is_system).map(x => x.mes);

    //concat all the messages into a single string
    const allMessages = messages.join(' ');

    console.debug('All messages:', allMessages);

    //toastr success with the token count of the chat
    toastr.success(`Token count: ${getTokenCount(allMessages)}`);
}

// On DOM ready: add the menu entry and register the /count slash command.
jQuery(() => {
    // The id is required: the click handler below binds to #token_counter.
    // NOTE(review): the CSS class names are assumed to exist in the host
    // application's stylesheet — confirm against it.
    const buttonHtml = `
        <div id="token_counter" class="list-group-item flex-container flexGap5">
            <div class="fa-solid fa-1 extensionsMenuExtensionButton"></div>
            Token Counter
        </div>`;
    $('#extensionsMenu').prepend(buttonHtml);
    $('#token_counter').on('click', doTokenCounter);
    registerSlashCommand('count', doCount, [], '– counts the number of tokens in the current chat', true, false);
});