SillyTavern/public/scripts/extensions/token-counter/index.js

136 lines
5.0 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { callPopup, main_api } from '../../../script.js';
import { getContext } from '../../extensions.js';
import { registerSlashCommand } from '../../slash-commands.js';
import { getFriendlyTokenizerName, getTextTokens, getTokenCountAsync, tokenizers } from '../../tokenizers.js';
import { resetScrollHeight, debounce } from '../../utils.js';
function rgb2hex(rgb) {
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
return (rgb && rgb.length === 4) ? '#' +
('0' + parseInt(rgb[1], 10).toString(16)).slice(-2) +
('0' + parseInt(rgb[2], 10).toString(16)).slice(-2) +
('0' + parseInt(rgb[3], 10).toString(16)).slice(-2) : '';
}
$('button').click(function () {
var hex = rgb2hex($('input').val());
$('.result').html(hex);
});
async function doTokenCounter() {
const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
const html = `
<div class="wide100p">
<h3>Token Counter</h3>
<div class="justifyLeft flex-container flexFlowColumn">
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
<p>Selected tokenizer: ${tokenizerName}</p>
<div>Input:</div>
<textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
<div>Tokens: <span id="token_counter_result">0</span></div>
<hr>
<div>Tokenized text:</div>
<div id="tokenized_chunks_display" class="wide100p">—</div>
<hr>
<div>Token IDs:</div>
<textarea id="token_counter_ids" class="wide100p textarea_compact" readonly rows="1">—</textarea>
</div>
</div>`;
const dialog = $(html);
const countDebounced = debounce(async () => {
const text = String($('#token_counter_textarea').val());
const ids = main_api == 'openai' ? getTextTokens(tokenizers.OPENAI, text) : getTextTokens(tokenizerId, text);
if (Array.isArray(ids) && ids.length > 0) {
$('#token_counter_ids').text(`[${ids.join(', ')}]`);
$('#token_counter_result').text(ids.length);
if (Object.hasOwnProperty.call(ids, 'chunks')) {
drawChunks(Object.getOwnPropertyDescriptor(ids, 'chunks').value, ids);
}
} else {
const count = await getTokenCountAsync(text);
$('#token_counter_ids').text('—');
$('#token_counter_result').text(count);
$('#tokenized_chunks_display').text('—');
}
resetScrollHeight($('#token_counter_textarea'));
resetScrollHeight($('#token_counter_ids'));
}, 1000);
dialog.find('#token_counter_textarea').on('input', () => countDebounced());
$('#dialogue_popup').addClass('wide_dialogue_popup');
callPopup(dialog, 'text', '', { wide: true, large: true });
}
/**
* Draws the tokenized chunks in the UI
* @param {string[]} chunks
* @param {number[]} ids
*/
function drawChunks(chunks, ids) {
const pastelRainbow = [
//main_text_color,
//italics_text_color,
//quote_text_color,
'#FFB3BA',
'#FFDFBA',
'#FFFFBA',
'#BFFFBF',
'#BAE1FF',
'#FFBAF3',
];
$('#tokenized_chunks_display').empty();
for (let i = 0; i < chunks.length; i++) {
let chunk = chunks[i].replace(/▁/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)
// If <0xHEX>, decode it
if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
const code = parseInt(chunk.substring(3, chunk.length - 1), 16);
chunk = String.fromCodePoint(code);
}
// If newline - insert a line break
if (chunk === '\n') {
$('#tokenized_chunks_display').append('<br>');
continue;
}
const color = pastelRainbow[i % pastelRainbow.length];
const chunkHtml = $('<code></code>');
chunkHtml.css('background-color', color);
chunkHtml.text(chunk);
chunkHtml.attr('title', ids[i]);
$('#tokenized_chunks_display').append(chunkHtml);
}
}
async function doCount() {
// get all of the messages in the chat
const context = getContext();
const messages = context.chat.filter(x => x.mes && !x.is_system).map(x => x.mes);
//concat all the messages into a single string
const allMessages = messages.join(' ');
console.debug('All messages:', allMessages);
//toastr success with the token count of the chat
const count = await getTokenCountAsync(allMessages);
toastr.success(`Token count: ${count}`);
}
jQuery(() => {
const buttonHtml = `
<div id="token_counter" class="list-group-item flex-container flexGap5">
<div class="fa-solid fa-1 extensionsMenuExtensionButton" /></div>
Token Counter
</div>`;
$('#extensionsMenu').prepend(buttonHtml);
$('#token_counter').on('click', doTokenCounter);
registerSlashCommand('count', doCount, [], ' counts the number of tokens in the current chat', true, false);
});