2023-07-20 19:32:15 +02:00
|
|
|
|
import { callPopup, main_api } from "../../../script.js";
|
|
|
|
|
import { getContext } from "../../extensions.js";
|
2023-10-21 13:23:56 +02:00
|
|
|
|
import { registerSlashCommand } from "../../slash-commands.js";
|
2023-11-05 21:45:37 +01:00
|
|
|
|
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js";
|
2023-07-20 19:32:15 +02:00
|
|
|
|
|
|
|
|
|
/**
 * Opens the Token Counter popup.
 *
 * Reads the currently selected option of the `#tokenizer` dropdown, resolves
 * the "best match" entry to a concrete tokenizer when the active API is not
 * OpenAI, and shows a dialog in which typed text is tokenized on every
 * `input` event. The dialog displays the token count, the token IDs and,
 * when the tokenizer result carries a `chunks` property, the per-token text.
 *
 * @returns {Promise<void>} Settles once the promise returned by `callPopup` settles.
 */
async function doTokenCounter() {
    const tokenizerOption = $('#tokenizer').find(':selected');
    let tokenizerId = Number(tokenizerOption.val());
    let tokenizerName = tokenizerOption.text();

    // "Best match" is a placeholder entry: swap it for the concrete tokenizer
    // so that both the label and the tokenization use the real one.
    if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
        tokenizerId = getTokenizerBestMatch();
        tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
    }

    const selectedTokenizer = main_api === 'openai'
        ? getTokenizerModel()
        : tokenizerName;

    const html = `
    <div class="wide100p">
        <h3>Token Counter</h3>
        <div class="justifyLeft">
            <h4>Type / paste in the box below to see the number of tokens in the text.</h4>
            <p>Selected tokenizer: <span id="token_counter_selected_tokenizer"></span></p>
            <div>Input:</div>
            <textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea>
            <div>Tokens: <span id="token_counter_result">0</span></div>
            <br>
            <div>Tokenized text:</div>
            <div id="tokenized_chunks_display" class="wide100p">—</div>
            <br>
            <div>Token IDs:</div>
            <textarea id="token_counter_ids" disabled rows="10">—</textarea>
        </div>
    </div>`;

    const dialog = $(html);

    // The tokenizer/model name is inserted as text rather than interpolated
    // into the markup, so characters such as "<" or "&" cannot alter the DOM.
    dialog.find('#token_counter_selected_tokenizer').text(selectedTokenizer);

    dialog.find('#token_counter_textarea').on('input', () => {
        const text = String($('#token_counter_textarea').val());
        const ids = main_api === 'openai'
            ? getTextTokens(tokenizers.OPENAI, text)
            : getTextTokens(tokenizerId, text);

        if (Array.isArray(ids) && ids.length > 0) {
            $('#token_counter_ids').text(`[${ids.join(', ')}]`);
            $('#token_counter_result').text(ids.length);

            // `chunks` is an optional own property attached to the ids array.
            const chunksDescriptor = Object.getOwnPropertyDescriptor(ids, 'chunks');
            const chunks = chunksDescriptor ? chunksDescriptor.value : undefined;

            if (Array.isArray(chunks)) {
                drawChunks(chunks, ids);
            } else {
                // No per-token text available: do not leave a previous rendering on screen.
                $('#tokenized_chunks_display').text('—');
            }
        } else {
            // No token IDs were returned; fall back to a plain count from the context.
            const context = getContext();
            const count = context.getTokenCount(text);
            $('#token_counter_ids').text('—');
            $('#token_counter_result').text(count);
            $('#tokenized_chunks_display').text('—');
        }
    });

    $('#dialogue_popup').addClass('wide_dialogue_popup');

    // Awaited so the popup promise is not left floating and a rejection
    // propagates through this function's own promise.
    await callPopup(dialog, 'text', '', { wide: true, large: true });
}
|
|
|
|
|
|
2023-11-06 01:42:51 +01:00
|
|
|
|
/**
 * Draws the tokenized chunks in the UI.
 *
 * Replaces the content of `#tokenized_chunks_display` with one `<code>`
 * element per chunk, coloured from a cycling pastel palette and carrying the
 * matching token ID as its `title`. A chunk that is exactly a newline is
 * rendered as a `<br>` instead.
 *
 * @param {string[]} chunks Text of each token, in order.
 * @param {number[]} ids Token IDs; `ids[i]` is used as the tooltip of `chunks[i]`.
 * @returns {void}
 */
function drawChunks(chunks, ids) {
    const pastelRainbow = [
        '#FFB3BA',
        '#FFDFBA',
        '#FFFFBA',
        '#BFFFBF',
        '#BAE1FF',
        '#FFBAF3',
    ];
    // Largest value String.fromCodePoint accepts without throwing a RangeError.
    const maxCodePoint = 0x10FFFF;

    const display = $('#tokenized_chunks_display');
    display.empty();

    for (let i = 0; i < chunks.length; i++) {
        let chunk = chunks[i].replace(/▁/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)

        // If <0xHEX>, decode it
        if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
            const code = parseInt(chunk.substring(3, chunk.length - 1), 16);
            // Out-of-range values are left as their literal text instead of throwing.
            if (code <= maxCodePoint) {
                chunk = String.fromCodePoint(code);
            }
        }

        // If newline - insert a line break
        if (chunk === '\n') {
            display.append('<br>');
            continue;
        }

        const color = pastelRainbow[i % pastelRainbow.length];

        // Build the element first and set the token as text: token text is
        // arbitrary user input and must never be parsed as HTML.
        const chunkHtml = $('<code></code>');
        chunkHtml.css('background-color', color);
        chunkHtml.text(chunk);
        chunkHtml.attr('title', ids[i]);
        display.append(chunkHtml);
    }
}
|
|
|
|
|
|
2023-10-21 13:23:56 +02:00
|
|
|
|
/**
 * Reports the token count of the current chat in a success toast.
 *
 * Collects the `mes` text of every chat entry that has one and is not marked
 * `is_system`, joins the texts with single spaces, logs the combined string
 * at debug level and passes it to `getTokenCount`.
 */
function doCount() {
    const { chat } = getContext();

    // Gather the text of every non-system message that has content.
    const texts = [];
    chat.forEach((entry) => {
        if (entry.mes && !entry.is_system) {
            texts.push(entry.mes);
        }
    });

    // Tokenize the whole conversation as one space-separated string.
    const combined = texts.join(' ');

    console.debug('All messages:', combined);

    const total = getTokenCount(combined);
    toastr.success(`Token count: ${total}`);
}
|
|
|
|
|
|
2023-07-20 19:32:15 +02:00
|
|
|
|
// On DOM ready: add the "Token Counter" entry to the extensions menu, open the
// counter popup when it is clicked, and register the `count` slash command.
jQuery(() => {
    // The icon placeholder is written as an explicit open/close pair. The
    // previous `<div ... /></div>` form relied on the parser ignoring the
    // self-closing slash on a non-void element and left a stray closing tag.
    const buttonHtml = `
        <div id="token_counter" class="list-group-item flex-container flexGap5">
            <div class="fa-solid fa-1 extensionsMenuExtensionButton"></div>
            Token Counter
        </div>`;

    $('#extensionsMenu').prepend(buttonHtml);
    $('#token_counter').on('click', doTokenCounter);
    registerSlashCommand('count', doCount, [], '– counts the number of tokens in the current chat', true, false);
});
|