mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add text chunks display to token counter
This commit is contained in:
@ -22,11 +22,15 @@ async function doTokenCounter() {
|
||||
<div class="justifyLeft">
|
||||
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
|
||||
<p>Selected tokenizer: ${selectedTokenizer}</p>
|
||||
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="15"></textarea>
|
||||
<div>Input:</div>
|
||||
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea>
|
||||
<div>Tokens: <span id="token_counter_result">0</span></div>
|
||||
<br>
|
||||
<div>Token IDs (if applicable):</div>
|
||||
<textarea id="token_counter_ids" disabled rows="10"></textarea>
|
||||
<div>Tokenized text:</div>
|
||||
<div id="tokenized_chunks_display" class="wide100p">—</div>
|
||||
<br>
|
||||
<div>Token IDs:</div>
|
||||
<textarea id="token_counter_ids" disabled rows="10">—</textarea>
|
||||
</div>
|
||||
</div>`;
|
||||
|
||||
@ -36,13 +40,18 @@ async function doTokenCounter() {
|
||||
const ids = main_api == 'openai' ? getTextTokens(tokenizers.OPENAI, text) : getTextTokens(tokenizerId, text);
|
||||
|
||||
if (Array.isArray(ids) && ids.length > 0) {
|
||||
$('#token_counter_ids').text(JSON.stringify(ids));
|
||||
$('#token_counter_ids').text(`[${ids.join(', ')}]`);
|
||||
$('#token_counter_result').text(ids.length);
|
||||
|
||||
if (Object.hasOwnProperty.call(ids, 'chunks')) {
|
||||
drawChunks(Object.getOwnPropertyDescriptor(ids, 'chunks').value, ids);
|
||||
}
|
||||
} else {
|
||||
const context = getContext();
|
||||
const count = context.getTokenCount(text);
|
||||
$('#token_counter_ids').text('—');
|
||||
$('#token_counter_result').text(count);
|
||||
$('#tokenized_chunks_display').text('—');
|
||||
}
|
||||
});
|
||||
|
||||
@ -50,6 +59,44 @@ async function doTokenCounter() {
|
||||
callPopup(dialog, 'text', '', { wide: true, large: true });
|
||||
}
|
||||
|
||||
/**
 * Draws the tokenized chunks in the UI.
 *
 * Empties `#tokenized_chunks_display`, then appends one colored `<code>`
 * element per chunk (colors cycle through a fixed pastel palette by chunk
 * index). Each element's tooltip (`title`) is set to the entry of `ids` at the
 * same index. A chunk that is exactly a newline is rendered as a `<br>`
 * instead of a `<code>` element.
 *
 * Chunk text is always inserted as plain text, never as markup, so chunks
 * containing characters such as `<` or `&` are displayed literally.
 * @param {string[]} chunks Tokenized text chunks to display
 * @param {number[]} ids Values shown as tooltips; `ids[i]` belongs to `chunks[i]`
 */
function drawChunks(chunks, ids) {
    const pastelRainbow = [
        '#FFB3BA',
        '#FFDFBA',
        '#FFFFBA',
        '#BFFFBF',
        '#BAE1FF',
        '#FFBAF3',
    ];
    // Largest value String.fromCodePoint accepts; anything above throws a RangeError.
    const maxCodePoint = 0x10FFFF;

    // Look the container up once instead of on every loop iteration.
    const display = $('#tokenized_chunks_display');
    display.empty();

    for (let i = 0; i < chunks.length; i++) {
        let chunk = chunks[i].replace(/▁/g, ' '); // This is a leading space in sentencepiece. More info: Lower one eighth block (U+2581)

        // If <0xHEX>, decode it
        if (/^<0x[0-9A-F]+>$/i.test(chunk)) {
            const code = parseInt(chunk.substring(3, chunk.length - 1), 16);
            // Out-of-range values are left as-is rather than crashing the whole redraw.
            if (code <= maxCodePoint) {
                chunk = String.fromCodePoint(code);
            }
        }

        // If newline - insert a line break
        if (chunk === '\n') {
            display.append('<br>');
            continue;
        }

        // Build the element first and set its content via .text() so that the
        // chunk is never parsed as HTML (the text comes from user input).
        const color = pastelRainbow[i % pastelRainbow.length];
        const chunkHtml = $('<code></code>');
        chunkHtml.css('background-color', color);
        chunkHtml.text(chunk);
        chunkHtml.attr('title', ids[i]);
        display.append(chunkHtml);
    }
}
|
||||
|
||||
function doCount() {
|
||||
// get all of the messages in the chat
|
||||
const context = getContext();
|
||||
|
Reference in New Issue
Block a user