Resolve best match tokenizer for itemization. Adjust styles of token counter

This commit is contained in:
Cohee 2023-11-06 20:25:59 +02:00
parent 1a3f100018
commit 57e845d0d7
4 changed files with 60 additions and 27 deletions

View File

@ -183,7 +183,7 @@ import {
formatInstructModeSystemPrompt,
} from "./scripts/instruct-mode.js";
import { applyLocale } from "./scripts/i18n.js";
import { getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
import { getFriendlyTokenizerName, getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js";
import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
import { hideLoader, showLoader } from "./scripts/loader.js";
@ -3935,10 +3935,9 @@ function promptItemize(itemizedPrompts, requestedMesId) {
var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2);
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2);
var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2);
var selectedTokenizer = getTokenizerModel();
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens;
var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2);
} else {
//console.log('-- applying % on non-OAI tokens');
var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
@ -3946,7 +3945,7 @@ function promptItemize(itemizedPrompts, requestedMesId) {
var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var selectedTokenizer = $("#tokenizer").find(':selected').text();
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
}
const params = {

View File

@ -1,7 +1,8 @@
import { callPopup, main_api } from "../../../script.js";
import { getContext } from "../../extensions.js";
import { registerSlashCommand } from "../../slash-commands.js";
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js";
import { getFriendlyTokenizerName, getTextTokens, getTokenCount, tokenizers } from "../../tokenizers.js";
import { resetScrollHeight } from "../../utils.js";
function rgb2hex(rgb) {
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
@ -17,33 +18,22 @@ $('button').click(function () {
});
async function doTokenCounter() {
const tokenizerOption = $("#tokenizer").find(':selected');
let tokenizerId = Number(tokenizerOption.val());
let tokenizerName = tokenizerOption.text();
if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
tokenizerId = getTokenizerBestMatch();
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
}
const selectedTokenizer = main_api == 'openai'
? getTokenizerModel()
: tokenizerName;
const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
const html = `
<div class="wide100p">
<h3>Token Counter</h3>
<div class="justifyLeft">
<div class="justifyLeft flex-container flexFlowColumn">
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
<p>Selected tokenizer: ${selectedTokenizer}</p>
<p>Selected tokenizer: ${tokenizerName}</p>
<div>Input:</div>
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea>
<textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
<div>Tokens: <span id="token_counter_result">0</span></div>
<br>
<hr>
<div>Tokenized text:</div>
<div id="tokenized_chunks_display" class="wide100p"></div>
<br>
<hr>
<div>Token IDs:</div>
<textarea id="token_counter_ids" disabled rows="10"></textarea>
<textarea id="token_counter_ids" class="wide100p textarea_compact" disabled rows="1"></textarea>
</div>
</div>`;
@ -66,6 +56,9 @@ async function doTokenCounter() {
$('#token_counter_result').text(count);
$('#tokenized_chunks_display').text('—');
}
resetScrollHeight($('#token_counter_textarea'));
resetScrollHeight($('#token_counter_ids'));
});
$('#dialogue_popup').addClass('wide_dialogue_popup');

View File

@ -1,4 +1,6 @@
#tokenized_chunks_display > code {
color: black;
text-shadow: none;
padding: 2px;
display: inline-block;
}

View File

@ -63,8 +63,47 @@ async function resetTokenCache() {
}
}
export function getTokenizerBestMatch() {
if (main_api === 'novel') {
/**
* Gets the friendly name of the current tokenizer.
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
* @returns { { tokenizerName: string, tokenizerId: number } } Tokenizer info
*/
export function getFriendlyTokenizerName(forApi) {
if (!forApi) {
forApi = main_api;
}
const tokenizerOption = $("#tokenizer").find(':selected');
let tokenizerId = Number(tokenizerOption.val());
let tokenizerName = tokenizerOption.text();
if (forApi !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
tokenizerId = getTokenizerBestMatch(forApi);
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
}
tokenizerName = forApi == 'openai'
? getTokenizerModel()
: tokenizerName;
tokenizerId = forApi == 'openai'
? tokenizers.OPENAI
: tokenizerId;
return { tokenizerName, tokenizerId };
}
/**
* Gets the best tokenizer for the current API.
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
* @returns {number} Tokenizer type.
*/
export function getTokenizerBestMatch(forApi) {
if (!forApi) {
forApi = main_api;
}
if (forApi === 'novel') {
if (nai_settings.model_novel.includes('clio')) {
return tokenizers.NERD;
}
@ -72,7 +111,7 @@ export function getTokenizerBestMatch() {
return tokenizers.NERD2;
}
}
if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') {
if (forApi === 'kobold' || forApi === 'textgenerationwebui' || forApi === 'koboldhorde') {
// Try to use the API tokenizer if possible:
// - API must be connected
// - Kobold must pass a version check
@ -140,7 +179,7 @@ export function getTokenCount(str, padding = undefined) {
}
if (tokenizerType === tokenizers.BEST_MATCH) {
tokenizerType = getTokenizerBestMatch();
tokenizerType = getTokenizerBestMatch(main_api);
}
if (padding === undefined) {