Resolve best match tokenizer for itemization. Adjust styles of token counter

This commit is contained in:
Cohee
2023-11-06 20:25:59 +02:00
parent 1a3f100018
commit 57e845d0d7
4 changed files with 60 additions and 27 deletions

View File

@@ -183,7 +183,7 @@ import {
formatInstructModeSystemPrompt, formatInstructModeSystemPrompt,
} from "./scripts/instruct-mode.js"; } from "./scripts/instruct-mode.js";
import { applyLocale } from "./scripts/i18n.js"; import { applyLocale } from "./scripts/i18n.js";
import { getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js"; import { getFriendlyTokenizerName, getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js"; import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js";
import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js"; import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
import { hideLoader, showLoader } from "./scripts/loader.js"; import { hideLoader, showLoader } from "./scripts/loader.js";
@@ -3935,10 +3935,9 @@ function promptItemize(itemizedPrompts, requestedMesId) {
var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2); var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2);
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2); var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2);
var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2); var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2);
var selectedTokenizer = getTokenizerModel(); var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens; var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens;
var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2); var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2);
} else { } else {
//console.log('-- applying % on non-OAI tokens'); //console.log('-- applying % on non-OAI tokens');
var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2); var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
@@ -3946,7 +3945,7 @@ function promptItemize(itemizedPrompts, requestedMesId) {
var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2); var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2); var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2); var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2);
var selectedTokenizer = $("#tokenizer").find(':selected').text(); var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
} }
const params = { const params = {

View File

@@ -1,7 +1,8 @@
import { callPopup, main_api } from "../../../script.js"; import { callPopup, main_api } from "../../../script.js";
import { getContext } from "../../extensions.js"; import { getContext } from "../../extensions.js";
import { registerSlashCommand } from "../../slash-commands.js"; import { registerSlashCommand } from "../../slash-commands.js";
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js"; import { getFriendlyTokenizerName, getTextTokens, getTokenCount, tokenizers } from "../../tokenizers.js";
import { resetScrollHeight } from "../../utils.js";
function rgb2hex(rgb) { function rgb2hex(rgb) {
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i); rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
@@ -17,33 +18,22 @@ $('button').click(function () {
}); });
async function doTokenCounter() { async function doTokenCounter() {
const tokenizerOption = $("#tokenizer").find(':selected'); const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
let tokenizerId = Number(tokenizerOption.val());
let tokenizerName = tokenizerOption.text();
if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
tokenizerId = getTokenizerBestMatch();
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
}
const selectedTokenizer = main_api == 'openai'
? getTokenizerModel()
: tokenizerName;
const html = ` const html = `
<div class="wide100p"> <div class="wide100p">
<h3>Token Counter</h3> <h3>Token Counter</h3>
<div class="justifyLeft"> <div class="justifyLeft flex-container flexFlowColumn">
<h4>Type / paste in the box below to see the number of tokens in the text.</h4> <h4>Type / paste in the box below to see the number of tokens in the text.</h4>
<p>Selected tokenizer: ${selectedTokenizer}</p> <p>Selected tokenizer: ${tokenizerName}</p>
<div>Input:</div> <div>Input:</div>
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea> <textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
<div>Tokens: <span id="token_counter_result">0</span></div> <div>Tokens: <span id="token_counter_result">0</span></div>
<br> <hr>
<div>Tokenized text:</div> <div>Tokenized text:</div>
<div id="tokenized_chunks_display" class="wide100p">—</div> <div id="tokenized_chunks_display" class="wide100p">—</div>
<br> <hr>
<div>Token IDs:</div> <div>Token IDs:</div>
<textarea id="token_counter_ids" disabled rows="10">—</textarea> <textarea id="token_counter_ids" class="wide100p textarea_compact" disabled rows="1">—</textarea>
</div> </div>
</div>`; </div>`;
@@ -66,6 +56,9 @@ async function doTokenCounter() {
$('#token_counter_result').text(count); $('#token_counter_result').text(count);
$('#tokenized_chunks_display').text('—'); $('#tokenized_chunks_display').text('—');
} }
resetScrollHeight($('#token_counter_textarea'));
resetScrollHeight($('#token_counter_ids'));
}); });
$('#dialogue_popup').addClass('wide_dialogue_popup'); $('#dialogue_popup').addClass('wide_dialogue_popup');

View File

@@ -1,4 +1,6 @@
#tokenized_chunks_display > code { #tokenized_chunks_display > code {
color: black; color: black;
text-shadow: none; text-shadow: none;
padding: 2px;
display: inline-block;
} }

View File

@@ -63,8 +63,47 @@ async function resetTokenCache() {
} }
} }
export function getTokenizerBestMatch() { /**
if (main_api === 'novel') { * Gets the friendly name of the current tokenizer.
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
* @returns { { tokenizerName: string, tokenizerId: number } } Tokenizer info
*/
export function getFriendlyTokenizerName(forApi) {
    // Any falsy argument (undefined, null, '') falls back to the active main API.
    const api = forApi || main_api;

    // For OpenAI the tokenizer dropdown is not consulted at all: the name is
    // whatever getTokenizerModel() reports and the id is always tokenizers.OPENAI,
    // so return before touching the DOM.
    if (api === 'openai') {
        return { tokenizerName: getTokenizerModel(), tokenizerId: tokenizers.OPENAI };
    }

    const selectedOption = $("#tokenizer").find(':selected');
    const selectedId = Number(selectedOption.val());

    // An explicitly chosen tokenizer is reported exactly as selected.
    if (selectedId !== tokenizers.BEST_MATCH) {
        return { tokenizerName: selectedOption.text(), tokenizerId: selectedId };
    }

    // "Best match" is a placeholder entry: resolve it to the concrete tokenizer
    // for this API and report that option's label instead of the placeholder's.
    const tokenizerId = getTokenizerBestMatch(api);
    const tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
    return { tokenizerName, tokenizerId };
}
/**
* Gets the best-matching tokenizer for the given API.
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
* @returns {number} Tokenizer type.
*/
export function getTokenizerBestMatch(forApi) {
if (!forApi) {
forApi = main_api;
}
if (forApi === 'novel') {
if (nai_settings.model_novel.includes('clio')) { if (nai_settings.model_novel.includes('clio')) {
return tokenizers.NERD; return tokenizers.NERD;
} }
@@ -72,7 +111,7 @@ export function getTokenizerBestMatch() {
return tokenizers.NERD2; return tokenizers.NERD2;
} }
} }
if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') { if (forApi === 'kobold' || forApi === 'textgenerationwebui' || forApi === 'koboldhorde') {
// Try to use the API tokenizer if possible: // Try to use the API tokenizer if possible:
// - API must be connected // - API must be connected
// - Kobold must pass a version check // - Kobold must pass a version check
@@ -140,7 +179,7 @@ export function getTokenCount(str, padding = undefined) {
} }
if (tokenizerType === tokenizers.BEST_MATCH) { if (tokenizerType === tokenizers.BEST_MATCH) {
tokenizerType = getTokenizerBestMatch(); tokenizerType = getTokenizerBestMatch(main_api);
} }
if (padding === undefined) { if (padding === undefined) {