Resolve best match tokenizer for itemization. Adjust styles of token counter
This commit is contained in:
parent
1a3f100018
commit
57e845d0d7
|
@ -183,7 +183,7 @@ import {
|
|||
formatInstructModeSystemPrompt,
|
||||
} from "./scripts/instruct-mode.js";
|
||||
import { applyLocale } from "./scripts/i18n.js";
|
||||
import { getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
|
||||
import { getFriendlyTokenizerName, getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
|
||||
import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js";
|
||||
import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
||||
import { hideLoader, showLoader } from "./scripts/loader.js";
|
||||
|
@ -3935,10 +3935,9 @@ function promptItemize(itemizedPrompts, requestedMesId) {
|
|||
var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||
var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||
var selectedTokenizer = getTokenizerModel();
|
||||
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
|
||||
var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens;
|
||||
var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||
|
||||
} else {
|
||||
//console.log('-- applying % on non-OAI tokens');
|
||||
var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||
|
@ -3946,7 +3945,7 @@ function promptItemize(itemizedPrompts, requestedMesId) {
|
|||
var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||
var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||
var selectedTokenizer = $("#tokenizer").find(':selected').text();
|
||||
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
|
||||
}
|
||||
|
||||
const params = {
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
import { callPopup, main_api } from "../../../script.js";
|
||||
import { getContext } from "../../extensions.js";
|
||||
import { registerSlashCommand } from "../../slash-commands.js";
|
||||
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js";
|
||||
import { getFriendlyTokenizerName, getTextTokens, getTokenCount, tokenizers } from "../../tokenizers.js";
|
||||
import { resetScrollHeight } from "../../utils.js";
|
||||
|
||||
function rgb2hex(rgb) {
|
||||
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
|
||||
|
@ -17,33 +18,22 @@ $('button').click(function () {
|
|||
});
|
||||
|
||||
async function doTokenCounter() {
|
||||
const tokenizerOption = $("#tokenizer").find(':selected');
|
||||
let tokenizerId = Number(tokenizerOption.val());
|
||||
let tokenizerName = tokenizerOption.text();
|
||||
|
||||
if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
|
||||
tokenizerId = getTokenizerBestMatch();
|
||||
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
|
||||
}
|
||||
|
||||
const selectedTokenizer = main_api == 'openai'
|
||||
? getTokenizerModel()
|
||||
: tokenizerName;
|
||||
const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
|
||||
const html = `
|
||||
<div class="wide100p">
|
||||
<h3>Token Counter</h3>
|
||||
<div class="justifyLeft">
|
||||
<div class="justifyLeft flex-container flexFlowColumn">
|
||||
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
|
||||
<p>Selected tokenizer: ${selectedTokenizer}</p>
|
||||
<p>Selected tokenizer: ${tokenizerName}</p>
|
||||
<div>Input:</div>
|
||||
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea>
|
||||
<textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
|
||||
<div>Tokens: <span id="token_counter_result">0</span></div>
|
||||
<br>
|
||||
<hr>
|
||||
<div>Tokenized text:</div>
|
||||
<div id="tokenized_chunks_display" class="wide100p">—</div>
|
||||
<br>
|
||||
<hr>
|
||||
<div>Token IDs:</div>
|
||||
<textarea id="token_counter_ids" disabled rows="10">—</textarea>
|
||||
<textarea id="token_counter_ids" class="wide100p textarea_compact" disabled rows="1">—</textarea>
|
||||
</div>
|
||||
</div>`;
|
||||
|
||||
|
@ -66,6 +56,9 @@ async function doTokenCounter() {
|
|||
$('#token_counter_result').text(count);
|
||||
$('#tokenized_chunks_display').text('—');
|
||||
}
|
||||
|
||||
resetScrollHeight($('#token_counter_textarea'));
|
||||
resetScrollHeight($('#token_counter_ids'));
|
||||
});
|
||||
|
||||
$('#dialogue_popup').addClass('wide_dialogue_popup');
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
/* Styles each direct <code> child of #tokenized_chunks_display: black text with no text shadow, 2px padding, laid out as inline-block. NOTE(review): presumably keeps token chunks legible on per-token background colours — confirm against the script that renders them. */
#tokenized_chunks_display > code {
|
||||
color: black;
|
||||
text-shadow: none;
|
||||
padding: 2px;
|
||||
display: inline-block;
|
||||
}
|
||||
|
|
|
@ -63,8 +63,47 @@ async function resetTokenCache() {
|
|||
}
|
||||
}
|
||||
|
||||
export function getTokenizerBestMatch() {
|
||||
if (main_api === 'novel') {
|
||||
/**
 * Gets the friendly (display) name and numeric ID of the tokenizer in effect for an API.
 *
 * Resolution order:
 *  1. For the 'openai' API the name comes from getTokenizerModel() and the ID is
 *     tokenizers.OPENAI; the #tokenizer dropdown is not consulted.
 *  2. Otherwise the currently selected #tokenizer option is used as-is, unless it is
 *     tokenizers.BEST_MATCH.
 *  3. For BEST_MATCH the ID is resolved through getTokenizerBestMatch(api) and the
 *     name is the text of the dropdown option carrying that ID.
 *
 * @param {string} [forApi] API to get the tokenizer for. Any falsy value falls back to the main API.
 * @returns { { tokenizerName: string, tokenizerId: number } } Tokenizer info
 */
export function getFriendlyTokenizerName(forApi) {
    // Keep the caller's argument untouched; `||` (not `??`) so an empty string also falls back.
    const api = forApi || main_api;

    // Guard clause: the OpenAI result never depends on the dropdown, so skip the DOM lookups.
    if (api === 'openai') {
        return { tokenizerName: getTokenizerModel(), tokenizerId: tokenizers.OPENAI };
    }

    const selectedOption = $("#tokenizer").find(':selected');
    const selectedId = Number(selectedOption.val());

    // An explicitly chosen tokenizer is reported exactly as selected.
    if (selectedId !== tokenizers.BEST_MATCH) {
        return { tokenizerName: selectedOption.text(), tokenizerId: selectedId };
    }

    // "Best match" is a placeholder: report the concrete tokenizer it resolves to for this API.
    const tokenizerId = getTokenizerBestMatch(api);
    const tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();

    return { tokenizerName, tokenizerId };
}
|
||||
|
||||
/**
|
||||
* Gets the best tokenizer for the current API.
|
||||
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
|
||||
* @returns {number} Tokenizer type.
|
||||
*/
|
||||
export function getTokenizerBestMatch(forApi) {
|
||||
if (!forApi) {
|
||||
forApi = main_api;
|
||||
}
|
||||
|
||||
if (forApi === 'novel') {
|
||||
if (nai_settings.model_novel.includes('clio')) {
|
||||
return tokenizers.NERD;
|
||||
}
|
||||
|
@ -72,7 +111,7 @@ export function getTokenizerBestMatch() {
|
|||
return tokenizers.NERD2;
|
||||
}
|
||||
}
|
||||
if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') {
|
||||
if (forApi === 'kobold' || forApi === 'textgenerationwebui' || forApi === 'koboldhorde') {
|
||||
// Try to use the API tokenizer if possible:
|
||||
// - API must be connected
|
||||
// - Kobold must pass a version check
|
||||
|
@ -140,7 +179,7 @@ export function getTokenCount(str, padding = undefined) {
|
|||
}
|
||||
|
||||
if (tokenizerType === tokenizers.BEST_MATCH) {
|
||||
tokenizerType = getTokenizerBestMatch();
|
||||
tokenizerType = getTokenizerBestMatch(main_api);
|
||||
}
|
||||
|
||||
if (padding === undefined) {
|
||||
|
|
Loading…
Reference in New Issue