mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Resolve best match tokenizer for itemization. Adjust styles of token counter
This commit is contained in:
@@ -183,7 +183,7 @@ import {
|
|||||||
formatInstructModeSystemPrompt,
|
formatInstructModeSystemPrompt,
|
||||||
} from "./scripts/instruct-mode.js";
|
} from "./scripts/instruct-mode.js";
|
||||||
import { applyLocale } from "./scripts/i18n.js";
|
import { applyLocale } from "./scripts/i18n.js";
|
||||||
import { getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
|
import { getFriendlyTokenizerName, getTokenCount, getTokenizerModel, initTokenizers, saveTokenCache } from "./scripts/tokenizers.js";
|
||||||
import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js";
|
import { initPersonas, selectCurrentPersona, setPersonaDescription } from "./scripts/personas.js";
|
||||||
import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
import { getBackgrounds, initBackgrounds } from "./scripts/backgrounds.js";
|
||||||
import { hideLoader, showLoader } from "./scripts/loader.js";
|
import { hideLoader, showLoader } from "./scripts/loader.js";
|
||||||
@@ -3935,10 +3935,9 @@ function promptItemize(itemizedPrompts, requestedMesId) {
|
|||||||
var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2);
|
var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||||
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2);
|
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||||
var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2);
|
var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||||
var selectedTokenizer = getTokenizerModel();
|
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
|
||||||
var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens;
|
var oaiSystemTokens = oaiImpersonateTokens + oaiJailbreakTokens + oaiNudgeTokens + oaiStartTokens + oaiNsfwTokens + oaiMainTokens;
|
||||||
var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2);
|
var oaiSystemTokensPercentage = ((oaiSystemTokens / (finalPromptTokens)) * 100).toFixed(2);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//console.log('-- applying % on non-OAI tokens');
|
//console.log('-- applying % on non-OAI tokens');
|
||||||
var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
var storyStringTokensPercentage = ((storyStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||||
@@ -3946,7 +3945,7 @@ function promptItemize(itemizedPrompts, requestedMesId) {
|
|||||||
var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
var promptBiasTokensPercentage = ((promptBiasTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||||
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||||
var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
var allAnchorsTokensPercentage = ((allAnchorsTokens / (totalTokensInPrompt)) * 100).toFixed(2);
|
||||||
var selectedTokenizer = $("#tokenizer").find(':selected').text();
|
var selectedTokenizer = getFriendlyTokenizerName(this_main_api).tokenizerName;
|
||||||
}
|
}
|
||||||
|
|
||||||
const params = {
|
const params = {
|
||||||
|
@@ -1,7 +1,8 @@
|
|||||||
import { callPopup, main_api } from "../../../script.js";
|
import { callPopup, main_api } from "../../../script.js";
|
||||||
import { getContext } from "../../extensions.js";
|
import { getContext } from "../../extensions.js";
|
||||||
import { registerSlashCommand } from "../../slash-commands.js";
|
import { registerSlashCommand } from "../../slash-commands.js";
|
||||||
import { getTextTokens, getTokenCount, getTokenizerBestMatch, getTokenizerModel, tokenizers } from "../../tokenizers.js";
|
import { getFriendlyTokenizerName, getTextTokens, getTokenCount, tokenizers } from "../../tokenizers.js";
|
||||||
|
import { resetScrollHeight } from "../../utils.js";
|
||||||
|
|
||||||
function rgb2hex(rgb) {
|
function rgb2hex(rgb) {
|
||||||
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
|
rgb = rgb.match(/^rgba?[\s+]?\([\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?,[\s+]?(\d+)[\s+]?/i);
|
||||||
@@ -17,33 +18,22 @@ $('button').click(function () {
|
|||||||
});
|
});
|
||||||
|
|
||||||
async function doTokenCounter() {
|
async function doTokenCounter() {
|
||||||
const tokenizerOption = $("#tokenizer").find(':selected');
|
const { tokenizerName, tokenizerId } = getFriendlyTokenizerName(main_api);
|
||||||
let tokenizerId = Number(tokenizerOption.val());
|
|
||||||
let tokenizerName = tokenizerOption.text();
|
|
||||||
|
|
||||||
if (main_api !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
|
|
||||||
tokenizerId = getTokenizerBestMatch();
|
|
||||||
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
|
|
||||||
}
|
|
||||||
|
|
||||||
const selectedTokenizer = main_api == 'openai'
|
|
||||||
? getTokenizerModel()
|
|
||||||
: tokenizerName;
|
|
||||||
const html = `
|
const html = `
|
||||||
<div class="wide100p">
|
<div class="wide100p">
|
||||||
<h3>Token Counter</h3>
|
<h3>Token Counter</h3>
|
||||||
<div class="justifyLeft">
|
<div class="justifyLeft flex-container flexFlowColumn">
|
||||||
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
|
<h4>Type / paste in the box below to see the number of tokens in the text.</h4>
|
||||||
<p>Selected tokenizer: ${selectedTokenizer}</p>
|
<p>Selected tokenizer: ${tokenizerName}</p>
|
||||||
<div>Input:</div>
|
<div>Input:</div>
|
||||||
<textarea id="token_counter_textarea" class="wide100p textarea_compact margin-bot-10px" rows="10"></textarea>
|
<textarea id="token_counter_textarea" class="wide100p textarea_compact" rows="1"></textarea>
|
||||||
<div>Tokens: <span id="token_counter_result">0</span></div>
|
<div>Tokens: <span id="token_counter_result">0</span></div>
|
||||||
<br>
|
<hr>
|
||||||
<div>Tokenized text:</div>
|
<div>Tokenized text:</div>
|
||||||
<div id="tokenized_chunks_display" class="wide100p">—</div>
|
<div id="tokenized_chunks_display" class="wide100p">—</div>
|
||||||
<br>
|
<hr>
|
||||||
<div>Token IDs:</div>
|
<div>Token IDs:</div>
|
||||||
<textarea id="token_counter_ids" disabled rows="10">—</textarea>
|
<textarea id="token_counter_ids" class="wide100p textarea_compact" disabled rows="1">—</textarea>
|
||||||
</div>
|
</div>
|
||||||
</div>`;
|
</div>`;
|
||||||
|
|
||||||
@@ -66,6 +56,9 @@ async function doTokenCounter() {
|
|||||||
$('#token_counter_result').text(count);
|
$('#token_counter_result').text(count);
|
||||||
$('#tokenized_chunks_display').text('—');
|
$('#tokenized_chunks_display').text('—');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resetScrollHeight($('#token_counter_textarea'));
|
||||||
|
resetScrollHeight($('#token_counter_ids'));
|
||||||
});
|
});
|
||||||
|
|
||||||
$('#dialogue_popup').addClass('wide_dialogue_popup');
|
$('#dialogue_popup').addClass('wide_dialogue_popup');
|
||||||
|
@@ -1,4 +1,6 @@
|
|||||||
#tokenized_chunks_display > code {
|
#tokenized_chunks_display > code {
|
||||||
color: black;
|
color: black;
|
||||||
text-shadow: none;
|
text-shadow: none;
|
||||||
|
padding: 2px;
|
||||||
|
display: inline-block;
|
||||||
}
|
}
|
||||||
|
@@ -63,8 +63,47 @@ async function resetTokenCache() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getTokenizerBestMatch() {
|
/**
|
||||||
if (main_api === 'novel') {
|
* Gets the friendly name and ID of the tokenizer used for the given API.
|
||||||
|
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
|
||||||
|
* @returns { { tokenizerName: string, tokenizerId: number } } Tokenizer info
|
||||||
|
*/
|
||||||
|
export function getFriendlyTokenizerName(forApi) {
|
||||||
|
if (!forApi) {
|
||||||
|
forApi = main_api;
|
||||||
|
}
|
||||||
|
|
||||||
|
const tokenizerOption = $("#tokenizer").find(':selected');
|
||||||
|
let tokenizerId = Number(tokenizerOption.val());
|
||||||
|
let tokenizerName = tokenizerOption.text();
|
||||||
|
|
||||||
|
if (forApi !== 'openai' && tokenizerId === tokenizers.BEST_MATCH) {
|
||||||
|
tokenizerId = getTokenizerBestMatch(forApi);
|
||||||
|
tokenizerName = $(`#tokenizer option[value="${tokenizerId}"]`).text();
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenizerName = forApi == 'openai'
|
||||||
|
? getTokenizerModel()
|
||||||
|
: tokenizerName;
|
||||||
|
|
||||||
|
tokenizerId = forApi == 'openai'
|
||||||
|
? tokenizers.OPENAI
|
||||||
|
: tokenizerId;
|
||||||
|
|
||||||
|
return { tokenizerName, tokenizerId };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the best-match tokenizer for the given API.
|
||||||
|
* @param {string} forApi API to get the tokenizer for. Defaults to the main API.
|
||||||
|
* @returns {number} Tokenizer type.
|
||||||
|
*/
|
||||||
|
export function getTokenizerBestMatch(forApi) {
|
||||||
|
if (!forApi) {
|
||||||
|
forApi = main_api;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (forApi === 'novel') {
|
||||||
if (nai_settings.model_novel.includes('clio')) {
|
if (nai_settings.model_novel.includes('clio')) {
|
||||||
return tokenizers.NERD;
|
return tokenizers.NERD;
|
||||||
}
|
}
|
||||||
@@ -72,7 +111,7 @@ export function getTokenizerBestMatch() {
|
|||||||
return tokenizers.NERD2;
|
return tokenizers.NERD2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (main_api === 'kobold' || main_api === 'textgenerationwebui' || main_api === 'koboldhorde') {
|
if (forApi === 'kobold' || forApi === 'textgenerationwebui' || forApi === 'koboldhorde') {
|
||||||
// Try to use the API tokenizer if possible:
|
// Try to use the API tokenizer if possible:
|
||||||
// - API must be connected
|
// - API must be connected
|
||||||
// - Kobold must pass a version check
|
// - Kobold must pass a version check
|
||||||
@@ -140,7 +179,7 @@ export function getTokenCount(str, padding = undefined) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (tokenizerType === tokenizers.BEST_MATCH) {
|
if (tokenizerType === tokenizers.BEST_MATCH) {
|
||||||
tokenizerType = getTokenizerBestMatch();
|
tokenizerType = getTokenizerBestMatch(main_api);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (padding === undefined) {
|
if (padding === undefined) {
|
||||||
|
Reference in New Issue
Block a user