From afd2e810a8abac433fc5f14253ef9812cfdbf95c Mon Sep 17 00:00:00 2001 From: SillyLossy Date: Sun, 14 May 2023 20:17:14 +0300 Subject: [PATCH] Fix OAI tokenization --- public/script.js | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/public/script.js b/public/script.js index ac35d41b2..82b2e5de9 100644 --- a/public/script.js +++ b/public/script.js @@ -408,18 +408,21 @@ async function getClientVersion() { } } -function getTokenCount(str, padding = 0) { +function getTokenCount(str, padding = undefined) { let tokenizerType = power_user.tokenizer; if (main_api === 'openai') { - // For main prompt building - if (padding == power_user.token_padding) { + if (padding === power_user.token_padding) { + // For main "shadow" prompt building tokenizerType = tokenizers.NONE; - // For extensions and WI } else { + // For extensions and WI return getTokenCountOpenAI(str); } + } + if (padding === undefined) { + padding = 0; } switch (tokenizerType) { @@ -2190,7 +2193,6 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, if (main_api == 'openai') { let [prompt, counts] = await prepareOpenAIMessages(name2, storyString, worldInfoBefore, worldInfoAfter, afterScenarioAnchor, promptBias, type); - // counts will return false if the user has not enabled the token breakdown feature if (counts) { @@ -2198,7 +2200,7 @@ async function Generate(type, { automatic_trigger, force_name2, resolve, reject, const breakdown_bar = $('#token_breakdown div:first-child'); breakdown_bar.empty(); - const total = Object.values(counts).reduce((acc, val) => acc + val, 0); + const total = Object.values(counts).filter(x => !Number.isNaN(x)).reduce((acc, val) => acc + val, 0); console.log(`oai start tokens: ${Object.entries(counts)[0][1]}`); thisPromptBits.push({ @@ -2507,8 +2509,10 @@ function promptItemize(itemizedPrompts, requestedMesId) { var oaiJailbreakTokens = itemizedPrompts[thisPromptSet].oaiJailbreakTokens; var oaiNudgeTokens = itemizedPrompts[thisPromptSet].oaiNudgeTokens; var oaiImpersonateTokens = itemizedPrompts[thisPromptSet].oaiImpersonateTokens; - - + // OAI doesn't use padding + thisPrompt_padding = 0; + // Max context size - max completion tokens + thisPrompt_max_context = (oai_settings.openai_max_context - oai_settings.openai_max_tokens); } else { //for non-OAI APIs //console.log('-- Counting non-OAI Tokens'); @@ -2538,7 +2542,7 @@ function promptItemize(itemizedPrompts, requestedMesId) { var promptBiasTokensPercentage = ((oaiBiasTokens / (finalPromptTokens)) * 100).toFixed(2); var worldInfoStringTokensPercentage = ((worldInfoStringTokens / (finalPromptTokens)) * 100).toFixed(2); var allAnchorsTokensPercentage = ((allAnchorsTokens / (finalPromptTokens)) * 100).toFixed(2); - var selectedTokenizer = $("#tokenizer").find(':selected').text(); + var selectedTokenizer = `tiktoken (${oai_settings.openai_model})`; } else { //console.log('-- applying % on non-OAI tokens'); @@ -2635,7 +2639,7 @@ function promptItemize(itemizedPrompts, requestedMesId) {
Total Tokens in Prompt:
${finalPromptTokens}
-
Max Context:
${thisPrompt_max_context}
+
Max Context (Context Size - Response Length):
${thisPrompt_max_context}
- Padding:
${thisPrompt_padding}