diff --git a/public/index.html b/public/index.html
index 073c6dd38..7a8327cf0 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1735,6 +1735,8 @@
+
+
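The two lines added to index.html are elided in the patch above. Given the `NERD = 4` and `NERD2 = 5` values introduced in the `tokenizers` map in public/scripts/power-user.js below, they are presumably the matching entries in the tokenizer dropdown; the exact markup is not recoverable from this patch. As a hedged sketch only, a dropdown value would flow into the enum roughly like this (the `#tokenizer` selector and `power_user.tokenizer` setting name are illustrative assumptions, not code from this patch):

```js
// Hypothetical sketch: wiring a tokenizer <select> value to the enum.
// '#tokenizer' and power_user.tokenizer are assumptions, not this patch's code.
$('#tokenizer').on('change', function () {
    power_user.tokenizer = Number($(this).val()); // e.g. 4 => tokenizers.NERD
});
```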
diff --git a/public/script.js b/public/script.js
index 4c9a5a8cc..b1562b462 100644
--- a/public/script.js
+++ b/public/script.js
@@ -481,22 +481,33 @@ function getTokenCount(str, padding = undefined) {
         case tokenizers.CLASSIC:
             return encode(str).length + padding;
         case tokenizers.LLAMA:
-            let tokenCount = 0;
-            jQuery.ajax({
-                async: false,
-                type: 'POST', //
-                url: `/tokenize_llama`,
-                data: JSON.stringify({ text: str }),
-                dataType: "json",
-                contentType: "application/json",
-                success: function (data) {
-                    tokenCount = data.count;
-                }
-            });
-            return tokenCount + padding;
+            return countTokensRemote('/tokenize_llama', str, padding);
+        case tokenizers.NERD:
+            return countTokensRemote('/tokenize_nerdstash', str, padding);
+        case tokenizers.NERD2:
+            return countTokensRemote('/tokenize_nerdstash_v2', str, padding);
+        default:
+            console.warn("Unknown tokenizer type", tokenizerType);
+            return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding;
     }
 }
 
+function countTokensRemote(endpoint, str, padding) {
+    let tokenCount = 0;
+    jQuery.ajax({
+        async: false,
+        type: 'POST',
+        url: endpoint,
+        data: JSON.stringify({ text: str }),
+        dataType: "json",
+        contentType: "application/json",
+        success: function (data) {
+            tokenCount = data.count;
+        }
+    });
+    return tokenCount + padding;
+}
+
 function reloadMarkdownProcessor(render_formulas = false) {
     if (render_formulas) {
         converter = new showdown.Converter({
@@ -2589,12 +2600,14 @@ function getMaxContextSize() {
     } else {
         this_max_context = Number(max_context);
         if (nai_settings.model_novel == 'krake-v2') {
-            this_max_context -= 160;
+            // Krake has a max context of 2048
+            // Should be used with nerdstash tokenizer for best results
+            this_max_context = Math.min(max_context, 2048);
         }
         if (nai_settings.model_novel == 'clio-v1') {
             // Clio has a max context of 8192
-            // TODO: Evaluate the relevance of nerdstash-v1 tokenizer, changes quite a bit.
-            this_max_context = 8192 - 60 - 160;
+            // Should be used with nerdstash_v2 tokenizer for best results
+            this_max_context = Math.min(max_context, 8192);
         }
     }
 }
diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js
index f2c701c5c..9a97f2a08 100644
--- a/public/scripts/power-user.js
+++ b/public/scripts/power-user.js
@@ -60,6 +60,8 @@ const tokenizers = {
     GPT3: 1,
     CLASSIC: 2,
     LLAMA: 3,
+    NERD: 4,
+    NERD2: 5,
 }
 
 const send_on_enter_options = {
diff --git a/server.js b/server.js
index 7bc8ec65a..3b3d1782a 100644
--- a/server.js
+++ b/server.js
@@ -128,23 +128,25 @@ const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
 
 const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
 
-let spp;
+let spp_llama;
+let spp_nerd;
+let spp_nerd_v2;
 
-async function loadSentencepieceTokenizer() {
+async function loadSentencepieceTokenizer(modelPath) {
     try {
         const spp = new SentencePieceProcessor();
-        await spp.load("src/sentencepiece/tokenizer.model");
+        await spp.load(modelPath);
         return spp;
     } catch (error) {
-        console.error("Sentencepiece tokenizer failed to load.");
+        console.error("Sentencepiece tokenizer failed to load: " + modelPath, error);
         return null;
     }
 };
 
-async function countTokensLlama(text) {
+async function countSentencepieceTokens(spp, text) {
     // Fallback to strlen estimation
     if (!spp) {
-        return Math.ceil(v.length / 3.35);
+        return Math.ceil(text.length / 3.35);
     }
 
     let cleaned = cleanText(text);
@@ -2795,14 +2797,22 @@ app.post("/savepreset_openai", jsonParser, function (request, response) {
     return response.send({ name });
 });
 
-app.post("/tokenize_llama", jsonParser, async function (request, response) {
-    if (!request.body) {
-        return response.sendStatus(400);
-    }
+function createTokenizationHandler(getTokenizerFn) {
+    return async function (request, response) {
+        if (!request.body) {
+            return response.sendStatus(400);
+        }
 
-    const count = await countTokensLlama(request.body.text);
-    return response.send({ count });
-});
+        const text = request.body.text || '';
+        const tokenizer = getTokenizerFn();
+        const count = await countSentencepieceTokens(tokenizer, text);
+        return response.send({ count });
+    };
+}
+
+app.post("/tokenize_llama", jsonParser, createTokenizationHandler(() => spp_llama));
+app.post("/tokenize_nerdstash", jsonParser, createTokenizationHandler(() => spp_nerd));
+app.post("/tokenize_nerdstash_v2", jsonParser, createTokenizationHandler(() => spp_nerd_v2));
 
 // ** REST CLIENT ASYNC WRAPPERS **
@@ -2861,7 +2871,11 @@ const setupTasks = async function () {
     // Colab users could run the embedded tool
     if (!is_colab) await convertWebp();
 
-    spp = await loadSentencepieceTokenizer();
+    [spp_llama, spp_nerd, spp_nerd_v2] = await Promise.all([
+        loadSentencepieceTokenizer('src/sentencepiece/tokenizer.model'),
+        loadSentencepieceTokenizer('src/sentencepiece/nerdstash.model'),
+        loadSentencepieceTokenizer('src/sentencepiece/nerdstash_v2.model'),
+    ]);
 
     console.log('Launching...');
diff --git a/src/sentencepiece/nerdstash.model b/src/sentencepiece/nerdstash.model
new file mode 100644
index 000000000..b95958a4c
Binary files /dev/null and b/src/sentencepiece/nerdstash.model differ
diff --git a/src/sentencepiece/nerdstash_v2.model b/src/sentencepiece/nerdstash_v2.model
new file mode 100644
index 000000000..ec2453194
Binary files /dev/null and b/src/sentencepiece/nerdstash_v2.model differ
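For reference, a minimal sketch (not part of the patch) of exercising one of the new tokenization endpoints. The host and port are assumptions; the `{ text }` request body and `{ count }` response shape follow `createTokenizationHandler` above:

```js
// Minimal usage sketch, assuming the server listens on localhost:8000 (port is an assumption).
// Mirrors what the client-side countTokensRemote() does, but asynchronously.
const response = await fetch('http://localhost:8000/tokenize_nerdstash_v2', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text: 'Hello, world!' }),
});
const { count } = await response.json();
console.log(`nerdstash_v2 tokens: ${count}`);
```

The handler factory keeps the three endpoints from duplicating the body-validation and counting boilerplate. Note that the client-side `countTokensRemote` still issues a synchronous jQuery request (`async: false`), so the UI thread blocks while a count is in flight.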