diff --git a/public/index.html b/public/index.html
index 073c6dd38..7a8327cf0 100644
--- a/public/index.html
+++ b/public/index.html
@@ -1735,6 +1735,8 @@
+						<option value="4">NerdStash (NovelAI)</option>
+						<option value="5">NerdStash v2 (NovelAI)</option>
diff --git a/public/script.js b/public/script.js
index 4c9a5a8cc..b1562b462 100644
--- a/public/script.js
+++ b/public/script.js
@@ -481,22 +481,33 @@ function getTokenCount(str, padding = undefined) {
         case tokenizers.CLASSIC:
             return encode(str).length + padding;
         case tokenizers.LLAMA:
-            let tokenCount = 0;
-            jQuery.ajax({
-                async: false,
-                type: 'POST', //
-                url: `/tokenize_llama`,
-                data: JSON.stringify({ text: str }),
-                dataType: "json",
-                contentType: "application/json",
-                success: function (data) {
-                    tokenCount = data.count;
-                }
-            });
-            return tokenCount + padding;
+            return countTokensRemote('/tokenize_llama', str, padding);
+        case tokenizers.NERD:
+            return countTokensRemote('/tokenize_nerdstash', str, padding);
+        case tokenizers.NERD2:
+            return countTokensRemote('/tokenize_nerdstash_v2', str, padding);
+        default:
+            console.warn("Unknown tokenizer type", tokenizerType);
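+            // Fall back to a rough character-count estimate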
+            return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding;
     }
 }
+function countTokensRemote(endpoint, str, padding) {
+    let tokenCount = 0;
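+    // Blocking request (async: false): getTokenCount() must return a number
+    // synchronously, so we wait for the server's count before returning.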
+    jQuery.ajax({
+        async: false,
+        type: 'POST',
+        url: endpoint,
+        data: JSON.stringify({ text: str }),
+        dataType: "json",
+        contentType: "application/json",
+        success: function (data) {
+            tokenCount = data.count;
+        }
+    });
+    return tokenCount + padding;
+}
+
function reloadMarkdownProcessor(render_formulas = false) {
if (render_formulas) {
converter = new showdown.Converter({
@@ -2589,12 +2600,14 @@ function getMaxContextSize() {
         } else {
             this_max_context = Number(max_context);
             if (nai_settings.model_novel == 'krake-v2') {
-                this_max_context -= 160;
+                // Krake has a max context of 2048
+                // Should be used with nerdstash tokenizer for best results
+                this_max_context = Math.min(max_context, 2048);
             }
             if (nai_settings.model_novel == 'clio-v1') {
                 // Clio has a max context of 8192
-                // TODO: Evaluate the relevance of nerdstash-v1 tokenizer, changes quite a bit.
-                this_max_context = 8192 - 60 - 160;
+                // Should be used with nerdstash_v2 tokenizer for best results
+                this_max_context = Math.min(max_context, 8192);
             }
         }
     }
diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js
index f2c701c5c..9a97f2a08 100644
--- a/public/scripts/power-user.js
+++ b/public/scripts/power-user.js
@@ -60,6 +60,8 @@ const tokenizers = {
     GPT3: 1,
     CLASSIC: 2,
     LLAMA: 3,
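+    // NovelAI sentencepiece tokenizers (counted server-side)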
+    NERD: 4,
+    NERD2: 5,
 }
 const send_on_enter_options = {
diff --git a/server.js b/server.js
index 7bc8ec65a..3b3d1782a 100644
--- a/server.js
+++ b/server.js
 const delay = ms => new Promise(resolve => setTimeout(resolve, ms))
 const { SentencePieceProcessor, cleanText } = require("sentencepiece-js");
-let spp;
+let spp_llama;
+let spp_nerd;
+let spp_nerd_v2;
-async function loadSentencepieceTokenizer() {
+async function loadSentencepieceTokenizer(modelPath) {
     try {
         const spp = new SentencePieceProcessor();
-        await spp.load("src/sentencepiece/tokenizer.model");
+        await spp.load(modelPath);
         return spp;
     } catch (error) {
-        console.error("Sentencepiece tokenizer failed to load.");
+        console.error("Sentencepiece tokenizer failed to load: " + modelPath, error);
         return null;
     }
 };
-async function countTokensLlama(text) {
+async function countSentencepieceTokens(spp, text) {
     // Fallback to strlen estimation
     if (!spp) {
-        return Math.ceil(v.length / 3.35);
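+        // Assumes an average of ~3.35 characters per token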
+        return Math.ceil(text.length / 3.35);
     }
     let cleaned = cleanText(text);
@@ -2795,14 +2797,22 @@ app.post("/savepreset_openai", jsonParser, function (request, response) {
     return response.send({ name });
 });
-app.post("/tokenize_llama", jsonParser, async function (request, response) {
-    if (!request.body) {
-        return response.sendStatus(400);
-    }
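+// Shared handler factory for the tokenize endpoints. The tokenizer is read
+// lazily via getTokenizerFn() because the models load asynchronously at startup.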
+function createTokenizationHandler(getTokenizerFn) {
+    return async function (request, response) {
+        if (!request.body) {
+            return response.sendStatus(400);
+        }
-    const count = await countTokensLlama(request.body.text);
-    return response.send({ count });
-});
+        const text = request.body.text || '';
+        const tokenizer = getTokenizerFn();
+        const count = await countSentencepieceTokens(tokenizer, text);
+        return response.send({ count });
+    };
+}
+
+app.post("/tokenize_llama", jsonParser, createTokenizationHandler(() => spp_llama));
+app.post("/tokenize_nerdstash", jsonParser, createTokenizationHandler(() => spp_nerd));
+app.post("/tokenize_nerdstash_v2", jsonParser, createTokenizationHandler(() => spp_nerd_v2));
 // ** REST CLIENT ASYNC WRAPPERS **
@@ -2861,7 +2871,11 @@ const setupTasks = async function () {
     // Colab users could run the embedded tool
     if (!is_colab) await convertWebp();
-    spp = await loadSentencepieceTokenizer();
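+    // Load all three models in parallel; a tokenizer that fails to load
+    // resolves to null, and token counts fall back to the length estimate.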
+    [spp_llama, spp_nerd, spp_nerd_v2] = await Promise.all([
+        loadSentencepieceTokenizer('src/sentencepiece/tokenizer.model'),
+        loadSentencepieceTokenizer('src/sentencepiece/nerdstash.model'),
+        loadSentencepieceTokenizer('src/sentencepiece/nerdstash_v2.model'),
+    ]);
     console.log('Launching...');
diff --git a/src/sentencepiece/nerdstash.model b/src/sentencepiece/nerdstash.model
new file mode 100644
index 000000000..b95958a4c
Binary files /dev/null and b/src/sentencepiece/nerdstash.model differ
diff --git a/src/sentencepiece/nerdstash_v2.model b/src/sentencepiece/nerdstash_v2.model
new file mode 100644
index 000000000..ec2453194
Binary files /dev/null and b/src/sentencepiece/nerdstash_v2.model differ