From a782dcec336cabe2695b3661ee98cb945aacb55a Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Thu, 19 Sep 2024 00:10:22 +0300
Subject: [PATCH] Add Mistral Nemo downloadable tokenizer
---
public/index.html | 3 ++-
public/scripts/tokenizers.js | 11 +++++++++++
src/endpoints/tokenizers.js | 24 ++++++++++++++++++++++++
3 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/public/index.html b/public/index.html
index 3f54f3068..64659cc82 100644
--- a/public/index.html
+++ b/public/index.html
@@ -3339,7 +3339,8 @@
-
+
+
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
index 773d4e408..0d759226d 100644
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -30,6 +30,7 @@ export const tokenizers = {
JAMBA: 14,
QWEN2: 15,
COMMAND_R: 16,
+ NEMO: 17,
BEST_MATCH: 99,
};
@@ -43,6 +44,7 @@ export const ENCODE_TOKENIZERS = [
tokenizers.JAMBA,
tokenizers.QWEN2,
tokenizers.COMMAND_R,
+ tokenizers.NEMO,
// uncomment when NovelAI releases Kayra and Clio weights, lol
//tokenizers.NERD,
//tokenizers.NERD2,
@@ -121,6 +123,11 @@ const TOKENIZER_URLS = {
decode: '/api/tokenizers/command-r/decode',
count: '/api/tokenizers/command-r/encode',
},
+ [tokenizers.NEMO]: {
+ encode: '/api/tokenizers/nemo/encode',
+ decode: '/api/tokenizers/nemo/decode',
+ count: '/api/tokenizers/nemo/encode',
+ },
[tokenizers.API_TEXTGENERATIONWEBUI]: {
encode: '/api/tokenizers/remote/textgenerationwebui/encode',
count: '/api/tokenizers/remote/textgenerationwebui/encode',
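The new TOKENIZER_URLS entry follows the existing convention: counting reuses the encode route, so each downloadable tokenizer only needs two server handlers. A minimal sketch of calling the new endpoint directly, assuming a { text } request body and a JSON response (the exact payload shape is not shown in this patch):

```js
// Hypothetical helper for illustration; the request/response shape is an
// assumption, not something this patch defines.
async function encodeWithNemo(text) {
    const response = await fetch('/api/tokenizers/nemo/encode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text }),
    });
    if (!response.ok) throw new Error(`Tokenizer request failed: ${response.status}`);
    return response.json(); // e.g. { ids: [...], count: N }
}
```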
@@ -535,6 +542,7 @@ export function getTokenizerModel() {
const jambaTokenizer = 'jamba';
const qwen2Tokenizer = 'qwen2';
const commandRTokenizer = 'command-r';
+ const nemoTokenizer = 'nemo';
// Assuming no one would use it for different models.. right?
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@@ -628,6 +636,9 @@ export function getTokenizerModel() {
}
if (oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI) {
+ if (oai_settings.mistralai_model.includes('nemo') || oai_settings.mistralai_model.includes('pixtral')) {
+ return nemoTokenizer;
+ }
return mistralTokenizer;
}
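The client-side dispatch is a plain substring check: MistralAI models whose IDs contain 'nemo' or 'pixtral' use the new Tekken-based tokenizer, and every other MistralAI model keeps the classic Mistral tokenizer. An illustrative reduction of that branch (pickMistralTokenizer is a hypothetical helper, not part of the patch):

```js
// Hypothetical helper mirroring the branch added above.
function pickMistralTokenizer(modelId) {
    return (modelId.includes('nemo') || modelId.includes('pixtral'))
        ? 'nemo'     // Tekken-based models (Mistral Nemo, Pixtral)
        : 'mistral'; // older SentencePiece-based Mistral models
}

pickMistralTokenizer('open-mistral-nemo');  // 'nemo'
pickMistralTokenizer('pixtral-12b-2409');   // 'nemo'
pickMistralTokenizer('mistral-large-2407'); // 'mistral'
```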
diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index 89d530174..0be60d3a4 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -221,6 +221,7 @@ const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
const commandTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/command-r.json', 'src/tokenizers/llama3.json');
const qwen2Tokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/qwen2.json', 'src/tokenizers/llama3.json');
+const nemoTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/nemo.json', 'src/tokenizers/llama3.json');
const sentencepieceTokenizers = [
'llama',
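nemoTokenizer reuses the established WebTokenizer pattern: the tokenizer JSON is downloaded from the SillyTavern-Tokenizers repo on first use, with the bundled llama3.json as an offline fallback. A rough sketch of that download-with-fallback behavior, assuming the @agnai/web-tokenizers API the file already relies on (the real WebTokenizer class is defined elsewhere in this file and may differ in detail):

```js
const fs = require('fs');
const { Tokenizer } = require('@agnai/web-tokenizers');

// Sketch of a lazy "download or fall back" wrapper; details are assumptions.
class LazyWebTokenizer {
    constructor(url, fallbackPath) {
        this.url = url;
        this.fallbackPath = fallbackPath;
        this.instance = null;
    }

    async get() {
        if (this.instance) return this.instance;
        try {
            // Prefer the downloadable tokenizer definition...
            const res = await fetch(this.url);
            if (!res.ok) throw new Error(`HTTP ${res.status}`);
            this.instance = await Tokenizer.fromJSON(await res.arrayBuffer());
        } catch {
            // ...and fall back to the bundled local file on any failure.
            this.instance = await Tokenizer.fromJSON(fs.readFileSync(this.fallbackPath));
        }
        return this.instance;
    }
}
```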
@@ -418,6 +419,10 @@ function getTokenizerModel(requestModel) {
return 'command-r';
}
+ if (requestModel.includes('nemo')) {
+ return 'nemo';
+ }
+
// default
return 'gpt-3.5-turbo';
}
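On the server, getTokenizerModel resolves a raw model name to a tokenizer key, and the new 'nemo' branch slots in just before the gpt-3.5-turbo default. Illustrative calls, assuming no earlier branch in the function matches the name first (the full chain of checks is not shown in this hunk):

```js
getTokenizerModel('nemo-12b');      // 'nemo'
getTokenizerModel('unknown-model'); // 'gpt-3.5-turbo' (the default)
```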
@@ -645,6 +650,7 @@ router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(clau
router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
router.post('/qwen2/encode', jsonParser, createWebTokenizerEncodingHandler(qwen2Tokenizer));
router.post('/command-r/encode', jsonParser, createWebTokenizerEncodingHandler(commandTokenizer));
+router.post('/nemo/encode', jsonParser, createWebTokenizerEncodingHandler(nemoTokenizer));
router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
@@ -657,6 +663,7 @@ router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(clau
router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
router.post('/qwen2/decode', jsonParser, createWebTokenizerDecodingHandler(qwen2Tokenizer));
router.post('/command-r/decode', jsonParser, createWebTokenizerDecodingHandler(commandTokenizer));
+router.post('/nemo/decode', jsonParser, createWebTokenizerDecodingHandler(nemoTokenizer));
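The two new routes reuse the generic handler factories already used by llama3, qwen2, and command-r. For reference, a plausible shape for the encoding factory; the real implementation lives earlier in this file and may differ:

```js
// Sketch only: the actual factory in src/endpoints/tokenizers.js may return
// additional fields (e.g. token chunks) or handle errors differently.
function createWebTokenizerEncodingHandler(tokenizer) {
    return async function (req, res) {
        try {
            if (!req.body) return res.sendStatus(400);
            const text = req.body.text || '';
            const instance = await tokenizer.get();
            if (!instance) throw new Error('Failed to load the tokenizer');
            const tokens = Array.from(instance.encode(text));
            return res.send({ ids: tokens, count: tokens.length });
        } catch (error) {
            console.error(error);
            return res.send({ ids: [], count: 0 });
        }
    };
}
```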
router.post('/openai/encode', jsonParser, async function (req, res) {
try {
@@ -707,6 +714,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
return handler(req, res);
}
+ if (queryModel.includes('nemo')) {
+ const handler = createWebTokenizerEncodingHandler(nemoTokenizer);
+ return handler(req, res);
+ }
+
const model = getTokenizerModel(queryModel);
const handler = createTiktokenEncodingHandler(model);
return handler(req, res);
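The shared /openai/encode route dispatches on the model name, so Nemo-family requests now short-circuit to the web tokenizer instead of falling through to tiktoken; /openai/decode below mirrors the same check. An example request, assuming the model is passed as a ?model= query parameter as the queryModel name suggests (this patch does not show the query parsing itself):

```js
// Assumes the route reads the model name from req.query.model.
await fetch('/api/tokenizers/openai/encode?model=open-mistral-nemo', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ text: 'Hello, world!' }),
});
```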
@@ -765,6 +777,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
return handler(req, res);
}
+ if (queryModel.includes('nemo')) {
+ const handler = createWebTokenizerDecodingHandler(nemoTokenizer);
+ return handler(req, res);
+ }
+
const model = getTokenizerModel(queryModel);
const handler = createTiktokenDecodingHandler(model);
return handler(req, res);
@@ -835,6 +852,13 @@ router.post('/openai/count', jsonParser, async function (req, res) {
return res.send({ 'token_count': num_tokens });
}
+ if (model === 'nemo') {
+ const instance = await nemoTokenizer.get();
+ if (!instance) throw new Error('Failed to load the Nemo tokenizer');
+ num_tokens = countWebTokenizerTokens(instance, req.body);
+ return res.send({ 'token_count': num_tokens });
+ }
+
const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
const tokensPadding = 3;
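In /openai/count, the nemo branch hands the whole chat payload to countWebTokenizerTokens instead of the per-message tiktoken bookkeeping that follows (tokensPerMessage, tokensPerName, padding). A rough sketch of what such a helper could do; the real countWebTokenizerTokens is defined elsewhere in this file, and its serialization of messages is an assumption here:

```js
// Sketch: flatten the chat into one string and count its tokens. The real
// helper may serialize roles and names differently.
function countWebTokenizerTokens(instance, messages) {
    const text = messages.map(m => `${m.role}: ${m.content}`).join('\n');
    return instance.encode(text).length;
}
```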