diff --git a/public/index.html b/public/index.html
index 3f54f3068..64659cc82 100644
--- a/public/index.html
+++ b/public/index.html
@@ -3339,7 +3339,8 @@
-
+
+
diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js
index 773d4e408..0d759226d 100644
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -30,6 +30,7 @@ export const tokenizers = {
     JAMBA: 14,
     QWEN2: 15,
     COMMAND_R: 16,
+    NEMO: 17,
     BEST_MATCH: 99,
 };
 
@@ -43,6 +44,7 @@ export const ENCODE_TOKENIZERS = [
     tokenizers.JAMBA,
     tokenizers.QWEN2,
     tokenizers.COMMAND_R,
+    tokenizers.NEMO,
     // uncomment when NovelAI releases Kayra and Clio weights, lol
     //tokenizers.NERD,
     //tokenizers.NERD2,
@@ -121,6 +123,11 @@ const TOKENIZER_URLS = {
         decode: '/api/tokenizers/command-r/decode',
         count: '/api/tokenizers/command-r/encode',
     },
+    [tokenizers.NEMO]: {
+        encode: '/api/tokenizers/nemo/encode',
+        decode: '/api/tokenizers/nemo/decode',
+        count: '/api/tokenizers/nemo/encode',
+    },
     [tokenizers.API_TEXTGENERATIONWEBUI]: {
         encode: '/api/tokenizers/remote/textgenerationwebui/encode',
         count: '/api/tokenizers/remote/textgenerationwebui/encode',
@@ -535,6 +542,7 @@ export function getTokenizerModel() {
     const jambaTokenizer = 'jamba';
     const qwen2Tokenizer = 'qwen2';
     const commandRTokenizer = 'command-r';
+    const nemoTokenizer = 'nemo';
 
     // Assuming no one would use it for different models.. right?
     if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@@ -628,6 +636,9 @@ export function getTokenizerModel() {
     }
 
     if (oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI) {
+        if (oai_settings.mistralai_model.includes('nemo') || oai_settings.mistralai_model.includes('pixtral')) {
+            return nemoTokenizer;
+        }
         return mistralTokenizer;
     }
 
diff --git a/src/endpoints/tokenizers.js b/src/endpoints/tokenizers.js
index 89d530174..0be60d3a4 100644
--- a/src/endpoints/tokenizers.js
+++ b/src/endpoints/tokenizers.js
@@ -221,6 +221,7 @@ const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
 const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
 const commandTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/command-r.json', 'src/tokenizers/llama3.json');
 const qwen2Tokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/qwen2.json', 'src/tokenizers/llama3.json');
+const nemoTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/nemo.json', 'src/tokenizers/llama3.json');
 
 const sentencepieceTokenizers = [
     'llama',
@@ -418,6 +419,10 @@ function getTokenizerModel(requestModel) {
         return 'command-r';
     }
 
+    if (requestModel.includes('nemo')) {
+        return 'nemo';
+    }
+
     // default
     return 'gpt-3.5-turbo';
 }
@@ -645,6 +650,7 @@ router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(clau
 router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
 router.post('/qwen2/encode', jsonParser, createWebTokenizerEncodingHandler(qwen2Tokenizer));
 router.post('/command-r/encode', jsonParser, createWebTokenizerEncodingHandler(commandTokenizer));
+router.post('/nemo/encode', jsonParser, createWebTokenizerEncodingHandler(nemoTokenizer));
 router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
 router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
 router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
@@ -657,6 +663,7 @@ router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(clau
 router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
 router.post('/qwen2/decode', jsonParser, createWebTokenizerDecodingHandler(qwen2Tokenizer));
 router.post('/command-r/decode', jsonParser, createWebTokenizerDecodingHandler(commandTokenizer));
+router.post('/nemo/decode', jsonParser, createWebTokenizerDecodingHandler(nemoTokenizer));
 
 router.post('/openai/encode', jsonParser, async function (req, res) {
     try {
@@ -707,6 +714,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
         return handler(req, res);
     }
 
+    if (queryModel.includes('nemo')) {
+        const handler = createWebTokenizerEncodingHandler(nemoTokenizer);
+        return handler(req, res);
+    }
+
     const model = getTokenizerModel(queryModel);
     const handler = createTiktokenEncodingHandler(model);
     return handler(req, res);
@@ -765,6 +777,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
         return handler(req, res);
     }
 
+    if (queryModel.includes('nemo')) {
+        const handler = createWebTokenizerDecodingHandler(nemoTokenizer);
+        return handler(req, res);
+    }
+
     const model = getTokenizerModel(queryModel);
     const handler = createTiktokenDecodingHandler(model);
     return handler(req, res);
@@ -835,6 +852,13 @@ router.post('/openai/count', jsonParser, async function (req, res) {
         return res.send({ 'token_count': num_tokens });
     }
 
+    if (model === 'nemo') {
+        const instance = await nemoTokenizer.get();
+        if (!instance) throw new Error('Failed to load the Nemo tokenizer');
+        num_tokens = countWebTokenizerTokens(instance, req.body);
+        return res.send({ 'token_count': num_tokens });
+    }
+
     const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
     const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
     const tokensPadding = 3;
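Usage sketch (not part of the patch): a minimal way to smoke-test the new route once this is applied. It assumes the nemo handlers mirror the existing WebTokenizer endpoints — encode takes a JSON body of { text } and returns { ids, count, chunks }; decode takes { ids } and returns { text } — and it omits SillyTavern's standard request headers (CSRF token etc.), which a bare fetch would normally need. In the frontend proper, these URLs are resolved through the TOKENIZER_URLS entry added above rather than hand-written fetches.

    // Hypothetical smoke test; request/response shapes are assumed from the
    // existing WebTokenizer handlers, not guaranteed by this diff.
    const sampleText = 'Mistral Nemo tokenizer check';

    // Encode: text -> token ids.
    const encodeRes = await fetch('/api/tokenizers/nemo/encode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: sampleText }),
    });
    const { ids, count } = await encodeRes.json();
    console.log(`encoded ${count} tokens`, ids);

    // Decode: token ids -> text (should round-trip back to sampleText).
    const decodeRes = await fetch('/api/tokenizers/nemo/decode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ ids }),
    });
    const { text } = await decodeRes.json();
    console.log('round-trip:', text);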