Merge pull request #2873 from SillyTavern/nemo-tokenizer
Add Mistral Nemo downloadable tokenizer
This commit is contained in:
commit
83c3f6d1bf
|
@ -3339,7 +3339,8 @@
|
||||||
<option value="16">Command-R</option>
|
<option value="16">Command-R</option>
|
||||||
<option value="4">NerdStash (NovelAI Clio)</option>
|
<option value="4">NerdStash (NovelAI Clio)</option>
|
||||||
<option value="5">NerdStash v2 (NovelAI Kayra)</option>
|
<option value="5">NerdStash v2 (NovelAI Kayra)</option>
|
||||||
<option value="7">Mistral</option>
|
<option value="7">Mistral V1</option>
|
||||||
|
<option value="17">Mistral Nemo</option>
|
||||||
<option value="8">Yi</option>
|
<option value="8">Yi</option>
|
||||||
<option value="11">Claude 1/2</option>
|
<option value="11">Claude 1/2</option>
|
||||||
<option value="6">API (WebUI / koboldcpp)</option>
|
<option value="6">API (WebUI / koboldcpp)</option>
|
||||||
|
|
|
@ -30,6 +30,7 @@ export const tokenizers = {
|
||||||
JAMBA: 14,
|
JAMBA: 14,
|
||||||
QWEN2: 15,
|
QWEN2: 15,
|
||||||
COMMAND_R: 16,
|
COMMAND_R: 16,
|
||||||
|
NEMO: 17,
|
||||||
BEST_MATCH: 99,
|
BEST_MATCH: 99,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -43,6 +44,7 @@ export const ENCODE_TOKENIZERS = [
|
||||||
tokenizers.JAMBA,
|
tokenizers.JAMBA,
|
||||||
tokenizers.QWEN2,
|
tokenizers.QWEN2,
|
||||||
tokenizers.COMMAND_R,
|
tokenizers.COMMAND_R,
|
||||||
|
tokenizers.NEMO,
|
||||||
// uncomment when NovelAI releases Kayra and Clio weights, lol
|
// uncomment when NovelAI releases Kayra and Clio weights, lol
|
||||||
//tokenizers.NERD,
|
//tokenizers.NERD,
|
||||||
//tokenizers.NERD2,
|
//tokenizers.NERD2,
|
||||||
|
@ -121,6 +123,11 @@ const TOKENIZER_URLS = {
|
||||||
decode: '/api/tokenizers/command-r/decode',
|
decode: '/api/tokenizers/command-r/decode',
|
||||||
count: '/api/tokenizers/command-r/encode',
|
count: '/api/tokenizers/command-r/encode',
|
||||||
},
|
},
|
||||||
|
[tokenizers.NEMO]: {
|
||||||
|
encode: '/api/tokenizers/nemo/encode',
|
||||||
|
decode: '/api/tokenizers/nemo/decode',
|
||||||
|
count: '/api/tokenizers/nemo/encode',
|
||||||
|
},
|
||||||
[tokenizers.API_TEXTGENERATIONWEBUI]: {
|
[tokenizers.API_TEXTGENERATIONWEBUI]: {
|
||||||
encode: '/api/tokenizers/remote/textgenerationwebui/encode',
|
encode: '/api/tokenizers/remote/textgenerationwebui/encode',
|
||||||
count: '/api/tokenizers/remote/textgenerationwebui/encode',
|
count: '/api/tokenizers/remote/textgenerationwebui/encode',
|
||||||
|
@ -535,6 +542,7 @@ export function getTokenizerModel() {
|
||||||
const jambaTokenizer = 'jamba';
|
const jambaTokenizer = 'jamba';
|
||||||
const qwen2Tokenizer = 'qwen2';
|
const qwen2Tokenizer = 'qwen2';
|
||||||
const commandRTokenizer = 'command-r';
|
const commandRTokenizer = 'command-r';
|
||||||
|
const nemoTokenizer = 'nemo';
|
||||||
|
|
||||||
// Assuming no one would use it for different models.. right?
|
// Assuming no one would use it for different models.. right?
|
||||||
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
|
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
|
||||||
|
@ -628,6 +636,9 @@ export function getTokenizerModel() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI) {
|
if (oai_settings.chat_completion_source == chat_completion_sources.MISTRALAI) {
|
||||||
|
if (oai_settings.mistralai_model.includes('nemo') || oai_settings.mistralai_model.includes('pixtral')) {
|
||||||
|
return nemoTokenizer;
|
||||||
|
}
|
||||||
return mistralTokenizer;
|
return mistralTokenizer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -221,6 +221,7 @@ const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
|
||||||
const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
|
const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
|
||||||
const commandTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/command-r.json', 'src/tokenizers/llama3.json');
|
const commandTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/command-r.json', 'src/tokenizers/llama3.json');
|
||||||
const qwen2Tokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/qwen2.json', 'src/tokenizers/llama3.json');
|
const qwen2Tokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/qwen2.json', 'src/tokenizers/llama3.json');
|
||||||
|
const nemoTokenizer = new WebTokenizer('https://github.com/SillyTavern/SillyTavern-Tokenizers/raw/main/nemo.json', 'src/tokenizers/llama3.json');
|
||||||
|
|
||||||
const sentencepieceTokenizers = [
|
const sentencepieceTokenizers = [
|
||||||
'llama',
|
'llama',
|
||||||
|
@ -418,6 +419,10 @@ function getTokenizerModel(requestModel) {
|
||||||
return 'command-r';
|
return 'command-r';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (requestModel.includes('nemo')) {
|
||||||
|
return 'nemo';
|
||||||
|
}
|
||||||
|
|
||||||
// default
|
// default
|
||||||
return 'gpt-3.5-turbo';
|
return 'gpt-3.5-turbo';
|
||||||
}
|
}
|
||||||
|
@ -645,6 +650,7 @@ router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(clau
|
||||||
router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
|
router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
|
||||||
router.post('/qwen2/encode', jsonParser, createWebTokenizerEncodingHandler(qwen2Tokenizer));
|
router.post('/qwen2/encode', jsonParser, createWebTokenizerEncodingHandler(qwen2Tokenizer));
|
||||||
router.post('/command-r/encode', jsonParser, createWebTokenizerEncodingHandler(commandTokenizer));
|
router.post('/command-r/encode', jsonParser, createWebTokenizerEncodingHandler(commandTokenizer));
|
||||||
|
router.post('/nemo/encode', jsonParser, createWebTokenizerEncodingHandler(nemoTokenizer));
|
||||||
router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
|
router.post('/llama/decode', jsonParser, createSentencepieceDecodingHandler(spp_llama));
|
||||||
router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
|
router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd));
|
||||||
router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
|
router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
|
||||||
|
@ -657,6 +663,7 @@ router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(clau
|
||||||
router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
|
router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
|
||||||
router.post('/qwen2/decode', jsonParser, createWebTokenizerDecodingHandler(qwen2Tokenizer));
|
router.post('/qwen2/decode', jsonParser, createWebTokenizerDecodingHandler(qwen2Tokenizer));
|
||||||
router.post('/command-r/decode', jsonParser, createWebTokenizerDecodingHandler(commandTokenizer));
|
router.post('/command-r/decode', jsonParser, createWebTokenizerDecodingHandler(commandTokenizer));
|
||||||
|
router.post('/nemo/decode', jsonParser, createWebTokenizerDecodingHandler(nemoTokenizer));
|
||||||
|
|
||||||
router.post('/openai/encode', jsonParser, async function (req, res) {
|
router.post('/openai/encode', jsonParser, async function (req, res) {
|
||||||
try {
|
try {
|
||||||
|
@ -707,6 +714,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
|
||||||
return handler(req, res);
|
return handler(req, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (queryModel.includes('nemo')) {
|
||||||
|
const handler = createWebTokenizerEncodingHandler(nemoTokenizer);
|
||||||
|
return handler(req, res);
|
||||||
|
}
|
||||||
|
|
||||||
const model = getTokenizerModel(queryModel);
|
const model = getTokenizerModel(queryModel);
|
||||||
const handler = createTiktokenEncodingHandler(model);
|
const handler = createTiktokenEncodingHandler(model);
|
||||||
return handler(req, res);
|
return handler(req, res);
|
||||||
|
@ -765,6 +777,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
|
||||||
return handler(req, res);
|
return handler(req, res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (queryModel.includes('nemo')) {
|
||||||
|
const handler = createWebTokenizerDecodingHandler(nemoTokenizer);
|
||||||
|
return handler(req, res);
|
||||||
|
}
|
||||||
|
|
||||||
const model = getTokenizerModel(queryModel);
|
const model = getTokenizerModel(queryModel);
|
||||||
const handler = createTiktokenDecodingHandler(model);
|
const handler = createTiktokenDecodingHandler(model);
|
||||||
return handler(req, res);
|
return handler(req, res);
|
||||||
|
@ -835,6 +852,13 @@ router.post('/openai/count', jsonParser, async function (req, res) {
|
||||||
return res.send({ 'token_count': num_tokens });
|
return res.send({ 'token_count': num_tokens });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (model === 'nemo') {
|
||||||
|
const instance = await nemoTokenizer.get();
|
||||||
|
if (!instance) throw new Error('Failed to load the Nemo tokenizer');
|
||||||
|
num_tokens = countWebTokenizerTokens(instance, req.body);
|
||||||
|
return res.send({ 'token_count': num_tokens });
|
||||||
|
}
|
||||||
|
|
||||||
const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
|
const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
|
||||||
const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
|
const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
|
||||||
const tokensPadding = 3;
|
const tokensPadding = 3;
|
||||||
|
|
Loading…
Reference in New Issue