Mirror of https://github.com/SillyTavern/SillyTavern.git (synced 2025-03-02 02:47:52 +01:00)

Add tokenizer for Gemma/Gemini

commit e707def7dd
parent ce8b0aae96
@@ -3449,6 +3449,7 @@
                         <!-- Option #2 was a legacy GPT-2/3 tokenizer -->
                         <option value="3">Llama 1/2</option>
                         <option value="12">Llama 3</option>
+                        <option value="13">Gemma / Gemini</option>
                         <option value="4">NerdStash (NovelAI Clio)</option>
                         <option value="5">NerdStash v2 (NovelAI Kayra)</option>
                         <option value="7">Mistral</option>
@@ -599,6 +599,10 @@ export function getCurrentOpenRouterModelTokenizer() {
             return tokenizers.YI;
+        case 'Mistral':
+            return tokenizers.MISTRAL;
+        case 'Gemini':
+            return tokenizers.GEMMA;
         case 'Claude':
             return tokenizers.CLAUDE;
         default:
             return tokenizers.OPENAI;
     }
@@ -26,6 +26,7 @@ export const tokenizers = {
     API_KOBOLD: 10,
     CLAUDE: 11,
     LLAMA3: 12,
+    GEMMA: 13,
     BEST_MATCH: 99,
 };
@@ -34,6 +35,7 @@ export const SENTENCEPIECE_TOKENIZERS = [
     tokenizers.MISTRAL,
     tokenizers.YI,
     tokenizers.LLAMA3,
+    tokenizers.GEMMA,
     // uncomment when NovelAI releases Kayra and Clio weights, lol
     //tokenizers.NERD,
     //tokenizers.NERD2,
@@ -91,6 +93,11 @@ const TOKENIZER_URLS = {
         decode: '/api/tokenizers/llama3/decode',
         count: '/api/tokenizers/llama3/encode',
     },
+    [tokenizers.GEMMA]: {
+        encode: '/api/tokenizers/gemma/encode',
+        decode: '/api/tokenizers/gemma/decode',
+        count: '/api/tokenizers/gemma/encode',
+    },
     [tokenizers.API_TEXTGENERATIONWEBUI]: {
         encode: '/api/tokenizers/remote/textgenerationwebui/encode',
         count: '/api/tokenizers/remote/textgenerationwebui/encode',
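Note: TOKENIZER_URLS maps a tokenizer id to its encode/decode/count routes, so the new GEMMA entry is all the client needs to reach the server-side Gemma tokenizer. A minimal sketch of how a caller might resolve it (the helper name and response shape are assumptions, not part of this commit):

    // Hypothetical helper: look up the routes for a tokenizer id and encode text.
    async function encodeWithTokenizer(tokenizerId, text) {
        const urls = TOKENIZER_URLS[tokenizerId]; // e.g. tokenizers.GEMMA
        if (!urls) throw new Error(`No remote tokenizer registered for id ${tokenizerId}`);
        const response = await fetch(urls.encode, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text }),
        });
        return response.json(); // assumed shape: { ids: number[], count: number }
    }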
@@ -232,6 +239,9 @@ export function getTokenizerBestMatch(forApi) {
         if (model.includes('mistral') || model.includes('mixtral')) {
             return tokenizers.MISTRAL;
         }
+        if (model.includes('gemma')) {
+            return tokenizers.GEMMA;
+        }
     }

     return tokenizers.LLAMA;
@@ -441,12 +451,14 @@ export function getTokenizerModel() {
     const turbo0301Tokenizer = 'gpt-3.5-turbo-0301';
     const turboTokenizer = 'gpt-3.5-turbo';
     const gpt4Tokenizer = 'gpt-4';
+    const gpt4oTokenizer = 'gpt-4o';
     const gpt2Tokenizer = 'gpt2';
     const claudeTokenizer = 'claude';
     const llamaTokenizer = 'llama';
     const llama3Tokenizer = 'llama3';
     const mistralTokenizer = 'mistral';
     const yiTokenizer = 'yi';
+    const gemmaTokenizer = 'gemma';

     // Assuming no one would use it for different models.. right?
     if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@@ -491,6 +503,12 @@ export function getTokenizerModel() {
     else if (model?.architecture?.tokenizer === 'Yi') {
         return yiTokenizer;
     }
+    else if (model?.architecture?.tokenizer === 'Gemini') {
+        return gemmaTokenizer;
+    }
+    else if (oai_settings.openrouter_model.includes('gpt-4o')) {
+        return gpt4oTokenizer;
+    }
     else if (oai_settings.openrouter_model.includes('gpt-4')) {
         return gpt4Tokenizer;
     }
@@ -509,7 +527,7 @@
     }

     if (oai_settings.chat_completion_source == chat_completion_sources.MAKERSUITE) {
-        return oai_settings.google_model;
+        return gemmaTokenizer;
     }

     if (oai_settings.chat_completion_source == chat_completion_sources.CLAUDE) {
@@ -543,6 +561,9 @@
         if (oai_settings.groq_model.includes('mistral') || oai_settings.groq_model.includes('mixtral')) {
             return mistralTokenizer;
         }
+        if (oai_settings.groq_model.includes('gemma')) {
+            return gemmaTokenizer;
+        }
     }

     if (oai_settings.chat_completion_source === chat_completion_sources.ZEROONEAI) {
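Note: with these changes, getTokenizerModel() now returns 'gemma' for Google (MakerSuite) sources, for OpenRouter models whose architecture reports a 'Gemini' tokenizer, and for Groq models containing 'gemma'. A sketch of how that string key would be consumed (the query-parameter name is an assumption based on the server's queryModel checks further down):

    // Hypothetical usage: forward the resolved key to the generic OpenAI-style
    // count endpoint, which now special-cases 'gemma'/'gemini'.
    async function countChatTokens(messages) {
        const model = getTokenizerModel(); // e.g. 'gemma' for a MakerSuite chat
        const response = await fetch(`/api/tokenizers/openai/count?model=${model}`, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(messages),
        });
        const data = await response.json();
        return data.token_count;
    }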
@@ -143,6 +143,7 @@ const spp_nerd = new SentencePieceTokenizer('src/tokenizers/nerdstash.model');
 const spp_nerd_v2 = new SentencePieceTokenizer('src/tokenizers/nerdstash_v2.model');
 const spp_mistral = new SentencePieceTokenizer('src/tokenizers/mistral.model');
 const spp_yi = new SentencePieceTokenizer('src/tokenizers/yi.model');
+const spp_gemma = new SentencePieceTokenizer('src/tokenizers/gemma.model');
 const claude_tokenizer = new WebTokenizer('src/tokenizers/claude.json');
 const llama3_tokenizer = new WebTokenizer('src/tokenizers/llama3.json');
@@ -152,6 +153,7 @@ const sentencepieceTokenizers = [
     'nerdstash_v2',
     'mistral',
     'yi',
+    'gemma',
 ];

 /**
@@ -180,6 +182,10 @@ function getSentencepiceTokenizer(model) {
         return spp_yi;
     }

+    if (model.includes('gemma')) {
+        return spp_gemma;
+    }
+
     return null;
 }
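Note: getSentencepiceTokenizer() routes by substring match on the model name, so the new branch behaves like the existing Mistral/Yi ones. Illustrative resolutions, derived from the branches above rather than test output:

    getSentencepiceTokenizer('gemma-7b-it');  // -> spp_gemma (new in this commit)
    getSentencepiceTokenizer('yi-34b-chat');  // -> spp_yi
    getSentencepiceTokenizer('gpt-4');        // -> null, falls through to tiktoken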
@@ -312,8 +318,8 @@ function getTokenizerModel(requestModel) {
         return 'yi';
     }

-    if (requestModel.includes('gemini')) {
-        return 'gpt-4o';
+    if (requestModel.includes('gemma') || requestModel.includes('gemini')) {
+        return 'gemma';
     }

     // default
@@ -583,6 +589,7 @@ router.post('/nerdstash/encode', jsonParser, createSentencepieceEncodingHandler(
 router.post('/nerdstash_v2/encode', jsonParser, createSentencepieceEncodingHandler(spp_nerd_v2));
 router.post('/mistral/encode', jsonParser, createSentencepieceEncodingHandler(spp_mistral));
 router.post('/yi/encode', jsonParser, createSentencepieceEncodingHandler(spp_yi));
+router.post('/gemma/encode', jsonParser, createSentencepieceEncodingHandler(spp_gemma));
 router.post('/gpt2/encode', jsonParser, createTiktokenEncodingHandler('gpt2'));
 router.post('/claude/encode', jsonParser, createWebTokenizerEncodingHandler(claude_tokenizer));
 router.post('/llama3/encode', jsonParser, createWebTokenizerEncodingHandler(llama3_tokenizer));
@@ -591,6 +598,7 @@ router.post('/nerdstash/decode', jsonParser, createSentencepieceDecodingHandler(
 router.post('/nerdstash_v2/decode', jsonParser, createSentencepieceDecodingHandler(spp_nerd_v2));
 router.post('/mistral/decode', jsonParser, createSentencepieceDecodingHandler(spp_mistral));
 router.post('/yi/decode', jsonParser, createSentencepieceDecodingHandler(spp_yi));
+router.post('/gemma/decode', jsonParser, createSentencepieceDecodingHandler(spp_gemma));
 router.post('/gpt2/decode', jsonParser, createTiktokenDecodingHandler('gpt2'));
 router.post('/claude/decode', jsonParser, createWebTokenizerDecodingHandler(claude_tokenizer));
 router.post('/llama3/decode', jsonParser, createWebTokenizerDecodingHandler(llama3_tokenizer));
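Note: the /gemma/encode and /gemma/decode routes reuse the generic SentencePiece handlers, so they should accept the same payloads as the sibling routes. A smoke-test sketch, with request/response shapes assumed from those handlers (run inside an async context):

    // Hypothetical round trip against a local server.
    const encoded = await fetch('/api/tokenizers/gemma/encode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: 'Hello, Gemma!' }),
    }).then(r => r.json());          // assumed: { ids: [...], count: n }

    const decoded = await fetch('/api/tokenizers/gemma/decode', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ ids: encoded.ids }),
    }).then(r => r.json());          // assumed: { text: 'Hello, Gemma!' }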
@@ -624,6 +632,11 @@ router.post('/openai/encode', jsonParser, async function (req, res) {
         return handler(req, res);
     }

+    if (queryModel.includes('gemma') || queryModel.includes('gemini')) {
+        const handler = createSentencepieceEncodingHandler(spp_gemma);
+        return handler(req, res);
+    }
+
     const model = getTokenizerModel(queryModel);
     const handler = createTiktokenEncodingHandler(model);
     return handler(req, res);
@@ -662,6 +675,11 @@ router.post('/openai/decode', jsonParser, async function (req, res) {
         return handler(req, res);
     }

+    if (queryModel.includes('gemma') || queryModel.includes('gemini')) {
+        const handler = createSentencepieceDecodingHandler(spp_gemma);
+        return handler(req, res);
+    }
+
     const model = getTokenizerModel(queryModel);
     const handler = createTiktokenDecodingHandler(model);
     return handler(req, res);
@@ -708,6 +726,11 @@ router.post('/openai/count', jsonParser, async function (req, res) {
         return res.send({ 'token_count': num_tokens });
     }

+    if (model === 'gemma' || model === 'gemini') {
+        num_tokens = await countSentencepieceArrayTokens(spp_gemma, req.body);
+        return res.send({ 'token_count': num_tokens });
+    }
+
     const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
     const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
     const tokensPadding = 3;
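Note: unlike the tiktoken path below it, the new count branch applies no per-message or per-name overhead terms; the whole chat array is handed to countSentencepieceArrayTokens(). An assumed request/response pair for this endpoint:

    // Assumed request body for /api/tokenizers/openai/count?model=gemma:
    // an array of OpenAI-style chat messages.
    const body = [
        { role: 'user', content: 'Why is the sky blue?' },
        { role: 'assistant', content: 'Rayleigh scattering.' },
    ];
    // Assumed response, per the handler above: { token_count: <number> }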
BIN  src/tokenizers/gemma.model  (new file, binary file not shown)