Add support for Yi tokenizer

This commit is contained in:
Cohee
2023-11-21 00:21:58 +02:00
parent 4222b2aa21
commit 9b75e49b54
4 changed files with 44 additions and 15 deletions

View File

@ -18,6 +18,7 @@ export const tokenizers = {
NERD2: 5,
API: 6,
MISTRAL: 7,
YI: 8,
BEST_MATCH: 99,
};
@ -148,6 +149,8 @@ function callTokenizer(type, str, padding) {
return countTokensRemote('/api/tokenize/nerdstash_v2', str, padding);
case tokenizers.MISTRAL:
return countTokensRemote('/api/tokenize/mistral', str, padding);
case tokenizers.YI:
return countTokensRemote('/api/tokenize/yi', str, padding);
case tokenizers.API:
return countTokensRemote('/tokenize_via_api', str, padding);
default:
@ -229,6 +232,7 @@ export function getTokenizerModel() {
const claudeTokenizer = 'claude';
const llamaTokenizer = 'llama';
const mistralTokenizer = 'mistral';
const yiTokenizer = 'yi';
// Assuming no one would use it for different models.. right?
if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@ -264,6 +268,9 @@ export function getTokenizerModel() {
else if (model?.architecture?.tokenizer === 'Mistral') {
return mistralTokenizer;
}
else if (model?.architecture?.tokenizer === 'Yi') {
return yiTokenizer;
}
else if (oai_settings.openrouter_model.includes('gpt-4')) {
return gpt4Tokenizer;
}
@ -485,6 +492,8 @@ export function getTextTokens(tokenizerType, str) {
return getTextTokensRemote('/api/tokenize/nerdstash_v2', str);
case tokenizers.MISTRAL:
return getTextTokensRemote('/api/tokenize/mistral', str);
case tokenizers.YI:
return getTextTokensRemote('/api/tokenize/yi', str);
case tokenizers.OPENAI:
const model = getTokenizerModel();
return getTextTokensRemote('/api/tokenize/openai-encode', str, model);
@ -513,6 +522,8 @@ export function decodeTextTokens(tokenizerType, ids) {
return decodeTextTokensRemote('/api/decode/nerdstash_v2', ids);
case tokenizers.MISTRAL:
return decodeTextTokensRemote('/api/decode/mistral', ids);
case tokenizers.YI:
return decodeTextTokensRemote('/api/decode/yi', ids);
default:
console.warn("Calling decodeTextTokens with unsupported tokenizer type", tokenizerType);
return '';