Add llama2 tokenizer for OpenRouter models
parent e1e472bf79
commit fedc3b887f
@@ -153,7 +153,7 @@ const textCompletionModels = [
];

let biasCache = undefined;
let model_list = [];
export let model_list = [];

export const chat_completion_sources = {
    OPENAI: 'openai',
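For context, model_list is presumably populated from the OpenRouter model listing elsewhere in this module; exporting it lets the tokenizer code read per-model metadata. A minimal sketch of the kind of entry the new lookup relies on follows; the field names mirror OpenRouter's /models response, and the concrete values are hypothetical:

// Hypothetical model_list entry; only the fields read by getTokenizerModel() are shown.
const exampleOpenRouterModel = {
    id: 'meta-llama/llama-2-70b-chat',   // compared against oai_settings.openrouter_model
    architecture: {
        tokenizer: 'Llama2',             // drives the new llama tokenizer branch
    },
};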
@@ -1,6 +1,6 @@
import { characters, main_api, nai_settings, online_status, this_chid } from "../script.js";
import { power_user, registerDebugFunction } from "./power-user.js";
import { chat_completion_sources, oai_settings } from "./openai.js";
import { chat_completion_sources, model_list, oai_settings } from "./openai.js";
import { groups, selected_group } from "./group-chats.js";
import { getStringHash } from "./utils.js";
import { kai_flags } from "./kai-settings.js";
@@ -187,6 +187,7 @@ export function getTokenizerModel() {
    const gpt4Tokenizer = 'gpt-4';
    const gpt2Tokenizer = 'gpt2';
    const claudeTokenizer = 'claude';
    const llamaTokenizer = 'llama';

    // Assuming no one would use it for different models.. right?
    if (oai_settings.chat_completion_source == chat_completion_sources.SCALE) {
@@ -214,7 +215,12 @@ export function getTokenizerModel() {

    // And for OpenRouter (if not a site model, then it's impossible to determine the tokenizer)
    if (oai_settings.chat_completion_source == chat_completion_sources.OPENROUTER && oai_settings.openrouter_model) {
        if (oai_settings.openrouter_model.includes('gpt-4')) {
        const model = model_list.find(x => x.id === oai_settings.openrouter_model);

        if (model?.architecture?.tokenizer === 'Llama2') {
            return llamaTokenizer;
        }
        else if (oai_settings.openrouter_model.includes('gpt-4')) {
            return gpt4Tokenizer;
        }
        else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo-0301')) {
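A rough walk-through of how the new branch resolves the tokenizer for an OpenRouter model; the model name below is hypothetical, while the 'Llama2' string and the lookup shape come from the diff above:

// Hypothetical illustration, not part of the diff.
const selected = 'meta-llama/llama-2-13b-chat';          // stand-in for oai_settings.openrouter_model
const entry = model_list.find(x => x.id === selected);   // same lookup as above
if (entry?.architecture?.tokenizer === 'Llama2') {
    // getTokenizerModel() now returns 'llama', so token counts come from the
    // llama SentencePiece model instead of a GPT tiktoken encoder.
}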
@@ -87,6 +87,10 @@ function getTokenizerModel(requestModel) {
        return 'claude';
    }

    if (requestModel.includes('llama')) {
        return 'llama';
    }

    if (requestModel.includes('gpt-4-32k')) {
        return 'gpt-4-32k';
    }
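On the server side, any requested model whose name contains 'llama' is now routed to the llama tokenizer. For illustration, with hypothetical inputs:

// Hypothetical inputs and the values the checks above would return.
getTokenizerModel('meta-llama/llama-2-70b-chat'); // 'llama' (new branch)
getTokenizerModel('gpt-4-32k-0613');              // 'gpt-4-32k'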
@@ -288,7 +292,8 @@ function registerEndpoints(app, jsonParser) {
    app.post("/api/decode/nerdstash_v2", jsonParser, createSentencepieceDecodingHandler(() => spp_nerd_v2));
    app.post("/api/decode/gpt2", jsonParser, createTiktokenDecodingHandler('gpt2'));

    app.post("/api/tokenize/openai", jsonParser, function (req, res) {
    app.post("/api/tokenize/openai", jsonParser, async function (req, res) {
        try {
            if (!req.body) return res.sendStatus(400);

            let num_tokens = 0;
@@ -300,6 +305,14 @@ function registerEndpoints(app, jsonParser) {
                return res.send({ "token_count": num_tokens });
            }

            if (model == 'llama') {
                const jsonBody = req.body.flatMap(x => Object.values(x)).join('\n\n');
                const llamaResult = await countSentencepieceTokens(spp_llama, jsonBody);
                console.log('jsonBody', jsonBody, 'llamaResult', llamaResult);
                num_tokens = llamaResult.count;
                return res.send({ "token_count": num_tokens });
            }

            const tokensPerName = queryModel.includes('gpt-3.5-turbo-0301') ? -1 : 1;
            const tokensPerMessage = queryModel.includes('gpt-3.5-turbo-0301') ? 4 : 3;
            const tokensPadding = 3;
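countSentencepieceTokens and spp_llama are defined elsewhere in this file and are not shown in the diff. A minimal sketch of what such a helper could look like, assuming a loaded SentencePieceProcessor from sentencepiece-js and the same CHARS_PER_TOKEN constant used by the fallback further down:

// Sketch only: count tokens with a SentencePiece model, falling back to a
// character-based estimate when the tokenizer is unavailable.
async function countSentencepieceTokens(spp, text) {
    if (!spp) {
        return { ids: [], count: Math.ceil(text.length / CHARS_PER_TOKEN) };
    }

    const ids = spp.encodeIds(text);
    return { ids, count: ids.length };
}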
@@ -331,6 +344,12 @@ function registerEndpoints(app, jsonParser) {
            //tokenizer.free();

            res.send({ "token_count": num_tokens });
        } catch (error) {
            console.error('An error counting tokens, using fallback estimation method', error);
            const jsonBody = JSON.stringify(req.body);
            const num_tokens = Math.ceil(jsonBody.length / CHARS_PER_TOKEN);
            res.send({ "token_count": num_tokens });
        }
    });
}
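A hypothetical client-side call against the endpoint above; the path comes from the diff, while the query parameter and message shape are assumptions based on the queryModel/requestModel names in the handler:

// Sketch of a frontend request to the tokenize endpoint.
const model = encodeURIComponent('meta-llama/llama-2-70b-chat');  // hypothetical model name
const response = await fetch('/api/tokenize/openai?model=' + model, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify([{ role: 'user', content: 'Hello!' }]),
});
const { token_count } = await response.json();  // numeric token count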