diff --git a/public/scripts/tokenizers.js b/public/scripts/tokenizers.js index c25075a0f..4d3eb1eb7 100644 --- a/public/scripts/tokenizers.js +++ b/public/scripts/tokenizers.js @@ -32,6 +32,7 @@ export const tokenizers = { COMMAND_R: 16, NEMO: 17, BEST_MATCH: 99, + MANUAL_SELECTION: 411, }; // A list of local tokenizers that support encoding and decoding token ids. @@ -536,7 +537,6 @@ export function getTokenizerModel() { return oai_settings.openai_model; } - const turbo0301Tokenizer = 'gpt-3.5-turbo-0301'; const turboTokenizer = 'gpt-3.5-turbo'; const gpt4Tokenizer = 'gpt-4'; const gpt4oTokenizer = 'gpt-4o'; @@ -562,9 +562,6 @@ export function getTokenizerModel() { if (oai_settings.windowai_model.includes('gpt-4')) { return gpt4Tokenizer; } - else if (oai_settings.windowai_model.includes('gpt-3.5-turbo-0301')) { - return turbo0301Tokenizer; - } else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) { return turboTokenizer; } @@ -610,9 +607,6 @@ export function getTokenizerModel() { else if (oai_settings.openrouter_model.includes('gpt-4')) { return gpt4Tokenizer; } - else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo-0301')) { - return turbo0301Tokenizer; - } else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) { return turboTokenizer; } @@ -1064,9 +1058,14 @@ function decodeTextTokensFromServer(endpoint, ids, resolve) { * Encodes a string to tokens using the server API. * @param {number} tokenizerType Tokenizer type. * @param {string} str String to tokenize. + * @param {string} overrideModel Tokenizer for {tokenizers.MANUAL_SELECTION}. * @returns {number[]} Array of token ids. */ -export function getTextTokens(tokenizerType, str) { +export function getTextTokens(tokenizerType, str, overrideModel = undefined) { + if (overrideModel && tokenizerType !== tokenizers.MANUAL_SELECTION) { + console.warn('overrideModel must be undefined unless using tokenizers.MANUAL_SELECTION', tokenizerType); + return []; + } switch (tokenizerType) { case tokenizers.API_CURRENT: return getTextTokens(currentRemoteTokenizerAPI(), str); @@ -1087,6 +1086,9 @@ export function getTextTokens(tokenizerType, str) { console.warn('This tokenizer type does not support encoding', tokenizerType); return []; } + if (tokenizerType === tokenizers.MANUAL_SELECTION) { + endpointUrl += `?model=${overrideModel}`; + } if (tokenizerType === tokenizers.OPENAI) { endpointUrl += `?model=${getTokenizerModel()}`; }