tokenizers.js: add MANUAL_SELECTION (and remove turbo-0301 lines)

This commit is contained in:
Succubyss
2024-12-24 11:50:21 -06:00
parent 09dd9762f7
commit c89804677a

View File

@ -32,6 +32,7 @@ export const tokenizers = {
COMMAND_R: 16,
NEMO: 17,
BEST_MATCH: 99,
MANUAL_SELECTION: 411,
};
// A list of local tokenizers that support encoding and decoding token ids.
@ -536,7 +537,6 @@ export function getTokenizerModel() {
return oai_settings.openai_model;
}
const turbo0301Tokenizer = 'gpt-3.5-turbo-0301';
const turboTokenizer = 'gpt-3.5-turbo';
const gpt4Tokenizer = 'gpt-4';
const gpt4oTokenizer = 'gpt-4o';
@ -562,9 +562,6 @@ export function getTokenizerModel() {
if (oai_settings.windowai_model.includes('gpt-4')) {
return gpt4Tokenizer;
}
else if (oai_settings.windowai_model.includes('gpt-3.5-turbo-0301')) {
return turbo0301Tokenizer;
}
else if (oai_settings.windowai_model.includes('gpt-3.5-turbo')) {
return turboTokenizer;
}
@ -610,9 +607,6 @@ export function getTokenizerModel() {
else if (oai_settings.openrouter_model.includes('gpt-4')) {
return gpt4Tokenizer;
}
else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo-0301')) {
return turbo0301Tokenizer;
}
else if (oai_settings.openrouter_model.includes('gpt-3.5-turbo')) {
return turboTokenizer;
}
@ -1064,9 +1058,14 @@ function decodeTextTokensFromServer(endpoint, ids, resolve) {
* Encodes a string to tokens using the server API.
* @param {number} tokenizerType Tokenizer type.
* @param {string} str String to tokenize.
* @param {string} [overrideModel] Model name sent as the `model` query parameter. Must be left undefined unless tokenizerType is tokenizers.MANUAL_SELECTION.
* @returns {number[]} Array of token ids.
*/
export function getTextTokens(tokenizerType, str) {
export function getTextTokens(tokenizerType, str, overrideModel = undefined) {
if (overrideModel && tokenizerType !== tokenizers.MANUAL_SELECTION) {
console.warn('overrideModel must be undefined unless using tokenizers.MANUAL_SELECTION', tokenizerType);
return [];
}
switch (tokenizerType) {
case tokenizers.API_CURRENT:
return getTextTokens(currentRemoteTokenizerAPI(), str);
@ -1087,6 +1086,9 @@ export function getTextTokens(tokenizerType, str) {
console.warn('This tokenizer type does not support encoding', tokenizerType);
return [];
}
if (tokenizerType === tokenizers.MANUAL_SELECTION) {
endpointUrl += `?model=${overrideModel}`;
}
if (tokenizerType === tokenizers.OPENAI) {
endpointUrl += `?model=${getTokenizerModel()}`;
}