Improve tokenizer detection

2025-06-05 21:59:27 +02:00 · 2024-01-05 16:17:06 +02:00
parent a39b6b31f4
commit 86d715cc16
2 changed files with 19 additions and 8 deletions
--- a/public/scripts/tokenizers.js
+++ b/public/scripts/tokenizers.js
@@ -30,14 +30,13 @@ export const SENTENCEPIECE_TOKENIZERS = [
    tokenizers.LLAMA,
    tokenizers.MISTRAL,
    tokenizers.YI,
-    tokenizers.API_CURRENT,
-    tokenizers.API_KOBOLD,
-    tokenizers.API_TEXTGENERATIONWEBUI,
    // uncomment when NovelAI releases Kayra and Clio weights, lol
    //tokenizers.NERD,
    //tokenizers.NERD2,
 ];

+export const TEXTGEN_TOKENIZERS = [OOBA, TABBY, KOBOLDCPP, LLAMACPP];
+
 const TOKENIZER_URLS = {
    [tokenizers.GPT2]: {
        encode: '/api/tokenizers/gpt2/encode',
@@ -193,7 +192,7 @@ export function getTokenizerBestMatch(forApi) {
        // - Tokenizer hasn't reported an error previously
        const hasTokenizerError = sessionStorage.getItem(TOKENIZER_WARNING_KEY);
        const isConnected = online_status !== 'no_connection';
-        const isTokenizerSupported = [OOBA, TABBY, KOBOLDCPP, LLAMACPP].includes(textgen_settings.type);
+        const isTokenizerSupported = TEXTGEN_TOKENIZERS.includes(textgen_settings.type);

        if (!hasTokenizerError && isConnected) {
            if (forApi === 'kobold' && kai_flags.can_use_tokenization) {