From 2997522c5251328e94b255a91ebb1b93a8755092 Mon Sep 17 00:00:00 2001
From: Tony Ribeiro
Date: Fri, 13 Oct 2023 01:34:00 +0200
Subject: [PATCH] Removed speech recognition extension (now a third-party
 extension, downloaded from the assets menu).

---
 .../extensions/speech-recognition/browser.js  | 233 ---------
 .../extensions/speech-recognition/index.js    | 452 ------------------
 .../speech-recognition/manifest.json          |  14 -
 .../speech-recognition/streaming.js           | 109 -----
 .../extensions/speech-recognition/style.css   |   3 -
 .../extensions/speech-recognition/vosk.js     |  65 ---
 .../extensions/speech-recognition/whisper.js  |  67 ---
 7 files changed, 943 deletions(-)
 delete mode 100644 public/scripts/extensions/speech-recognition/browser.js
 delete mode 100644 public/scripts/extensions/speech-recognition/index.js
 delete mode 100644 public/scripts/extensions/speech-recognition/manifest.json
 delete mode 100644 public/scripts/extensions/speech-recognition/streaming.js
 delete mode 100644 public/scripts/extensions/speech-recognition/style.css
 delete mode 100644 public/scripts/extensions/speech-recognition/vosk.js
 delete mode 100644 public/scripts/extensions/speech-recognition/whisper.js

diff --git a/public/scripts/extensions/speech-recognition/browser.js b/public/scripts/extensions/speech-recognition/browser.js
deleted file mode 100644
index f51019894..000000000
--- a/public/scripts/extensions/speech-recognition/browser.js
+++ /dev/null
@@ -1,233 +0,0 @@
-// Borrowed from Agnai (AGPLv3)
-// https://github.com/agnaistic/agnai/blob/dev/web/pages/Chat/components/SpeechRecognitionRecorder.tsx
-// First version by Cohee#1207
-// Adapted by Tony-sama
-
-export { BrowserSttProvider }
-
-const DEBUG_PREFIX = "<Speech Recognition module> "
-
-class BrowserSttProvider {
-    //########//
-    // Config //
-    //########//
-
-    settings = {
-        language: ""
-    }
-
-    defaultSettings = {
-        language: "en-US",
-    }
-
-    processTranscriptFunction = null;
-
-    get settingsHtml() {
-        let html = ' \
-        Language <br> \
-        <select id="speech_recognition_browser_provider_language"> \
-        </select> \
-        '
-        return html
-    }
-
-    onSettingsChange() {
-        // Used when provider settings are updated from UI
-        this.settings.language = $("#speech_recognition_browser_provider_language").val();
-        console.debug(DEBUG_PREFIX+"Change language to",this.settings.language);
-        this.loadSettings(this.settings);
-    }
-
-    static capitalizeInterim(interimTranscript) {
-        let capitalizeIndex = -1;
-        if (interimTranscript.length > 2 && interimTranscript[0] === ' ') capitalizeIndex = 1;
-        else if (interimTranscript.length > 1) capitalizeIndex = 0;
-        if (capitalizeIndex > -1) {
-            const spacing = capitalizeIndex > 0 ? ' '.repeat(capitalizeIndex - 1) : '';
-            const capitalized = interimTranscript[capitalizeIndex].toLocaleUpperCase();
-            const rest = interimTranscript.substring(capitalizeIndex + 1);
-            interimTranscript = spacing + capitalized + rest;
-        }
-        return interimTranscript;
-    }
-
-    static composeValues(previous, interim) {
-        let spacing = '';
-        if (previous.endsWith('.')) spacing = ' ';
-        return previous + spacing + interim;
-    }
-
-    loadSettings(settings) {
-        const processTranscript = this.processTranscriptFunction;
-
-        // Populate Provider UI given input settings
-        if (Object.keys(settings).length == 0) {
-            console.debug(DEBUG_PREFIX+"Using default browser STT settings")
-        }
-
-        // Initialise as defaultSettings
-        this.settings = this.defaultSettings;
-
-        for (const key in settings){
-            if (key in this.settings){
-                this.settings[key] = settings[key]
-            } else {
-                throw `Invalid setting passed to Speech recogniton extension (browser): ${key}`
-            }
-        }
-
-        $("#speech_recognition_browser_provider_language").val(this.settings.language);
-
-        const speechRecognitionSettings = $.extend({
-            grammar: '' // Custom grammar
-        }, options);
-
-        const speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
-        const speechRecognitionList = window.SpeechGrammarList || window.webkitSpeechGrammarList;
-
-        if (!speechRecognition) {
-            console.warn(DEBUG_PREFIX+'Speech recognition is not supported in this browser.');
-            $("#microphone_button").hide();
-            toastr.error("Speech recognition is not supported in this browser, use another browser or another provider of SillyTavern-extras Speech recognition extension.", "Speech recognition activation Failed (Browser)", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-            return;
-        }
-
-        const recognition = new speechRecognition();
-
-        if (speechRecognitionSettings.grammar && speechRecognitionList) {
-            speechRecognitionList.addFromString(speechRecognitionSettings.grammar, 1);
-            recognition.grammars = speechRecognitionList;
-        }
-
-        recognition.continuous = true;
-        recognition.interimResults = true;
-        recognition.lang = this.settings.language;
-
-        const textarea = $('#send_textarea');
-        const button = $('#microphone_button');
-
-        let listening = false;
-        button.off('click').on("click", function () {
-            if (listening) {
-                recognition.stop();
-            } else {
-                recognition.start();
-            }
-            listening = !listening;
-        });
-
-        let initialText = '';
-
-        recognition.onresult = function (speechEvent) {
-            let finalTranscript = '';
-            let interimTranscript = ''
-
-            for (let i = speechEvent.resultIndex; i < speechEvent.results.length; ++i) {
-                const transcript = speechEvent.results[i][0].transcript;
-
-                if (speechEvent.results[i].isFinal) {
-                    let interim = BrowserSttProvider.capitalizeInterim(transcript);
-                    if (interim != '') {
-                        let final = finalTranscript;
-                        final = BrowserSttProvider.composeValues(final, interim);
-                        if (final.slice(-1) != '.' && final.slice(-1) != '?') final += '.';
-                        finalTranscript = final;
-                        recognition.abort();
-                        listening = false;
-                    }
-                    interimTranscript = ' ';
-                } else {
-                    interimTranscript += transcript;
-                }
-            }
-
-            interimTranscript = BrowserSttProvider.capitalizeInterim(interimTranscript);
-
-            textarea.val(initialText + finalTranscript + interimTranscript);
-        };
-
-        recognition.onerror = function (event) {
-            console.error('Error occurred in recognition:', event.error);
-            //if ($('#speech_recognition_debug').is(':checked'))
-            //    toastr.error('Error occurred in recognition:'+ event.error, 'STT Generation error (Browser)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-        };
-
-        recognition.onend = function () {
-            listening = false;
-            button.toggleClass('fa-microphone fa-microphone-slash');
-            const newText = textarea.val().substring(initialText.length);
-            textarea.val(textarea.val().substring(0,initialText.length));
-            processTranscript(newText);
-
-        };
-
-        recognition.onstart = function () {
-            initialText = textarea.val();
-            button.toggleClass('fa-microphone fa-microphone-slash');
-
-            if ($("#speech_recognition_message_mode").val() == "replace") {
-                textarea.val("");
-                initialText = ""
-            }
-        };
-
-        $("#microphone_button").show();
-
-        console.debug(DEBUG_PREFIX+"Browser STT settings loaded")
-    }
-
-
-}
diff --git a/public/scripts/extensions/speech-recognition/index.js b/public/scripts/extensions/speech-recognition/index.js
deleted file mode 100644
index 3ac3df229..000000000
--- a/public/scripts/extensions/speech-recognition/index.js
+++ /dev/null
@@ -1,452 +0,0 @@
-/*
-TODO:
- - try pseudo streaming audio by just sending chunk every X seconds and asking VOSK if it is full text.
-*/
-
-import { saveSettingsDebounced } from "../../../script.js";
-import { getContext, extension_settings, ModuleWorkerWrapper } from "../../extensions.js";
-import { VoskSttProvider } from './vosk.js'
-import { WhisperSttProvider } from './whisper.js'
-import { BrowserSttProvider } from './browser.js'
-import { StreamingSttProvider } from './streaming.js'
-import { getMessageTimeStamp } from "../../RossAscends-mods.js";
-export { MODULE_NAME };
-
-const MODULE_NAME = 'Speech Recognition';
-const DEBUG_PREFIX = "<Speech Recognition module> "
-const UPDATE_INTERVAL = 100;
-
-let inApiCall = false;
-
-let sttProviders = {
-    None: null,
-    Browser: BrowserSttProvider,
-    Whisper: WhisperSttProvider,
-    Vosk: VoskSttProvider,
-    Streaming: StreamingSttProvider,
-}
-
-let sttProvider = null
-let sttProviderName = "None"
-
-let audioRecording = false
-const constraints = { audio: { sampleSize: 16, channelCount: 1, sampleRate: 16000 } };
-let audioChunks = [];
-
-async function moduleWorker() {
-    if (sttProviderName != "Streaming") {
-        return;
-    }
-
-    // API is busy
-    if (inApiCall) {
-        return;
-    }
-
-    try {
-        inApiCall = true;
-        const userMessageOriginal = await sttProvider.getUserMessage();
-        let userMessageFormatted = userMessageOriginal.trim();
-
-        if (userMessageFormatted.length > 0)
-        {
-            console.debug(DEBUG_PREFIX+"recorded transcript: \""+userMessageFormatted+"\"");
-
-            let userMessageLower = userMessageFormatted.toLowerCase();
-            // remove punctuation
-            let userMessageRaw = userMessageLower.replace(/[^\w\s\']|_/g, "").replace(/\s+/g, " ");
-
-            console.debug(DEBUG_PREFIX+"raw transcript:",userMessageRaw);
-
-            // Detect trigger words
-            let messageStart = -1;
-
-            if (extension_settings.speech_recognition.Streaming.triggerWordsEnabled) {
-
-                for (const triggerWord of extension_settings.speech_recognition.Streaming.triggerWords) {
-                    const triggerPos = userMessageRaw.indexOf(triggerWord.toLowerCase());
-
-                    // Trigger word not found or not starting message and just a substring
-                    if (triggerPos == -1){ // | (triggerPos > 0 & userMessageFormatted[triggerPos-1] != " ")) {
-                        console.debug(DEBUG_PREFIX+"trigger word not found: ", triggerWord);
-                    }
-                    else {
-                        console.debug(DEBUG_PREFIX+"Found trigger word: ", triggerWord, " at index ", triggerPos);
-                        if (triggerPos < messageStart || messageStart == -1) { // & (triggerPos + triggerWord.length) < userMessageFormatted.length)) {
-                            messageStart = triggerPos; // + triggerWord.length + 1;
-
-                            if (!extension_settings.speech_recognition.Streaming.triggerWordsIncluded)
-                                messageStart = triggerPos + triggerWord.length + 1;
-                        }
-                    }
-                }
-            } else {
-                messageStart = 0;
-            }
-
-            if (messageStart == -1) {
-                console.debug(DEBUG_PREFIX+"message ignored, no trigger word preceding a message. Voice transcript: \""+ userMessageOriginal +"\"");
-                if (extension_settings.speech_recognition.Streaming.debug) {
-                    toastr.info(
-                        "No trigger word preceding a message. Voice transcript: \""+ userMessageOriginal +"\"",
-                        DEBUG_PREFIX+"message ignored.",
-                        { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true },
-                    );
-                }
-            }
-            else{
-                userMessageFormatted = userMessageFormatted.substring(messageStart);
-                // Trim non alphanumeric character from the start
-                messageStart = 0;
-                for(const i of userMessageFormatted) {
-                    if(/^[a-z]$/i.test(i)) {
-                        break;
-                    }
-                    messageStart += 1;
-                }
-                userMessageFormatted = userMessageFormatted.substring(messageStart);
-                userMessageFormatted = userMessageFormatted.charAt(0).toUpperCase() + userMessageFormatted.substring(1);
-                processTranscript(userMessageFormatted);
-            }
-        }
-        else
-        {
-            console.debug(DEBUG_PREFIX+"Received empty transcript, ignored");
-        }
-    }
-    catch (error) {
-        console.debug(error);
-    }
-    finally {
-        inApiCall = false;
-    }
-}
-
-async function processTranscript(transcript) {
-    try {
-        const transcriptOriginal = transcript;
-        let transcriptFormatted = transcriptOriginal.trim();
-
-        if (transcriptFormatted.length > 0)
-        {
-            console.debug(DEBUG_PREFIX+"recorded transcript: \""+transcriptFormatted+"\"");
-            const messageMode = extension_settings.speech_recognition.messageMode;
-            console.debug(DEBUG_PREFIX+"mode: "+messageMode);
-
-            let transcriptLower = transcriptFormatted.toLowerCase()
-            // remove punctuation
-            let transcriptRaw = transcriptLower.replace(/[^\w\s\']|_/g, "").replace(/\s+/g, " ");
-
-            // Check message mapping
-            if (extension_settings.speech_recognition.messageMappingEnabled) {
-                console.debug(DEBUG_PREFIX+"Start searching message mapping into:",transcriptRaw)
-                for (const key in extension_settings.speech_recognition.messageMapping) {
-                    console.debug(DEBUG_PREFIX+"message mapping searching: ", key,"=>",extension_settings.speech_recognition.messageMapping[key]);
-                    if (transcriptRaw.includes(key)) {
-                        var message = extension_settings.speech_recognition.messageMapping[key];
-                        console.debug(DEBUG_PREFIX+"message mapping found: ", key,"=>",extension_settings.speech_recognition.messageMapping[key]);
-                        $("#send_textarea").val(message);
-
-                        if (messageMode == "auto_send") await getContext().generate();
-                        return;
-                    }
-                }
-            }
-
-            console.debug(DEBUG_PREFIX+"no message mapping found, processing transcript as normal message");
-
-            switch (messageMode) {
-                case "auto_send":
-                    $('#send_textarea').val("") // clear message area to avoid double message
-
-                    console.debug(DEBUG_PREFIX+"Sending message")
-                    const context = getContext();
-                    const messageText = transcriptFormatted;
-                    const message = {
-                        name: context.name1,
-                        is_user: true,
-                        send_date: getMessageTimeStamp(),
-                        mes: messageText,
-                    };
-                    context.chat.push(message);
-                    context.addOneMessage(message);
-
-                    await context.generate();
-
-                    $('#debug_output').text(": message sent: \""+ transcriptFormatted +"\"");
-                    break;
-
-                case "replace":
-                    console.debug(DEBUG_PREFIX+"Replacing message")
-                    $('#send_textarea').val(transcriptFormatted);
-                    break;
-
-                case "append":
-                    console.debug(DEBUG_PREFIX+"Appending message")
-                    $('#send_textarea').val($('#send_textarea').val()+" "+transcriptFormatted);
-                    break;
-
-                default:
-                    console.debug(DEBUG_PREFIX+"Not supported stt message mode: "+messageMode)
-
-            }
-        }
-        else
-        {
-            console.debug(DEBUG_PREFIX+"Empty transcript, do nothing");
-        }
-    }
-    catch (error) {
-        console.debug(error);
-    }
-}
-
-function loadNavigatorAudioRecording() {
-    if (navigator.mediaDevices.getUserMedia) {
-        console.debug(DEBUG_PREFIX+' getUserMedia supported by browser.');
-
-        let onSuccess = function(stream) {
-            const mediaRecorder = new MediaRecorder(stream);
-
-            $("#microphone_button").off('click').on("click", function() {
-                if (!audioRecording) {
-                    mediaRecorder.start();
-                    console.debug(mediaRecorder.state);
-                    console.debug("recorder started");
-                    audioRecording = true;
-                    $("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
-                }
-                else {
-                    mediaRecorder.stop();
-                    console.debug(mediaRecorder.state);
-                    console.debug("recorder stopped");
-                    audioRecording = false;
-                    $("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
-                }
-            });
-
-            mediaRecorder.onstop = async function() {
-                console.debug(DEBUG_PREFIX+"data available after MediaRecorder.stop() called: ", audioChunks.length, " chunks");
-                const audioBlob = new Blob(audioChunks, { type: "audio/wav; codecs=0" });
-                audioChunks = [];
-
-                const transcript = await sttProvider.processAudio(audioBlob);
-
-                // TODO: lock and release recording while processing?
-                console.debug(DEBUG_PREFIX+"received transcript:", transcript);
-                processTranscript(transcript);
-            }
-
-            mediaRecorder.ondataavailable = function(e) {
-                audioChunks.push(e.data);
-            }
-        }
-
-        let onError = function(err) {
-            console.debug(DEBUG_PREFIX+"The following error occured: " + err);
-        }
-
-        navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
-
-    } else {
-        console.debug(DEBUG_PREFIX+"getUserMedia not supported on your browser!");
-        toastr.error("getUserMedia not supported", DEBUG_PREFIX+"not supported for your browser.", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-    }
-}
-
-//##############//
-// STT Provider //
-//##############//
-
-function loadSttProvider(provider) {
-    //Clear the current config and add new config
-    $("#speech_recognition_provider_settings").html("");
-
-    // Init provider references
-    extension_settings.speech_recognition.currentProvider = provider;
-    sttProviderName = provider;
-
-    if (!(sttProviderName in extension_settings.speech_recognition)) {
-        console.warn(`Provider ${sttProviderName} not in Extension Settings, initiatilizing provider in settings`);
-        extension_settings.speech_recognition[sttProviderName] = {};
-    }
-
-    $('#speech_recognition_provider').val(sttProviderName);
-
-    if (sttProviderName == "None") {
-        $("#microphone_button").hide();
-        $("#speech_recognition_message_mode_div").hide();
-        $("#speech_recognition_message_mapping_div").hide();
-        return;
-    }
-
-    $("#speech_recognition_message_mode_div").show();
-    $("#speech_recognition_message_mapping_div").show();
-
-    sttProvider = new sttProviders[sttProviderName]
-
-    // Init provider settings
-    $('#speech_recognition_provider_settings').append(sttProvider.settingsHtml);
-
-    // Use microphone button as push to talk
-    if (sttProviderName == "Browser") {
-        sttProvider.processTranscriptFunction = processTranscript;
-        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
-        $("#microphone_button").show();
-    }
-
-    if (sttProviderName == "Vosk" | sttProviderName == "Whisper") {
-        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
-        loadNavigatorAudioRecording();
-        $("#microphone_button").show();
-    }
-
-    if (sttProviderName == "Streaming") {
-        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
-        $("#microphone_button").off('click');
-        $("#microphone_button").hide();
-    }
-
-}
-
-function onSttProviderChange() {
-    const sttProviderSelection = $('#speech_recognition_provider').val();
-    loadSttProvider(sttProviderSelection);
-    saveSettingsDebounced();
-}
-
-function onSttProviderSettingsInput() {
-    sttProvider.onSettingsChange();
-
-    // Persist changes to SillyTavern stt extension settings
-    extension_settings.speech_recognition[sttProviderName] = sttProvider.settings;
-    saveSettingsDebounced();
-    console.info(`Saved settings ${sttProviderName} ${JSON.stringify(sttProvider.settings)}`);
-}
-
-//#############################//
-//  Extension UI and Settings  //
-//#############################//
-
-const defaultSettings = {
-    currentProvider: "None",
-    messageMode: "append",
-    messageMappingText: "",
-    messageMapping: [],
-    messageMappingEnabled: false,
-}
-
-function loadSettings() {
-    if (Object.keys(extension_settings.speech_recognition).length === 0) {
-        Object.assign(extension_settings.speech_recognition, defaultSettings)
-    }
-    $('#speech_recognition_enabled').prop('checked',extension_settings.speech_recognition.enabled);
-    $('#speech_recognition_message_mode').val(extension_settings.speech_recognition.messageMode);
-
-    if (extension_settings.speech_recognition.messageMappingText.length > 0) {
-        $('#speech_recognition_message_mapping').val(extension_settings.speech_recognition.messageMappingText);
-    }
-
-    $('#speech_recognition_message_mapping_enabled').prop('checked',extension_settings.speech_recognition.messageMappingEnabled);
-}
-
-async function onMessageModeChange() {
-    extension_settings.speech_recognition.messageMode = $('#speech_recognition_message_mode').val();
-
-    if(sttProviderName != "Browser" & extension_settings.speech_recognition.messageMode == "auto_send") {
-        $("#speech_recognition_wait_response_div").show()
-    }
-    else {
-        $("#speech_recognition_wait_response_div").hide()
-    }
-
-    saveSettingsDebounced();
-}
-
-async function onMessageMappingChange() {
-    let array = $('#speech_recognition_message_mapping').val().split(",");
-    array = array.map(element => {return element.trim();});
-    array = array.filter((str) => str !== '');
-    extension_settings.speech_recognition.messageMapping = {};
-    for (const text of array) {
-        if (text.includes("=")) {
-            const pair = text.toLowerCase().split("=")
-            extension_settings.speech_recognition.messageMapping[pair[0].trim()] = pair[1].trim()
-            console.debug(DEBUG_PREFIX+"Added mapping", pair[0],"=>", extension_settings.speech_recognition.messageMapping[pair[0]]);
-        }
-        else {
-            console.debug(DEBUG_PREFIX+"Wrong syntax for message mapping, no '=' found in:", text);
-        }
-    }
-
-    $("#speech_recognition_message_mapping_status").text("Message mapping updated to: "+JSON.stringify(extension_settings.speech_recognition.messageMapping))
-    console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.speech_recognition.messageMapping);
-    extension_settings.speech_recognition.messageMappingText = $('#speech_recognition_message_mapping').val()
-    saveSettingsDebounced();
-}
-
-async function onMessageMappingEnabledClick() {
-    extension_settings.speech_recognition.messageMappingEnabled = $('#speech_recognition_message_mapping_enabled').is(':checked');
-    saveSettingsDebounced()
-}
-
-$(document).ready(function () {
-    function addExtensionControls() {
-        const settingsHtml = `
-        <div id="speech_recognition_settings">
-            <div class="inline-drawer">
-                <div class="inline-drawer-toggle inline-drawer-header">
-                    <b>Speech Recognition</b>
-                    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
-                </div>
-                <div class="inline-drawer-content">
-                    <div>
-                        <span>Select Speech-to-text Provider</span> <br>
-                        <select id="speech_recognition_provider">
-                        </select>
-                    </div>
-                    <div id="speech_recognition_message_mode_div">
-                        <span>Message Mode</span> <br>
-                        <select id="speech_recognition_message_mode">
-                            <option value="append">Append</option>
-                            <option value="replace">Replace</option>
-                            <option value="auto_send">Auto send</option>
-                        </select>
-                    </div>
-                    <div id="speech_recognition_message_mapping_div">
-                        <span>Message Mapping</span>
-                        <textarea id="speech_recognition_message_mapping" class="text_pole textarea_compact" rows="4"></textarea>
-                        <span id="speech_recognition_message_mapping_status"></span>
-                        <label class="checkbox_label" for="speech_recognition_message_mapping_enabled">
-                            <input type="checkbox" id="speech_recognition_message_mapping_enabled" name="speech_recognition_message_mapping_enabled">
-                            <small>Enable messages mapping</small>
-                        </label>
-                    </div>
-                    <div id="speech_recognition_provider_settings">
-                    </div>
-                </div>
-            </div>
-        </div>
-        `;
-        $('#extensions_settings').append(settingsHtml);
-        $('#speech_recognition_provider_settings').on('input', onSttProviderSettingsInput);
-        for (const provider in sttProviders) {
-            $('#speech_recognition_provider').append($("<option />").val(provider).text(provider));