Merge branch 'staging' of http://github.com/cohee1207/SillyTavern into staging

2023-07-27 23:35:02 +03:00 · 2023-07-27 23:35:02 +03:00 · 87b9da11c6
parent f56834bb96 77124056b8
commit 87b9da11c6
10 changed files with 1003 additions and 380 deletions
--- a/public/scripts/extensions.js
+++ b/public/scripts/extensions.js
@ -73,6 +73,7 @@ const extension_settings = {
        fluctuation: 0.1,
        enabled: false,
    },
+    speech_recognition: {},
 };

 let modules = [];
--- a/public/scripts/extensions/objective/index.js
+++ b/public/scripts/extensions/objective/index.js
@ -372,8 +372,6 @@ function onEditPromptClick() {
    let popupText = ''
    popupText += `
    <div class="objective_prompt_modal">
-        <div class="alignitemsflexstart flex-container">
-        </div>
        <div>
            <label for="objective-prompt-generate">Generation Prompt</label>
            <textarea id="objective-prompt-generate" type="text" class="text_pole textarea_compact" rows="8"></textarea>
@ -382,12 +380,14 @@ function onEditPromptClick() {
            <label for="objective-prompt-extension-prompt">Injected Prompt</label>
            <textarea id="objective-prompt-extension-prompt" type="text" class="text_pole textarea_compact" rows="8"></textarea>
        </div>
-        <div class="alignitemsflexstart flex-container">
-            <input id="objective-custom-prompt-name" type="text" class="flex1 heightFitContent text_pole widthNatural" maxlength="250" placeholder="Custom Prompt Name">
-            <input id="objective-custom-prompt-save" class="menu_button" type="submit" value="Save Custom Prompt" />
-            <label for="objective-prompt-load"> Load Prompt </label>
+        <div class="objective_prompt_block">
+            <input id="objective-custom-prompt-name" style="flex-grow:2" type="text" class="flex1 heightFitContent text_pole widthNatural" maxlength="250" placeholder="Custom Prompt Name">
+            <input id="objective-custom-prompt-save" style="flex-grow:1" class="menu_button" type="submit" value="Save Prompt" />
+        </div>
+        <div class="objective_prompt_block">
+            <label for="objective-prompt-load">Load Prompt</label>
            <select id="objective-prompt-load"><select>
-            <input id="objective-custom-prompt-delete" class="menu_button" type="submit" value="Delete Custom Prompt" />
+            <input id="objective-custom-prompt-delete" class="menu_button" type="submit" value="Delete Prompt" />
        </div>
    </div>`
    callPopup(popupText, 'text')
--- a/public/scripts/extensions/objective/style.css
+++ b/public/scripts/extensions/objective/style.css
@ -10,6 +10,13 @@
    flex-wrap: wrap;
 }

+/* Flex row for a group of prompt controls: children line up on their text
+   baseline, are separated by a small horizontal gap, and may wrap. */
+.objective_prompt_block {
+    display: flex;
+    flex-wrap: wrap;
+    column-gap: 5px;
+    align-items: baseline;
+}
+
 .objective_block_control {
    align-items: baseline;
 }
--- a/public/scripts/extensions/speech-recognition/browser.js
+++ b/public/scripts/extensions/speech-recognition/browser.js
@ -0,0 +1,233 @@
+// Borrowed from Agnai (AGPLv3)
+// https://github.com/agnaistic/agnai/blob/dev/web/pages/Chat/components/SpeechRecognitionRecorder.tsx
+// First version by Cohee#1207
+// Adapted by Tony-sama
+
+export { BrowserSttProvider }
+
+// Prefix prepended to this file's console messages so they can be told apart from other modules.
+const DEBUG_PREFIX = "<Speech Recognition module (Browser)> "
+
+class BrowserSttProvider {
+    //########//
+    // Config //
+    //########//
+
+    // Currently active provider settings; `language` holds one of the locale
+    // tags offered by settingsHtml (for example "en-US").
+    settings = {
+        language: ""
+    }
+
+    // Fallback values. loadSettings() also uses the keys declared here as the
+    // list of setting names it accepts.
+    defaultSettings = {
+        language: "en-US",
+    }
+
+    // Callback that receives the recognised text when a recognition session
+    // ends; loadSettings() captures whatever is assigned here at call time.
+    processTranscriptFunction = null;
+
+    get settingsHtml() {
+        let html = ' \
+        <span>Language</span> </br> \
+        <select id="speech_recognition_browser_provider_language"> \
+            <option value="ar-SA">ar-SA: Arabic (Saudi Arabia)</option> \
+            <option value="bn-BD">bn-BD: Bangla (Bangladesh)</option> \
+            <option value="bn-IN">bn-IN: Bangla (India)</option> \
+            <option value="cs-CZ">cs-CZ: Czech (Czech Republic)</option> \
+            <option value="da-DK">da-DK: Danish (Denmark)</option> \
+            <option value="de-AT">de-AT: German (Austria)</option> \
+            <option value="de-CH">de-CH: German (Switzerland)</option> \
+            <option value="de-DE">de-DE: German (Germany)</option> \
+            <option value="el-GR">el-GR: Greek (Greece)</option> \
+            <option value="en-AU">en-AU: English (Australia)</option> \
+            <option value="en-CA">en-CA: English (Canada)</option> \
+            <option value="en-GB">en-GB: English (United Kingdom)</option> \
+            <option value="en-IE">en-IE: English (Ireland)</option> \
+            <option value="en-IN">en-IN: English (India)</option> \
+            <option value="en-NZ">en-NZ: English (New Zealand)</option> \
+            <option value="en-US">en-US: English (United States)</option> \
+            <option value="en-ZA">en-ZA: English (South Africa)</option> \
+            <option value="es-AR">es-AR: Spanish (Argentina)</option> \
+            <option value="es-CL">es-CL: Spanish (Chile)</option> \
+            <option value="es-CO">es-CO: Spanish (Columbia)</option> \
+            <option value="es-ES">es-ES: Spanish (Spain)</option> \
+            <option value="es-MX">es-MX: Spanish (Mexico)</option> \
+            <option value="es-US">es-US: Spanish (United States)</option> \
+            <option value="fi-FI">fi-FI: Finnish (Finland)</option> \
+            <option value="fr-BE">fr-BE: French (Belgium)</option> \
+            <option value="fr-CA">fr-CA: French (Canada)</option> \
+            <option value="fr-CH">fr-CH: French (Switzerland)</option> \
+            <option value="fr-FR">fr-FR: French (France)</option> \
+            <option value="he-IL">he-IL: Hebrew (Israel)</option> \
+            <option value="hi-IN">hi-IN: Hindi (India)</option> \
+            <option value="hu-HU">hu-HU: Hungarian (Hungary)</option> \
+            <option value="id-ID">id-ID: Indonesian (Indonesia)</option> \
+            <option value="it-CH">it-CH: Italian (Switzerland)</option> \
+            <option value="it-IT">it-IT: Italian (Italy)</option> \
+            <option value="ja-JP">ja-JP: Japanese (Japan)</option> \
+            <option value="ko-KR">ko-KR: Korean (Republic of Korea)</option> \
+            <option value="nl-BE">nl-BE: Dutch (Belgium)</option> \
+            <option value="nl-NL">nl-NL: Dutch (The Netherlands)</option> \
+            <option value="no-NO">no-NO: Norwegian (Norway)</option> \
+            <option value="pl-PL">pl-PL: Polish (Poland)</option> \
+            <option value="pt-BR">pt-BR: Portugese (Brazil)</option> \
+            <option value="pt-PT">pt-PT: Portugese (Portugal)</option> \
+            <option value="ro-RO">ro-RO: Romanian (Romania)</option> \
+            <option value="ru-RU">ru-RU: Russian (Russian Federation)</option> \
+            <option value="sk-SK">sk-SK: Slovak (Slovakia)</option> \
+            <option value="sv-SE">sv-SE: Swedish (Sweden)</option> \
+            <option value="ta-IN">ta-IN: Tamil (India)</option> \
+            <option value="ta-LK">ta-LK: Tamil (Sri Lanka)</option> \
+            <option value="th-TH">th-TH: Thai (Thailand)</option> \
+            <option value="tr-TR">tr-TR: Turkish (Turkey)</option> \
+            <option value="zh-CN">zh-CN: Chinese (China)</option> \
+            <option value="zh-HK">zh-HK: Chinese (Hond Kong)</option> \
+            <option value="zh-TW">zh-TW: Chinese (Taiwan)</option> \
+        </select> \
+        '
+        return html
+    }
+
+    onSettingsChange() {
+        // Used when provider settings are updated from UI
+        this.settings.language = $("#speech_recognition_browser_provider_language").val();
+        console.debug(DEBUG_PREFIX+"Change language to",this.settings.language);
+        this.loadSettings(this.settings);
+    }
+
+    static capitalizeInterim(interimTranscript) {
+        let capitalizeIndex = -1;
+        if (interimTranscript.length > 2 && interimTranscript[0] === ' ') capitalizeIndex = 1;
+        else if (interimTranscript.length > 1) capitalizeIndex = 0;
+        if (capitalizeIndex > -1) {
+            const spacing = capitalizeIndex > 0 ? ' '.repeat(capitalizeIndex - 1) : '';
+            const capitalized = interimTranscript[capitalizeIndex].toLocaleUpperCase();
+            const rest = interimTranscript.substring(capitalizeIndex + 1);
+            interimTranscript = spacing + capitalized + rest;
+        }
+        return interimTranscript;
+    }
+    
+    static composeValues(previous, interim) {
+        let spacing = '';
+        if (previous.endsWith('.')) spacing = ' ';
+        return previous + spacing + interim;
+    }
+
+    loadSettings(settings) {
+        const processTranscript = this.processTranscriptFunction;
+        
+        // Populate Provider UI given input settings
+        if (Object.keys(settings).length == 0) {
+            console.debug(DEBUG_PREFIX+"Using default browser STT settings")
+        }
+
+        // Initialise as defaultSettings
+        this.settings = this.defaultSettings;
+
+        for (const key in settings){
+            if (key in this.settings){
+                this.settings[key] = settings[key]
+            } else {
+                throw `Invalid setting passed to Speech recogniton extension (browser): ${key}`
+            }
+        }
+
+        $("#speech_recognition_browser_provider_language").val(this.settings.language);
+
+        const speechRecognitionSettings = $.extend({
+            grammar: '' // Custom grammar
+        }, options);
+
+        const speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
+        const speechRecognitionList = window.SpeechGrammarList || window.webkitSpeechGrammarList;
+
+        if (!speechRecognition) {
+            console.warn(DEBUG_PREFIX+'Speech recognition is not supported in this browser.');
+            $("#microphone_button").hide();
+            toastr.error("Speech recognition is not supported in this browser, use another browser or another provider of SillyTavern-extras Speech recognition extension.", "Speech recognition activation Failed (Browser)", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+            return;
+        }
+
+        const recognition = new speechRecognition();
+
+        if (speechRecognitionSettings.grammar && speechRecognitionList) {
+            speechRecognitionList.addFromString(speechRecognitionSettings.grammar, 1);
+            recognition.grammars = speechRecognitionList;
+        }
+
+        recognition.continuous = true;
+        recognition.interimResults = true;
+        recognition.lang = this.settings.language;
+
+        const textarea = $('#send_textarea');
+        const button = $('#microphone_button');
+
+        let listening = false;
+        button.off('click').on("click", function () {
+            if (listening) {
+                recognition.stop();
+            } else {
+                recognition.start();
+            }
+            listening = !listening;
+        });
+
+        let initialText = '';
+
+        recognition.onresult = function (speechEvent) {
+            let finalTranscript = '';
+            let interimTranscript = ''
+
+            for (let i = speechEvent.resultIndex; i < speechEvent.results.length; ++i) {
+            const transcript = speechEvent.results[i][0].transcript;
+
+            if (speechEvent.results[i].isFinal) {
+                let interim = BrowserSttProvider.capitalizeInterim(transcript);
+                if (interim != '') {
+                let final = finalTranscript;
+                final = BrowserSttProvider.composeValues(final, interim);
+                if (final.slice(-1) != '.' & final.slice(-1) != '?') final += '.';
+                finalTranscript = final;
+                recognition.abort();
+                listening = false;
+                }
+                interimTranscript = ' ';
+            } else {
+                interimTranscript += transcript;
+            }
+            }
+
+            interimTranscript = BrowserSttProvider.capitalizeInterim(interimTranscript);
+                
+            textarea.val(initialText + finalTranscript + interimTranscript);
+        };
+
+        recognition.onerror = function (event) {
+            console.error('Error occurred in recognition:', event.error);
+            //if ($('#speech_recognition_debug').is(':checked'))
+            //    toastr.error('Error occurred in recognition:'+ event.error, 'STT Generation error (Browser)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+        };
+
+        recognition.onend = function () {
+            listening = false;
+            button.toggleClass('fa-microphone fa-microphone-slash');
+            const newText = textarea.val().substring(initialText.length);
+            textarea.val(textarea.val().substring(0,initialText.length));
+            processTranscript(newText);
+
+        };
+
+        recognition.onstart = function () {
+            initialText = textarea.val();
+            button.toggleClass('fa-microphone fa-microphone-slash');
+            
+            if ($("#speech_recognition_message_mode").val() == "replace") {
+                textarea.val("");
+                initialText = ""
+            }
+        };
+        
+        $("#microphone_button").show();
+        
+        console.debug(DEBUG_PREFIX+"Browser STT settings loaded")
+    }
+
+
+}
--- a/public/scripts/extensions/speech-recognition/index.js
+++ b/public/scripts/extensions/speech-recognition/index.js
@ -1,110 +1,351 @@
-// Borrowed from Agnai (AGPLv3)
-// https://github.com/agnaistic/agnai/blob/dev/web/pages/Chat/components/SpeechRecognitionRecorder.tsx
-function capitalizeInterim(interimTranscript) {
-    let capitalizeIndex = -1;
-    if (interimTranscript.length > 2 && interimTranscript[0] === ' ') capitalizeIndex = 1;
-    else if (interimTranscript.length > 1) capitalizeIndex = 0;
-    if (capitalizeIndex > -1) {
-        const spacing = capitalizeIndex > 0 ? ' '.repeat(capitalizeIndex - 1) : '';
-        const capitalized = interimTranscript[capitalizeIndex].toLocaleUpperCase();
-        const rest = interimTranscript.substring(capitalizeIndex + 1);
-        interimTranscript = spacing + capitalized + rest;
+/*
+TODO:
+ - try pseudo streaming audio by just sending chunk every X seconds and asking VOSK if it is full text.
+*/
+
+import { saveSettingsDebounced } from "../../../script.js";
+import { getContext, getApiUrl, modules, extension_settings, ModuleWorkerWrapper, doExtrasFetch } from "../../extensions.js";
+import { VoskSttProvider } from './vosk.js'
+import { WhisperSttProvider } from './whisper.js'
+import { BrowserSttProvider } from './browser.js'
+export { MODULE_NAME };
+
+const MODULE_NAME = 'Speech Recognition';
+// Prefix prepended to this file's console messages.
+const DEBUG_PREFIX = "<Speech Recognition module> "
+
+// Provider name (also used as the option value/label in the provider
+// dropdown) mapped to the class to instantiate; "None" has no class.
+let sttProviders = {
+    None: null,
+    Browser: BrowserSttProvider,
+    Whisper: WhisperSttProvider,
+    Vosk: VoskSttProvider,
+}
+
+// Instance and name of the provider selected by loadSttProvider().
+let sttProvider = null
+let sttProviderName = "None"
+
+// True while the MediaRecorder started from the microphone button is recording.
+let audioRecording = false
+// Constraints passed to navigator.mediaDevices.getUserMedia().
+const constraints = { audio: { sampleSize: 16, channelCount: 1, sampleRate: 16000 } };
+// Recorded chunks collected by MediaRecorder.ondataavailable; emptied when a recording stops.
+let audioChunks = [];
+
+async function processTranscript(transcript) {
+    try {
+        const transcriptOriginal =  transcript;
+        let transcriptFormatted = transcriptOriginal.trim();
+
+        if (transcriptFormatted.length > 0)
+        {
+            console.debug(DEBUG_PREFIX+"recorded transcript: \""+transcriptFormatted+"\"");
+            const messageMode = extension_settings.speech_recognition.messageMode;
+            console.debug(DEBUG_PREFIX+"mode: "+messageMode);
+
+            let transcriptLower = transcriptFormatted.toLowerCase()
+            // remove punctuation
+            let transcriptRaw = transcriptLower.replace(/[^\w\s\']|_/g, "").replace(/\s+/g, " ");
+
+            // Check message mapping
+            if (extension_settings.speech_recognition.messageMappingEnabled) {
+                console.debug(DEBUG_PREFIX+"Start searching message mapping into:",transcriptRaw)
+                for (const key in extension_settings.speech_recognition.messageMapping) {
+                    console.debug(DEBUG_PREFIX+"message mapping searching: ", key,"=>",extension_settings.speech_recognition.messageMapping[key]);
+                    if (transcriptRaw.includes(key)) {
+                        var message = extension_settings.speech_recognition.messageMapping[key];
+                        console.debug(DEBUG_PREFIX+"message mapping found: ", key,"=>",extension_settings.speech_recognition.messageMapping[key]);
+                        $("#send_textarea").val(message);
+
+                        if (messageMode == "auto_send") await getContext().generate();
+                        return;
+                    }
+                }
+            }
+
+            console.debug(DEBUG_PREFIX+"no message mapping found, processing transcript as normal message");
+
+            switch (messageMode) {
+                case "auto_send":
+                    $('#send_textarea').val("") // clear message area to avoid double message
+
+                    console.debug(DEBUG_PREFIX+"Sending message")
+                    const context = getContext();
+                    const messageText = transcriptFormatted;
+                    const message = {
+                        name: context.name1,
+                        is_user: true,
+                        is_name: true,
+                        send_date: Date.now(),
+                        mes: messageText,
+                    };
+                    context.chat.push(message);
+                    context.addOneMessage(message);
+                    
+                    await context.generate();
+
+                    $('#debug_output').text("<SST-module DEBUG>: message sent: \""+ transcriptFormatted +"\"");
+                    break;
+
+                case "replace":
+                    console.debug(DEBUG_PREFIX+"Replacing message")
+                    $('#send_textarea').val(transcriptFormatted);
+                    break;
+
+                case "append":
+                    console.debug(DEBUG_PREFIX+"Appending message")
+                    $('#send_textarea').val($('#send_textarea').val()+" "+transcriptFormatted);
+                    break;
+
+                default:
+                    console.debug(DEBUG_PREFIX+"Not supported stt message mode: "+messageMode)
+
+            }
+        }
+        else
+        {
+            console.debug(DEBUG_PREFIX+"Empty transcript, do nothing");
+        }
+    }
+    catch (error) {
+        console.debug(error);
    }
-    return interimTranscript;
 }

-function composeValues(previous, interim) {
-    let spacing = '';
-    if (previous.endsWith('.')) spacing = ' ';
-    return previous + spacing + interim;
+function loadNavigatorAudioRecording() {
+    if (navigator.mediaDevices.getUserMedia) {
+        console.debug(DEBUG_PREFIX+' getUserMedia supported by browser.');
+      
+        let onSuccess = function(stream) {
+          const mediaRecorder = new MediaRecorder(stream);
+      
+          $("#microphone_button").off('click').on("click", function() {
+            if (!audioRecording) {
+                mediaRecorder.start();
+                console.debug(mediaRecorder.state);
+                console.debug("recorder started");
+                audioRecording = true;
+                $("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
+            }
+            else {
+                mediaRecorder.stop();
+                console.debug(mediaRecorder.state);
+                console.debug("recorder stopped");
+                audioRecording = false;
+                $("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
+            }
+          });
+      
+          mediaRecorder.onstop = async function() {
+            console.debug(DEBUG_PREFIX+"data available after MediaRecorder.stop() called: ", audioChunks.length, " chunks");
+            const audioBlob = new Blob(audioChunks, { type: "audio/wav; codecs=0" });
+            audioChunks = [];
+            
+            const transcript = await sttProvider.processAudio(audioBlob);
+            
+            // TODO: lock and release recording while processing?
+            console.debug(DEBUG_PREFIX+"received transcript:", transcript);
+            processTranscript(transcript);
+          }
+      
+          mediaRecorder.ondataavailable = function(e) {
+            audioChunks.push(e.data);
+          }
+        }
+      
+        let onError = function(err) {
+          console.debug(DEBUG_PREFIX+"The following error occured: " + err);
+        }
+      
+        navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
+      
+      } else {
+         console.debug(DEBUG_PREFIX+"getUserMedia not supported on your browser!");
+         toastr.error("getUserMedia not supported", DEBUG_PREFIX+"not supported for your browser.", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+      }
 }

-(function ($) {
-    $.fn.speechRecognitionPlugin = function (options) {
-        const settings = $.extend({
-            grammar: '' // Custom grammar
-        }, options);
+//##############//
+// STT Provider //
+//##############//

-        const speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
-        const speechRecognitionList = window.SpeechGrammarList || window.webkitSpeechGrammarList;
+function loadSttProvider(provider) {
+    //Clear the current config and add new config
+    $("#speech_recognition_provider_settings").html("");

-        if (!speechRecognition) {
-            console.warn('Speech recognition is not supported in this browser.');
-            return;
+    // Init provider references
+    extension_settings.speech_recognition.currentProvider = provider;
+    sttProviderName = provider;
+
+    if (!(sttProviderName in extension_settings.speech_recognition)) {
+        console.warn(`Provider ${sttProviderName} not in Extension Settings, initiatilizing provider in settings`);
+        extension_settings.speech_recognition[sttProviderName] = {};
+    }
+    
+    $('#speech_recognition_provider').val(sttProviderName);
+
+    if (sttProviderName == "None") {
+        $("#microphone_button").hide();
+        $("#speech_recognition_message_mode_div").hide();
+        $("#speech_recognition_message_mapping_div").hide();
+        return;
+    }
+
+    $("#speech_recognition_message_mode_div").show();
+    $("#speech_recognition_message_mapping_div").show();
+
+    sttProvider = new sttProviders[sttProviderName]
+
+    // Init provider settings
+    $('#speech_recognition_provider_settings').append(sttProvider.settingsHtml);
+
+    // Use microphone button as push to talk
+    if (sttProviderName == "Browser") {
+        sttProvider.processTranscriptFunction = processTranscript;
+        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
+    }
+    else {
+        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
+        loadNavigatorAudioRecording();
+
+        $("#microphone_button").show();
+    }
+}
+
+function onSttProviderChange() {
+    const sttProviderSelection = $('#speech_recognition_provider').val();
+    loadSttProvider(sttProviderSelection);
+    saveSettingsDebounced();
+}
+
+function onSttProviderSettingsInput() {
+    sttProvider.onSettingsChange();
+
+    // Persist changes to SillyTavern stt extension settings
+    extension_settings.speech_recognition[sttProviderName] = sttProvider.settings;
+    saveSettingsDebounced();
+    console.info(`Saved settings ${sttProviderName} ${JSON.stringify(sttProvider.settings)}`);
+}
+
+//#############################//
+//  Extension UI and Settings  //
+//#############################//
+
+const defaultSettings = {
+    currentProvider: "None",
+    messageMode: "append",
+    messageMappingText: "",
+    messageMapping: [],
+    messageMappingEnabled: false
+}
+
+function loadSettings() {
+    if (Object.keys(extension_settings.speech_recognition).length === 0) {
+        Object.assign(extension_settings.speech_recognition, defaultSettings)
+    }
+    $('#speech_recognition_enabled').prop('checked',extension_settings.speech_recognition.enabled);
+    $('#speech_recognition_message_mode').val(extension_settings.speech_recognition.messageMode);
+
+    if (extension_settings.speech_recognition.messageMappingText.length > 0) {
+        $('#speech_recognition_message_mapping').val(extension_settings.speech_recognition.messageMappingText);
+    }
+
+    $('#speech_recognition_message_mapping_enabled').prop('checked',extension_settings.speech_recognition.messageMappingEnabled);
+}
+
+async function onMessageModeChange() {
+    extension_settings.speech_recognition.messageMode = $('#speech_recognition_message_mode').val();
+
+    if(sttProviderName != "Browser" & extension_settings.speech_recognition.messageMode == "auto_send") {
+        $("#speech_recognition_wait_response_div").show()
+    }
+    else {
+        $("#speech_recognition_wait_response_div").hide()
+    }
+
+    saveSettingsDebounced();
+}
+
+async function onMessageMappingChange() {
+    let array = $('#speech_recognition_message_mapping').val().split(",");
+    array = array.map(element => {return element.trim();});
+    array = array.filter((str) => str !== '');
+    extension_settings.speech_recognition.messageMapping = {};
+    for (const text of array) {
+        if (text.includes("=")) {
+            const pair = text.toLowerCase().split("=")
+            extension_settings.speech_recognition.messageMapping[pair[0].trim()] = pair[1].trim()
+            console.debug(DEBUG_PREFIX+"Added mapping", pair[0],"=>", extension_settings.speech_recognition.messageMapping[pair[0]]);
        }
-
-        const recognition = new speechRecognition();
-
-        if (settings.grammar && speechRecognitionList) {
-            speechRecognitionList.addFromString(settings.grammar, 1);
-            recognition.grammars = speechRecognitionList;
+        else {
+            console.debug(DEBUG_PREFIX+"Wrong syntax for message mapping, no '=' found in:", text);
        }
+    }
+    
+    $("#speech_recognition_message_mapping_status").text("Message mapping updated to: "+JSON.stringify(extension_settings.speech_recognition.messageMapping))
+    console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.speech_recognition.messageMapping);
+    extension_settings.speech_recognition.messageMappingText = $('#speech_recognition_message_mapping').val()
+    saveSettingsDebounced();
+}

-        recognition.continuous = true;
-        recognition.interimResults = true;
-        // TODO: This should be configurable.
-        recognition.lang = 'en-US'; // Set the language to English (US).
+async function onMessageMappingEnabledClick() {
+    extension_settings.speech_recognition.messageMappingEnabled = $('#speech_recognition_message_mapping_enabled').is(':checked');
+    saveSettingsDebounced()
+}

-        const $textarea = this;
-        const $button = $('<div class="fa-solid fa-microphone speech-toggle" title="Click to speak"></div>');
+$(document).ready(function () {
+    function addExtensionControls() {
+        const settingsHtml = `
+        <div id="speech_recognition_settings">
+            <div class="inline-drawer">
+                <div class="inline-drawer-toggle inline-drawer-header">
+                    <b>Speech Recognition</b>
+                    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
+                </div>
+                <div class="inline-drawer-content">
+                    <div>
+                        <span>Select Speech-to-text Provider</span> </br>
+                        <select id="speech_recognition_provider">
+                        </select>
+                    </div>
+                    <div id="speech_recognition_message_mode_div">
+                        <span>Message Mode</span> </br>
+                        <select id="speech_recognition_message_mode">
+                            <option value="append">Append</option>
+                            <option value="replace">Replace</option>
+                            <option value="auto_send">Auto send</option>
+                        </select>
+                    </div>
+                    <div id="speech_recognition_message_mapping_div">
+                        <span>Message Mapping</span>
+                        <textarea id="speech_recognition_message_mapping" class="text_pole textarea_compact" type="text" rows="4" placeholder="Enter comma separated phrases mapping, example:\ncommand delete = /del 2,\nslash delete = /del 2,\nsystem roll = /roll 2d6,\nhey continue = /continue"></textarea>
+                        <span id="speech_recognition_message_mapping_status"></span>
+                        <label class="checkbox_label" for="speech_recognition_message_mapping_enabled">
+                            <input type="checkbox" id="speech_recognition_message_mapping_enabled" name="speech_recognition_message_mapping_enabled">
+                            <small>Enable messages mapping</small>
+                        </label>
+                    </div>
+                    <form id="speech_recognition_provider_settings" class="inline-drawer-content">
+                    </form>
+                </div>
+            </div>
+        </div>
+        `;
+        $('#extensions_settings').append(settingsHtml);
+        $('#speech_recognition_provider_settings').on('input', onSttProviderSettingsInput);
+        for (const provider in sttProviders) {
+            $('#speech_recognition_provider').append($("<option />").val(provider).text(provider));
+            console.debug(DEBUG_PREFIX+"added option "+provider);
+        }
+        $('#speech_recognition_provider').on('change', onSttProviderChange);
+        $('#speech_recognition_message_mode').on('change', onMessageModeChange);
+        $('#speech_recognition_message_mapping').on('change', onMessageMappingChange);
+        $('#speech_recognition_message_mapping_enabled').on('click', onMessageMappingEnabledClick);
+        
+        const $button = $('<div id="microphone_button" class="fa-solid fa-microphone speech-toggle" title="Click to speak"></div>');
        $('#send_but_sheld').prepend($button);

-        let listening = false;
-        $button.on('click', function () {
-            if (listening) {
-                recognition.stop();
-            } else {
-                recognition.start();
-            }
-            listening = !listening;
-        });
+    }
+    addExtensionControls(); // No init dependencies
+    loadSettings(); // Depends on Extension Controls and loadTtsProvider
+    loadSttProvider(extension_settings.speech_recognition.currentProvider); // No dependencies

-        let initialText = '';
-
-        recognition.onresult = function (speechEvent) {
-            let finalTranscript = '';
-            let interimTranscript = ''
-
-            for (let i = speechEvent.resultIndex; i < speechEvent.results.length; ++i) {
-              const transcript = speechEvent.results[i][0].transcript;
-
-              if (speechEvent.results[i].isFinal) {
-                let interim = capitalizeInterim(transcript);
-                if (interim != '') {
-                  let final = finalTranscript;
-                  final = composeValues(final, interim) + '.';
-                  finalTranscript = final;
-                  recognition.abort();
-                  listening = false;
-                }
-                interimTranscript = ' ';
-              } else {
-                interimTranscript += transcript;
-              }
-            }
-
-            interimTranscript = capitalizeInterim(interimTranscript);
-
-            $textarea.val(initialText + finalTranscript + interimTranscript);
-        };
-
-        recognition.onerror = function (event) {
-            console.error('Error occurred in recognition:', event.error);
-        };
-
-        recognition.onend = function () {
-            listening = false;
-            $button.toggleClass('fa-microphone fa-microphone-slash');
-        };
-
-        recognition.onstart = function () {
-            initialText = $textarea.val();
-            $button.toggleClass('fa-microphone fa-microphone-slash');
-        };
-    };
-}(jQuery));
-
-jQuery(() => {
-    const $textarea = $('#send_textarea');
-    $textarea.speechRecognitionPlugin();
-});
+    //const wrapper = new ModuleWorkerWrapper(moduleWorker);
+    //setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
+    //moduleWorker();
+})
--- a/public/scripts/extensions/speech-recognition/manifest.json
+++ b/public/scripts/extensions/speech-recognition/manifest.json
@ -2,10 +2,13 @@
    "display_name": "Speech Recognition",
    "loading_order": 13,
    "requires": [],
-    "optional": [],
+    "optional": [
+        "vosk-speech-recognition",
+        "whisper-speech-recognition"
+    ],
    "js": "index.js",
    "css": "style.css",
-    "author": "Cohee#1207",
-    "version": "1.0.0",
+    "author": "Cohee#1207 and Keij#6799",
+    "version": "1.1.0",
    "homePage": "https://github.com/SillyTavern/SillyTavern"
 }
--- a/public/scripts/extensions/speech-recognition/vosk.js
+++ b/public/scripts/extensions/speech-recognition/vosk.js
@ -0,0 +1,81 @
+import { getApiUrl, doExtrasFetch } from "../../extensions.js";
+export { VoskSttProvider }
+
+const DEBUG_PREFIX = "<Speech Recognition module (Vosk)> "
+
+/**
+ * Speech-to-text provider that posts audio to the Vosk endpoint of the Extras API.
+ */
+class VoskSttProvider {
+    //########//
+    // Config //
+    //########//
+
+    settings
+
+    defaultSettings = {
+    }
+
+    /** HTML for the provider-specific settings UI; this provider has none. */
+    get settingsHtml() {
+        return ""
+    }
+
+    onSettingsChange() {
+        // Used when provider settings are updated from UI
+    }
+
+    /**
+     * Load saved provider settings on top of the defaults.
+     * @param {object} [settings] Saved settings; a missing value is treated as empty.
+     * @throws {string} If a key is not declared in defaultSettings.
+     */
+    loadSettings(settings) {
+        const saved = settings ?? {}
+
+        if (Object.keys(saved).length == 0) {
+            console.debug(DEBUG_PREFIX+"Using default vosk STT extension settings")
+        }
+
+        // Only accept keys defined in defaultSettings.
+        // Copy the defaults so loaded values never leak back into defaultSettings.
+        this.settings = { ...this.defaultSettings }
+
+        for (const key in saved) {
+            if (key in this.settings) {
+                this.settings[key] = saved[key]
+            } else {
+                throw `Invalid setting passed to STT extension: ${key}`
+            }
+        }
+
+        console.debug(DEBUG_PREFIX+"Vosk STT settings loaded")
+    }
+
+    /**
+     * Send audio to the Extras API and return the recognized text.
+     * @param {Blob} audioblob Audio data; uploaded as form field AudioFile with file name record.wav.
+     * @returns {Promise<string>} The transcript field of the JSON response.
+     * @throws {Error} If the API responds with a non-OK status.
+     */
+    async processAudio(audioblob) {
+        const requestData = new FormData();
+        requestData.append('AudioFile', audioblob, 'record.wav');
+
+        const url = new URL(getApiUrl());
+        url.pathname = '/api/speech-recognition/vosk/process-audio';
+
+        const apiResult = await doExtrasFetch(url, {
+            method: 'POST',
+            body: requestData,
+        });
+
+        if (!apiResult.ok) {
+            toastr.error(apiResult.statusText, 'STT Generation Failed  (Vosk)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+            throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
+        }
+
+        const result = await apiResult.json();
+        return result.transcript;
+    }
+}
--- a/public/scripts/extensions/speech-recognition/whisper.js
+++ b/public/scripts/extensions/speech-recognition/whisper.js
@ -0,0 +1,82 @
+import { getApiUrl, doExtrasFetch } from "../../extensions.js";
+export { WhisperSttProvider }
+
+const DEBUG_PREFIX = "<Speech Recognition module (Whisper)> "
+
+/**
+ * Speech-to-text provider that posts audio to the Whisper endpoint of the Extras API.
+ */
+class WhisperSttProvider {
+    //########//
+    // Config //
+    //########//
+
+    settings
+
+    defaultSettings = {
+        //model_path: "",
+    }
+
+    /** HTML for the provider-specific settings UI; this provider has none. */
+    get settingsHtml() {
+        return ""
+    }
+
+    onSettingsChange() {
+        // Used when provider settings are updated from UI
+    }
+
+    /**
+     * Load saved provider settings on top of the defaults.
+     * @param {object} [settings] Saved settings; a missing value is treated as empty.
+     * @throws {string} If a key is not declared in defaultSettings.
+     */
+    loadSettings(settings) {
+        const saved = settings ?? {}
+
+        if (Object.keys(saved).length == 0) {
+            console.debug(DEBUG_PREFIX+"Using default Whisper STT extension settings")
+        }
+
+        // Only accept keys defined in defaultSettings.
+        // Copy the defaults so loaded values never leak back into defaultSettings.
+        this.settings = { ...this.defaultSettings }
+
+        for (const key in saved) {
+            if (key in this.settings) {
+                this.settings[key] = saved[key]
+            } else {
+                throw `Invalid setting passed to STT extension: ${key}`
+            }
+        }
+
+        console.debug(DEBUG_PREFIX+"Whisper STT settings loaded")
+    }
+
+    /**
+     * Send audio to the Extras API and return the recognized text.
+     * @param {Blob} audioblob Audio data; uploaded as form field AudioFile with file name record.wav.
+     * @returns {Promise<string>} The transcript field of the JSON response.
+     * @throws {Error} If the API responds with a non-OK status.
+     */
+    async processAudio(audioblob) {
+        const requestData = new FormData();
+        requestData.append('AudioFile', audioblob, 'record.wav');
+
+        const url = new URL(getApiUrl());
+        url.pathname = '/api/speech-recognition/whisper/process-audio';
+
+        const apiResult = await doExtrasFetch(url, {
+            method: 'POST',
+            body: requestData,
+        });
+
+        if (!apiResult.ok) {
+            toastr.error(apiResult.statusText, 'STT Generation Failed (Whisper)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+            throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
+        }
+
+        const result = await apiResult.json();
+        return result.transcript;
+    }
+}
--- a/public/scripts/extensions/tts/coquitts.js
+++ b/public/scripts/extensions/tts/coquitts.js
@ -1,7 +1,15 @@
+import { eventSource, event_types } from "../../../script.js"
 import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"

 export { CoquiTtsProvider }

+function throwIfModuleMissing() {
+    // Guard for Extras API calls: notify the user and abort when the coqui-tts module is not in the loaded module list.
+    if (modules.includes('coqui-tts')) return
+    toastr.error(`Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.`)
+    throw new Error(`Coqui TTS module not loaded.`)
+}
+
 class CoquiTtsProvider {
    //########//
    // Config //
@ -12,51 +20,45 @@ class CoquiTtsProvider {
    separator = ' .. '

    defaultSettings = {
-        provider_endpoint: "http://localhost:5100",
        voiceMap: {}
    }


    get settingsHtml() {
        let html = `
-        <div style="display: flex; width: 100%;">
-        <div style="flex: 80%;">
-          <label for="model">Model:</label>
-          <select id="model">
-            <option value="none">Select Model</option>
-            <!-- Add more model options here -->
-          </select>
+        <div class="flex wide100p flexGap10 alignitemscenter">
+            <div style="flex: 80%;">
+                <label for="coqui_model">Model:</label>
+                <select id="coqui_model">
+                    <option value="none">Select Model</option>
+                    <!-- Add more model options here -->
+                </select>
+            </div>
+            <div class="flex justifyCenter" style="flex: 20%;">
+                <button id="coqui_preview" class="menu_button menu_button_icon wide100p" type="button">
+                </button>
+            </div>
        </div>
-        <div style="flex: 20%; display: flex; justify-content: center;">
-            <button id="preview" class="menu_button" type="button" style="width: 100%;">Play</button>
-        </div>
-      </div>
-      
-    
-        <div style="display: flex; width: 100%;">
-        <div style="flex: 1; margin-right: 10px;">
-            <label for="speaker">Speaker:</label>
-            <select id="speaker">
-                <!-- Add more speaker options here -->
-            </select>
-        </div>
-        <div style="flex: 1;">
-            <label for="language">Language:</label>
-            <select id="language">
-                <!-- Add more language options here -->
-            </select>
-        </div>
-    </div>

-        <label for="Coqui_tts_endpoint">Provider Endpoint:</label>
-        <input id="Coqui_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
+        <div class="flex wide100p flexGap10">
+            <div class="flex1">
+                <label for="coqui_speaker">Speaker:</label>
+                <select id="coqui_speaker">
+                    <!-- Add more speaker options here -->
+                </select>
+            </div>
+            <div class="flex1">
+                <label for="coqui_language">Language:</label>
+                <select id="coqui_language">
+                    <!-- Add more language options here -->
+                </select>
+            </div>
+        </div>
        `
        return html
    }

    onSettingsChange() {
-        // Used when provider settings are updated from UI
-        this.settings.provider_endpoint = $('#Coqui_tts_endpoint').val()
    }

    loadSettings(settings) {
@ -64,70 +66,61 @@ class CoquiTtsProvider {
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
        }
-        
-        const modelSelect = document.getElementById('model');
-        const previewButton = document.getElementById('preview');
+
+        const modelSelect = document.getElementById('coqui_model');
+        const previewButton = document.getElementById('coqui_preview');
        previewButton.addEventListener('click', () => {
            const selectedModel = modelSelect.value;
            this.sampleTtsVoice(selectedModel);
        });//add event listener to button
- 
-     
+
        previewButton.disabled = true;
        previewButton.innerText = "Select Model";

-
        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings

-        for (const key in settings){
-            if (key in this.settings){
+        for (const key in settings) {
+            if (key in this.settings) {
                this.settings[key] = settings[key]
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`
            }
        }

-        const apiCheckInterval = setInterval(() => {
-            // Use Extras API if TTS support is enabled
-            if (modules.includes('tts') || modules.includes('Coqui-tts')) {
-                const baseUrl = new URL(getApiUrl());
-                baseUrl.pathname = '/api/coqui-tts/coqui-tts';
-                this.settings.provider_endpoint = baseUrl.toString();
-                $('#Coqui_tts_endpoint').val(this.settings.provider_endpoint);
-                clearInterval(apiCheckInterval);
-            }
-        }, 2000);
-
-        $('#Coqui_tts_endpoint').val(this.settings.provider_endpoint)
-      
        const textexample = document.getElementById('tts_voice_map');
        textexample.placeholder = 'Enter comma separated map of charName:ttsName[speakerID][langID]. Example: \nAqua:tts_models--en--ljspeech--glow-tts\model_file.pth,\nDarkness:tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][3]';

        //Load models function
-        this.getModels();
+        eventSource.on(event_types.EXTRAS_CONNECTED, () => {
+            this.getModels();
+        });
        this.onttsCoquiHideButtons();
        console.info("Settings loaded")
    }

-    async onttsCoquiHideButtons(){
+    async onttsCoquiHideButtons() {
        // Get references to the select element and the two input elements
        const ttsProviderSelect = document.getElementById('tts_provider');
        const ttsVoicesInput = document.getElementById('tts_voices');
        const ttsPreviewInput = document.getElementById('tts_preview');
-    
+
+        ttsProviderSelect.addEventListener('click', () => {
+            this.getModels();
+         });
+
        // Add an event listener to the 'change' event of the tts_provider select element
        ttsProviderSelect.addEventListener('change', () => {
-        // Check if the selected value is 'Coqui'
-        if (ttsProviderSelect.value === 'Coqui') {
-            ttsVoicesInput.style.display = 'none'; // Hide the tts_voices input
-            ttsPreviewInput.style.display = ''; // Show the tts_preview input
-        } else {
-            ttsVoicesInput.style.display = ''; // Show the tts_voices input
-            ttsPreviewInput.style.display = 'none'; // Hide the tts_preview input
-        }
+            // Check if the selected value is 'Coqui'
+            if (ttsProviderSelect.value === 'Coqui') {
+                ttsVoicesInput.style.display = 'none'; // Hide the tts_voices input
+                ttsPreviewInput.style.display = ''; // Show the tts_preview input
+            } else {
+                ttsVoicesInput.style.display = ''; // Show the tts_voices input
+                ttsPreviewInput.style.display = 'none'; // Hide the tts_preview input
+            }
        });
-}
+    }

    async onApplyClick() {
        return
@ -135,262 +128,267 @@ class CoquiTtsProvider {

    async getLang() {
        try {
-          const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multlang`);
-          if (!response.ok) {
-            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-          }
-          const voiceData = await response.json();
-      
-          const modelSelect = document.getElementById('language');
-          modelSelect.innerHTML = ''; // Clear existing options
-      
-          if (Object.keys(voiceData).length === 0) {
-            const option = document.createElement('option');
-            option.value = 'none';
-            option.textContent = 'None';
-            modelSelect.appendChild(option);
-          } else {
-            for (const [key, value] of Object.entries(voiceData)) {
-              const option = document.createElement('option');
-              option.value = key;
-              option.textContent = key + ": " + value;
-              modelSelect.appendChild(option);
-            }
-          }
-        } catch (error) {
-          //console.error('Error fetching voice data:', error);
-      
-          // Remove all options except "None"
-          const modelSelect = document.getElementById('language');
-          modelSelect.innerHTML = '';
-      
-          const option = document.createElement('option');
-          option.value = 'none';
-          option.textContent = 'None';
-          modelSelect.appendChild(option);
-        } 
-      }
-
-
-      async getSpeakers() {
-          try {
-            const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multspeaker`);
+            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/multlang`);
            if (!response.ok) {
-              throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const voiceData = await response.json();
-        
-            const modelSelect = document.getElementById('speaker');
-            modelSelect.innerHTML = ''; // Clear existing options
-        
+
+            const languageSelect = document.getElementById('coqui_language');
+            languageSelect.innerHTML = ''; // Clear existing options
+
            if (Object.keys(voiceData).length === 0) {
-              const option = document.createElement('option');
-              option.value = 'none';
-              option.textContent = 'None';
-              modelSelect.appendChild(option);
-            } else {
-              for (const [index, name] of Object.entries(voiceData)) {
                const option = document.createElement('option');
-                option.value = index;
-                option.textContent = index + ": " + name;
-                modelSelect.appendChild(option);
-              }
+                option.value = 'none';
+                option.textContent = 'None';
+                languageSelect.appendChild(option);
+            } else {
+                for (const [key, value] of Object.entries(voiceData)) {
+                    const option = document.createElement('option');
+                    option.value = key;
+                    option.textContent = key + ": " + value;
+                    languageSelect.appendChild(option);
+                }
            }
-          } catch (error) {
+        } catch (error) {
            //console.error('Error fetching voice data:', error);
-        
+
            // Remove all options except "None"
-            const modelSelect = document.getElementById('speaker');
-            modelSelect.innerHTML = '';
-        
+            const languageSelect = document.getElementById('coqui_language');
+            languageSelect.innerHTML = '';
+
+            const option = document.createElement('option');
+            option.value = 'none';
+            option.textContent = 'None';
+            languageSelect.appendChild(option);
+        }
+    }
+
+
+    async getSpeakers() {
+        try {
+            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/multspeaker`);
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+            }
+            const voiceData = await response.json();
+
+            const speakerSelect = document.getElementById('coqui_speaker');
+            speakerSelect.innerHTML = ''; // Clear existing options
+
+            if (Object.keys(voiceData).length === 0) {
+                const option = document.createElement('option');
+                option.value = 'none';
+                option.textContent = 'None';
+                speakerSelect.appendChild(option);
+            } else {
+                for (const [index, name] of Object.entries(voiceData)) {
+                    const option = document.createElement('option');
+                    option.value = index;
+                    option.textContent = index + ": " + name;
+                    speakerSelect.appendChild(option);
+                }
+            }
+        } catch (error) {
+            //console.error('Error fetching voice data:', error);
+
+            // Remove all options except "None"
+            const speakerSelect = document.getElementById('coqui_speaker');
+            speakerSelect.innerHTML = '';
+
+            const option = document.createElement('option');
+            option.value = 'none';
+            option.textContent = 'None';
+            speakerSelect.appendChild(option);
+        }
+    }
+
+    async getModels() {
+        try {
+            throwIfModuleMissing();
+            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/list`);
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+            }
+            const voiceIds = await response.json();
+
+            const modelSelect = document.getElementById('coqui_model');
+            if (voiceIds.length === 0) {
+                const option = document.createElement('option');
+                option.value = 'none';
+                option.textContent = 'Select Model';
+                modelSelect.appendChild(option);
+            } else {
+                voiceIds.forEach(voiceId => {
+                    const option = document.createElement('option');
+                    option.value = voiceId;
+                    option.textContent = voiceId;
+                    modelSelect.appendChild(option);
+                });
+            }
+
+            // Update provider endpoint on model selection change
+            modelSelect.addEventListener('change', () => {
+                const selectedModel = modelSelect.value;
+                this.LoadModel(selectedModel);
+            });
+        } catch (error) {
+            console.error('Error fetching voice IDs:', error);
+
+            // Add "None" option when the request fails or the response is empty
+            const modelSelect = document.getElementById('coqui_model');
            const option = document.createElement('option');
            option.value = 'none';
            option.textContent = 'None';
            modelSelect.appendChild(option);
-          }           
-      }
-      
-      async getModels() {
-        try {
-          const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/list`);
-          if (!response.ok) {
-            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-          }
-          const voiceIds = await response.json();
-      
-          const modelSelect = document.getElementById('model');
-          if (voiceIds.length === 0) {
-            const option = document.createElement('option');
-            option.value = 'none';
-            option.textContent = 'Select Model';
-            modelSelect.appendChild(option);
-          } else {
-            voiceIds.forEach(voiceId => {
-              const option = document.createElement('option');
-              option.value = voiceId;
-              option.textContent = voiceId;
-              modelSelect.appendChild(option);
-            });
-          }
-      
-          // Update provider endpoint on model selection change
-          modelSelect.addEventListener('change', () => {
-            const selectedModel = modelSelect.value;
-            this.LoadModel(selectedModel);
-          });
-        } catch (error) {
-          console.error('Error fetching voice IDs:', error);
-      
-          // Add "None" option when the request fails or the response is empty
-          const modelSelect = document.getElementById('model');
-          const option = document.createElement('option');
-          option.value = 'none';
-          option.textContent = 'None';
-          modelSelect.appendChild(option);
        }
-      }
+    }

-      async LoadModel(selectedModel) {
-        const previewButton = document.getElementById('preview');
+    async LoadModel(selectedModel) {
+        const previewButton = document.getElementById('coqui_preview');
        previewButton.disabled = true;
        previewButton.innerText = "Loading";
        try {
-          const response = await fetch(`${this.defaultSettings.provider_endpoint}/api/coqui-tts/load?_model=${selectedModel}`);
-          if (!response.ok) {
-            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
-          }
-          this.getSpeakers();
-          this.getLang();
+            throwIfModuleMissing();
+            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/load?_model=${selectedModel}`);
+            if (!response.ok) {
+                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
+            }
+            this.getSpeakers();
+            this.getLang();

-          const previewButton = document.getElementById('preview');
-          previewButton.disabled = false;
-          previewButton.innerText = "Play";
+            const previewButton = document.getElementById('coqui_preview');
+            previewButton.disabled = false;
+            previewButton.innerText = "Play";

        } catch (error) {
-          console.error('Error updating provider endpoint:', error);
+            console.error('Error updating provider endpoint:', error);
        }
-      }
+    }

-      async getVoice(voiceName) {
+    async getVoice(voiceName) {
        //tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][1]
        //tts_models--en--ljspeech--glow-tts\model_file.pth
-        
+
        let _voiceNameOrg = voiceName; // Store the original voiceName in a variable _voiceNameOrg
        voiceName = voiceName.replace(/(\[\d+\])+$/, ''); // For example, converts 'model[2][1]' to 'model'
-  
+
        this.voices = []; //reset for follow up runs

        if (this.voices.length === 0) { this.voices = await this.fetchCheckMap(); }
-        
+
        // Search for a voice object in the 'this.voices' array where the 'name' property matches the provided 'voiceName'
-        
+
        //const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);
        const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);

        // If no match is found, throw an error indicating that the TTS Voice name was not found
        if (!match) {
-          throw new Error(`TTS Voice name ${voiceName} not found`);
+            throw new Error(`TTS Voice name ${voiceName} not found`);
        } else {
-          match.name = _voiceNameOrg;
-          match.voice_id = _voiceNameOrg;
+            match.name = _voiceNameOrg;
+            match.voice_id = _voiceNameOrg;
        }
        // Return the matched voice object (with the 'name' property updated if a match was found)
        return match;
-      }
+    }

    async fetchCheckMap() {
-      const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/checkmap`;
-      const response = await fetch(endpoint);
-  
-      if (!response.ok) {
-      throw new Error(`HTTP ${response.status}: ${await response.json()}`);
-      }
-      const voiceData = await response.json();
-      const voices = voiceData.map((voice) => ({
-      id: voice.name,
-      name: voice.id, // this is the issue!!!
-      voice_id: voice.id, // this is the issue!!!
-      //preview_url: false, 
-      lang: voice.lang,
-      }));
-      return voices;
-    }
-    
-	async fetchTtsVoiceIds() {
-	  const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/speaker_id`;
-	  const response = await fetch(endpoint);
+        const endpoint = `${getApiUrl()}/api/coqui-tts/checkmap`;
+        const response = await doExtrasFetch(endpoint);

-	  if (!response.ok) {
-		throw new Error(`HTTP ${response.status}: ${await response.json()}`);
-	  }
-	  const voiceData = await response.json();
-	  const voices = voiceData.map((voice) => ({
-		id: voice.name,
-		name: voice.id, //add filename here
-		voice_id: voice.id, 
-    //preview_url: false,
-    //preview_url: `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voice.id}`, 
-    //http://localhost:5100/api/coqui-tts/speaker_id/tts_models/en/ljspeech/speedy-speech
-		lang: voice.lang,
-	  }));
-	  return voices;
-	}
-  
-  sampleTtsVoice(voiceId) {
-    // Get the selected values of speaker and language
-    const speakerSelect = document.getElementById('speaker');
-    const languageSelect = document.getElementById('language');
-    const selectedSpeaker = speakerSelect.value;
-    const selectedLanguage = languageSelect.value;
-  
-    // Construct the URL with the selected values
-    const url = `${this.settings.provider_endpoint}/api/coqui-tts/tts?text=The%20Quick%20Brown%20Fox%20Jumps%20Over%20the%20Lazy%20Dog.&speaker_id=${voiceId}&style_wav=&language_id=${selectedLanguage}&mspker=${selectedSpeaker}`;
-  
-    fetch(url)
-      .then(response => response.blob())
-      .then(blob => {
-        const audioUrl = URL.createObjectURL(blob);
-        // Play the audio
-        const audio = new Audio(audioUrl);
-        audio.play();
-      })
-      .catch(error => {
-        console.error('Error performing TTS request:', error);
-      });
-  }
-
-  previewTtsVoice(voiceId) { //button on avail voices
-    const url = `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voiceId}`;
-  
-    fetch(url)
-      .then(response => response.text()) // Expecting a text response
-      .then(responseText => {
-        const isResponseTrue = responseText.trim().toLowerCase() === 'true';
- 
-        if (isResponseTrue) {
-          console.log("Downloading Model") //if true
-        } else {
-          console.error('Already Installed'); //if false
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${await response.json()}`);
        }
-      })
-      .catch(error => {
-        console.error('Error performing download:', error);
-      });
-  }
-  
-  
-	async generateTts(text, voiceId){
+        const voiceData = await response.json();
+        const voices = voiceData.map((voice) => ({
+            id: voice.name,
+            name: voice.id, // this is the issue!!!
+            voice_id: voice.id, // this is the issue!!!
+            //preview_url: false,
+            lang: voice.lang,
+        }));
+        return voices;
+    }
+
+    async fetchTtsVoiceIds() {
+        throwIfModuleMissing();
+        const endpoint = `${getApiUrl()}/api/coqui-tts/speaker_id`;
+        const response = await doExtrasFetch(endpoint);
+
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${await response.json()}`);
+        }
+        const voiceData = await response.json();
+        const voices = voiceData.map((voice) => ({
+            id: voice.name,
+            name: voice.id, //add filename here
+            voice_id: voice.id,
+            //preview_url: false,
+            //preview_url: `${getApiUrl()}/api/coqui-tts/download?model=${voice.id}`,
+            //http://localhost:5100/api/coqui-tts/speaker_id/tts_models/en/ljspeech/speedy-speech
+            lang: voice.lang,
+        }));
+        return voices;
+    }
+
+    sampleTtsVoice(voiceId) {
+        // Get the selected values of speaker and language
+        const speakerSelect = document.getElementById('coqui_speaker');
+        const languageSelect = document.getElementById('coqui_language');
+        const selectedSpeaker = speakerSelect.value;
+        const selectedLanguage = languageSelect.value;
+
+        // Construct the URL with the selected values
+        const url = `${getApiUrl()}/api/coqui-tts/tts?text=The%20Quick%20Brown%20Fox%20Jumps%20Over%20the%20Lazy%20Dog.&speaker_id=${voiceId}&style_wav=&language_id=${selectedLanguage}&mspker=${selectedSpeaker}`;
+
+        doExtrasFetch(url)
+            .then(response => response.blob())
+            .then(blob => {
+                const audioUrl = URL.createObjectURL(blob);
+                // Play the audio
+                const audio = new Audio(audioUrl);
+                audio.play();
+            })
+            .catch(error => {
+                console.error('Error performing TTS request:', error);
+            });
+    }
+
+    previewTtsVoice(voiceId) { //button on avail voices
+        throwIfModuleMissing();
+        const url = `${getApiUrl()}/api/coqui-tts/download?model=${voiceId}`;
+
+        doExtrasFetch(url)
+            .then(response => response.text()) // Expecting a text response
+            .then(responseText => {
+                const isResponseTrue = responseText.trim().toLowerCase() === 'true';
+
+                if (isResponseTrue) {
+                    console.log("Downloading Model") //if true
+                } else {
+                    console.error('Already Installed'); //if false
+                }
+            })
+            .catch(error => {
+                console.error('Error performing download:', error);
+            });
+    }
+
+
+    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId)
        return response
    }

-	async fetchTtsGeneration(inputText, voiceId) {
-    console.info(`Generating new TTS for voice_id ${voiceId}`);
-    const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/tts?text=${encodeURIComponent(inputText)}&speaker_id=${voiceId}`);
-    if (!response.ok) {
-        toastr.error(response.statusText, 'TTS Generation Failed');
-        throw new Error(`HTTP ${response.status}: ${await response.text()}`);
-    }
+    async fetchTtsGeneration(inputText, voiceId) {
+        throwIfModuleMissing();
+        console.info(`Generating new TTS for voice_id ${voiceId}`);
+        const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/tts?text=${encodeURIComponent(inputText)}&speaker_id=${voiceId}`);
+        if (!response.ok) {
+            toastr.error(response.statusText, 'TTS Generation Failed');
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
--- a/public/style.css
+++ b/public/style.css
@ -4243,6 +4243,10 @@ toolcool-color-picker {
    padding: 5px;
 }

+.flex {
+    display: flex;
+}
+
 .flex-container {
    display: flex;
    gap: 5px;
@ -4849,6 +4853,10 @@ body.waifuMode .zoomed_avatar {
    gap: 5px;
 }

+.flexGap10 {
+    gap: 10px;
+}
+
 .timestamp {
    font-size: calc(var(--mainFontSize) * 0.7);
    font-weight: 400;