Merge pull request #1021 from ouoertheo/ouoertheo/tts-ui-voicemap

TTS Voice map UI and Coqui UI improvements
2025-03-06 04:38:21 +01:00 · 2023-08-28 23:57:22 +03:00 · 2023-08-28 23:57:22 +03:00 · f1b91620b6
commit f1b91620b6
parent ae8dbcc6d0 9ac2281a8f
11 changed files with 638 additions and 271 deletions
--- a/public/css/toggle-dependent.css
+++ b/public/css/toggle-dependent.css
@ -1,4 +1,3 @@
-body.tts .mes[is_user="true"] .mes_narrate,
 body.tts .mes[is_system="true"] .mes_narrate {
    display: none;
 }
@ -364,4 +363,4 @@ body.movingUI #groupMemberListPopout {

 body.noShadows * {
    text-shadow: none !important;
-}
+}
--- a/public/scripts/extensions/tts/coqui.js
+++ b/public/scripts/extensions/tts/coqui.js
@ -5,13 +5,16 @@ TODO:
 */

 import { doExtrasFetch, extension_settings, getApiUrl, getContext, modules, ModuleWorkerWrapper } from "../../extensions.js"
+import { callPopup } from "../../../script.js"
+import { initVoiceMap } from "./index.js"

 export { CoquiTtsProvider }

 const DEBUG_PREFIX = "<Coqui TTS module> ";
 const UPDATE_INTERVAL = 1000;

-let charactersList = []; // Updated with module worker
+let inApiCall = false;
+let voiceIdList = []; // Updated with module worker
 let coquiApiModels = {}; // Initialized only once
 let coquiApiModelsFull = {}; // Initialized only once
 let coquiLocalModels = []; // Initialized only once
@ -39,16 +42,11 @@ const languageLabels = {
    "ja": "Japanese"
 }

-
-const defaultSettings = {
-    voiceMap: "",
-    voiceMapDict: {}
-}
-
 function throwIfModuleMissing() {
    if (!modules.includes('coqui-tts')) {
-        toastr.error(`Add coqui-tts to enable-modules and restart the Extras API.`, "Coqui TTS module not loaded.", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-        throw new Error(DEBUG_PREFIX, `Coqui TTS module not loaded.`);
+        const message = `Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.`
+        // toastr.error(message, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+        throw new Error(DEBUG_PREFIX, message);
    }
 }

@ -57,46 +55,18 @@ function resetModelSettings() {
    $("#coqui_api_model_settings_speaker").val("none");
 }

-function updateCharactersList() {
-    let currentcharacters = new Set();
-    const context = getContext();
-    for (const i of context.characters) {
-        currentcharacters.add(i.name);
-    }
-
-    currentcharacters = Array.from(currentcharacters);
-    currentcharacters.unshift(context.name1);
-
-    if (JSON.stringify(charactersList) !== JSON.stringify(currentcharacters)) {
-        charactersList = currentcharacters
-
-        $('#coqui_character_select')
-            .find('option')
-            .remove()
-            .end()
-            .append('<option value="none">Select Character</option>')
-            .val('none')
-
-        for (const charName of charactersList) {
-            $("#coqui_character_select").append(new Option(charName, charName));
-        }
-
-        console.debug(DEBUG_PREFIX, "Updated character list to:", charactersList);
-    }
-}
-
 class CoquiTtsProvider {
    //#############################//
    //  Extension UI and Settings  //
    //#############################//

-    static instance;
-    settings = {};
+    settings

-    // Singleton to allow acces to instance in event functions
-    constructor() {
-        if (CoquiTtsProvider.instance === undefined)
-            CoquiTtsProvider.instance = this;
+    defaultSettings = {
+        voiceMap: {},
+        customVoices: {},
+        voiceIds: [],
+        voiceMapDict: {}
    }

    get settingsHtml() {
@ -104,13 +74,15 @@ class CoquiTtsProvider {
        <div class="flex wide100p flexGap10 alignitemscenter">
            <div>
                <div style="flex: 50%;">
-                    <label for="coqui_character_select">Character:</label>
-                    <select id="coqui_character_select">
+                    <small>To use CoquiTTS, select the origin, language, and model, then click Add Voice. The voice will then be available to add to a character. Voices are saved globally. </small><br>
+                    <label for="coqui_voicename_select">Select Saved Voice:</label>
+                    <select id="coqui_voicename_select">
                        <!-- Populated by JS -->
                    </select>
-
-                    <input id="coqui_remove_char_mapping" class="menu_button" type="button" value="Remove from Voice Map" />
-
+                    <div class="tts_block">
+                        <input id="coqui_remove_voiceId_mapping" class="menu_button" type="button" value="Remove Voice" />
+                        <input id="coqui_add_voiceId_mapping" class="menu_button" type="button" value="Add Voice" />
+                    </div>
                    <label for="coqui_model_origin">Models:</label>
                    <select id="coqui_model_origin">gpu_mode
                        <option value="none">Select Origin</option>
@ -139,7 +111,7 @@ class CoquiTtsProvider {
                        <span id="coqui_api_model_install_status">Model installed on extras server</span>
                        <input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" />
                    </div>
-                    
+
                    <div id="coqui_local_model_div">
                        <select id="coqui_local_model_name">
                            <!-- Populated by JS and request -->
@ -153,13 +125,9 @@ class CoquiTtsProvider {
        return html
    }

-    loadSettings(settings) {
-        if (Object.keys(this.settings).length === 0) {
-            Object.assign(this.settings, defaultSettings)
-        }
-
+    async loadSettings(settings) {
        // Only accept keys defined in defaultSettings
-        this.settings = defaultSettings;
+        this.settings = this.defaultSettings

        for (const key in settings) {
            if (key in this.settings) {
@ -169,7 +137,8 @@ class CoquiTtsProvider {
            }
        }

-        CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+        await initLocalModels();
+        this.updateCustomVoices(); // Override any manual modification

        $("#coqui_api_model_div").hide();
        $("#coqui_local_model_div").hide();
@ -180,70 +149,123 @@ class CoquiTtsProvider {
        $("#coqui_api_model_install_status").hide();
        $("#coqui_api_model_install_button").hide();

-        $("#coqui_model_origin").on("change", CoquiTtsProvider.onModelOriginChange);
-        $("#coqui_api_language").on("change", CoquiTtsProvider.onModelLanguageChange);
-        $("#coqui_api_model_name").on("change", CoquiTtsProvider.onModelNameChange);
-        $("#coqui_remove_char_mapping").on("click", CoquiTtsProvider.onRemoveClick);
+        let that = this
+        $("#coqui_model_origin").on("change", function () { that.onModelOriginChange() });
+        $("#coqui_api_language").on("change", function () { that.onModelLanguageChange() });
+        $("#coqui_api_model_name").on("change", function () { that.onModelNameChange() });

-        updateCharactersList();
+        $("#coqui_remove_voiceId_mapping").on("click", function () { that.onRemoveClick() });
+        $("#coqui_add_voiceId_mapping").on("click", function () { that.onAddClick() });

        // Load coqui-api settings from json file
-        fetch("/scripts/extensions/tts/coqui_api_models_settings.json")
+        await fetch("/scripts/extensions/tts/coqui_api_models_settings.json")
        .then(response => response.json())
        .then(json => {
            coquiApiModels = json;
            console.debug(DEBUG_PREFIX,"initialized coqui-api model list to", coquiApiModels);
+            /*
+            $('#coqui_api_language')
+                .find('option')
+                .remove()
+                .end()
+                .append('<option value="none">Select model language</option>')
+                .val('none');
+
+            for(let language in coquiApiModels) {
+                $("#coqui_api_language").append(new Option(languageLabels[language],language));
+                console.log(DEBUG_PREFIX,"added language",language);
+            }*/
        });

        // Load coqui-api FULL settings from json file
-        fetch("/scripts/extensions/tts/coqui_api_models_settings_full.json")
+        await fetch("/scripts/extensions/tts/coqui_api_models_settings_full.json")
        .then(response => response.json())
        .then(json => {
            coquiApiModelsFull = json;
            console.debug(DEBUG_PREFIX,"initialized coqui-api full model list to", coquiApiModelsFull);
+            /*
+            $('#coqui_api_full_language')
+                .find('option')
+                .remove()
+                .end()
+                .append('<option value="none">Select model language</option>')
+                .val('none');
+
+            for(let language in coquiApiModelsFull) {
+                $("#coqui_api_full_language").append(new Option(languageLabels[language],language));
+                console.log(DEBUG_PREFIX,"added language",language);
+            }*/
        });
    }

-    static updateVoiceMap() {
-        CoquiTtsProvider.instance.settings.voiceMap = "";
-        for (let i in CoquiTtsProvider.instance.settings.voiceMapDict) {
-            const voice_settings = CoquiTtsProvider.instance.settings.voiceMapDict[i];
-            CoquiTtsProvider.instance.settings.voiceMap += i + ":" + voice_settings["model_id"];
+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady(){
+        throwIfModuleMissing()
+        await this.fetchTtsVoiceObjects()
+    }

-            if (voice_settings["model_language"] != null)
-                CoquiTtsProvider.instance.settings.voiceMap += "[" + voice_settings["model_language"] + "]";
+    updateCustomVoices() {
+        // Rebuilds customVoices from voiceMapDict: each voice name maps to its model_id string, suffixed with [language] and [speaker] when set
+        this.settings.customVoices = {};
+        for (let voiceName in this.settings.voiceMapDict) {
+            const voiceId = this.settings.voiceMapDict[voiceName];
+            this.settings.customVoices[voiceName] = voiceId["model_id"];

-            if (voice_settings["model_speaker"] != null)
-                CoquiTtsProvider.instance.settings.voiceMap += "[" + voice_settings["model_speaker"] + "]";
+            if (voiceId["model_language"] != null)
+                this.settings.customVoices[voiceName] += "[" + voiceId["model_language"] + "]";

-            CoquiTtsProvider.instance.settings.voiceMap += ",";
+            if (voiceId["model_speaker"] != null)
+                this.settings.customVoices[voiceName] += "[" + voiceId["model_speaker"] + "]";
        }
-        $("#tts_voice_map").val(CoquiTtsProvider.instance.settings.voiceMap);
-        //extension_settings.tts.Coqui = extension_settings.tts.Coqui;
+
+        // Update UI select list with voices
+        $("#coqui_voicename_select").empty()
+        $('#coqui_voicename_select')
+            .find('option')
+            .remove()
+            .end()
+            .append('<option value="none">Select Voice</option>')
+            .val('none')
+        for (const voiceName in this.settings.voiceMapDict) {
+            $("#coqui_voicename_select").append(new Option(voiceName, voiceName));
+        }
+
+        this.onSettingsChange()
    }

    onSettingsChange() {
-        //console.debug(DEBUG_PREFIX, "Settings changes", CoquiTtsProvider.instance.settings);
-        CoquiTtsProvider.updateVoiceMap();
+        console.debug(DEBUG_PREFIX, "Settings changes", this.settings);
+        extension_settings.tts.Coqui = this.settings;
    }

-    async onApplyClick() {
-        const character = $("#coqui_character_select").val();
+    async onRefreshClick() {
+        this.checkReady()
+    }
+
+    async onAddClick() {
+        if (inApiCall) {
+            return; //TODO: block dropdown
+        }
+
+        // Ask user for voiceId name to save voice
+        const voiceName = await callPopup('<h3>Name of Coqui voice to add to voice select dropdown:</h3>', 'input')
+
        const model_origin = $("#coqui_model_origin").val();
        const model_language = $("#coqui_api_language").val();
        const model_name = $("#coqui_api_model_name").val();
        let model_setting_language = $("#coqui_api_model_settings_language").val();
        let model_setting_speaker = $("#coqui_api_model_settings_speaker").val();

-        if (character === "none") {
-            toastr.error(`Character not selected, please select one.`, DEBUG_PREFIX + " voice mapping character", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-            CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+
+        if (!voiceName) {
+            toastr.error(`Voice name empty, please enter one.`, DEBUG_PREFIX + " voice mapping voice name", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+            this.updateCustomVoices(); // Override any manual modification
            return;
        }

        if (model_origin == "none") {
            toastr.error(`Origin not selected, please select one.`, DEBUG_PREFIX + " voice mapping origin", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-            CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+            this.updateCustomVoices(); // Override any manual modification
            return;
        }

@ -252,25 +274,25 @@ class CoquiTtsProvider {

            if (model_name == "none") {
                toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-                CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+                this.updateCustomVoices(); // Override any manual modification
                return;
            }

-            CoquiTtsProvider.instance.settings.voiceMapDict[character] = { model_type: "local", model_id: "local/" + model_id };
-            console.debug(DEBUG_PREFIX, "Registered new voice map: ", character, ":", CoquiTtsProvider.instance.settings.voiceMapDict[character]);
-            CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+            this.settings.voiceMapDict[voiceName] = { model_type: "local", model_id: "local/" + model_id };
+            console.debug(DEBUG_PREFIX, "Registered new voice map: ", voiceName, ":", this.settings.voiceMapDict[voiceName]);
+            this.updateCustomVoices(); // Override any manual modification
            return;
        }

        if (model_language == "none") {
            toastr.error(`Language not selected, please select one.`, DEBUG_PREFIX + " voice mapping language", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-            CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+            this.updateCustomVoices(); // Override any manual modification
            return;
        }

        if (model_name == "none") {
            toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-            CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification
+            this.updateCustomVoices(); // Override any manual modification
            return;
        }

@ -299,45 +321,51 @@ class CoquiTtsProvider {
            return;
        }

-        console.debug(DEBUG_PREFIX, "Current voice map: ", CoquiTtsProvider.instance.settings.voiceMap);
+        console.debug(DEBUG_PREFIX, "Current custom voices: ", this.settings.customVoices);

-        CoquiTtsProvider.instance.settings.voiceMapDict[character] = { model_type: "coqui-api", model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };
+        this.settings.voiceMapDict[voiceName] = { model_type: "coqui-api", model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };

-        console.debug(DEBUG_PREFIX, "Registered new voice map: ", character, ":", CoquiTtsProvider.instance.settings.voiceMapDict[character]);
+        console.debug(DEBUG_PREFIX, "Registered new voice map: ", voiceName, ":", this.settings.voiceMapDict[voiceName]);

-        CoquiTtsProvider.updateVoiceMap();
+        this.updateCustomVoices();
+        initVoiceMap() // Update TTS extension voiceMap

-        let successMsg = character + ":" + model_id;
+        let successMsg = voiceName + ":" + model_id;
        if (model_setting_language != null)
            successMsg += "[" + model_setting_language + "]";
        if (model_setting_speaker != null)
            successMsg += "[" + model_setting_speaker + "]";
        toastr.info(successMsg, DEBUG_PREFIX + " voice map updated", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+
        return
    }

-    // DBG: assume voiceName is correct
-    // TODO: check voice is correct
    async getVoice(voiceName) {
-        console.log(DEBUG_PREFIX, "getVoice", voiceName);
-        const output = { voice_id: voiceName };
-        return output;
+        let match = await this.fetchTtsVoiceObjects()
+        match = match.filter(
+            voice => voice.name == voiceName
+        )[0]
+        if (!match) {
+            throw `TTS Voice name ${voiceName} not found in CoquiTTS Provider voice list`
+        }
+        return match;
    }

-    static async onRemoveClick() {
-        const character = $("#coqui_character_select").val();
+    async onRemoveClick() {
+        const voiceName = $("#coqui_voicename_select").val();

-        if (character === "none") {
-            toastr.error(`Character not selected, please select one.`, DEBUG_PREFIX + " voice mapping character", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
+        if (voiceName === "none") {
+            toastr.error(`Voice not selected, please select one.`, DEBUG_PREFIX + " voice mapping voiceId", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
            return;
        }

        // Todo erase from voicemap
-        delete (CoquiTtsProvider.instance.settings.voiceMapDict[character]);
-        CoquiTtsProvider.updateVoiceMap(); // TODO
+        delete (this.settings.voiceMapDict[voiceName]);
+        this.updateCustomVoices();
+        initVoiceMap() // Update TTS extension voiceMap
    }

-    static async onModelOriginChange() {
+    async onModelOriginChange() {
        throwIfModuleMissing()
        resetModelSettings();
        const model_origin = $('#coqui_model_origin').val();
@ -346,13 +374,10 @@ class CoquiTtsProvider {
            $("#coqui_local_model_div").hide();
            $("#coqui_api_model_div").hide();
        }
-        
+
        // show coqui model selected list (SAFE)
        if (model_origin == "coqui-api") {
            $("#coqui_local_model_div").hide();
-            $("#coqui_api_model_div").hide();
-            $("#coqui_api_model_name").hide();
-            $("#coqui_api_model_settings").hide();

            $('#coqui_api_language')
                .find('option')
@ -375,9 +400,6 @@ class CoquiTtsProvider {
        // show coqui model full list (UNSAFE)
        if (model_origin == "coqui-api-full") {
            $("#coqui_local_model_div").hide();
-            $("#coqui_api_model_div").hide();
-            $("#coqui_api_model_name").hide();
-            $("#coqui_api_model_settings").hide();

            $('#coqui_api_language')
                .find('option')
@ -405,7 +427,7 @@ class CoquiTtsProvider {
        }
    }

-    static async onModelLanguageChange() {
+    async onModelLanguageChange() {
        throwIfModuleMissing();
        resetModelSettings();
        $("#coqui_api_model_settings").hide();
@ -438,7 +460,7 @@ class CoquiTtsProvider {
            }
    }

-    static async onModelNameChange() {
+    async onModelNameChange() {
        throwIfModuleMissing();
        resetModelSettings();
        $("#coqui_api_model_settings").hide();
@ -529,6 +551,8 @@ class CoquiTtsProvider {
                $("#coqui_api_model_install_status").text("Model not found on extras server");
            }

+            const onModelNameChange_pointer = this.onModelNameChange;
+
            $("#coqui_api_model_install_button").off("click").on("click", async function () {
                try {
                    $("#coqui_api_model_install_status").text("Downloading model...");
@ -542,7 +566,7 @@ class CoquiTtsProvider {
                    if (apiResult["status"] == "done") {
                        $("#coqui_api_model_install_status").text("Model installed and ready to use!");
                        $("#coqui_api_model_install_button").hide();
-                        CoquiTtsProvider.onModelNameChange();
+                        onModelNameChange_pointer();
                    }

                    if (apiResult["status"] == "downloading") {
@ -553,7 +577,7 @@ class CoquiTtsProvider {
                } catch (error) {
                    console.error(error)
                    toastr.error(error, DEBUG_PREFIX + " error with model download", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
-                    CoquiTtsProvider.onModelNameChange();
+                    onModelNameChange_pointer();
                }
                // will refresh model status
            });
@ -656,6 +680,8 @@ class CoquiTtsProvider {
    // ts_models/ja/kokoro/tacotron2-DDC
    async generateTts(text, voiceId) {
        throwIfModuleMissing()
+        voiceId = this.settings.customVoices[voiceId]
+
        const url = new URL(getApiUrl());
        url.pathname = '/api/text-to-speech/coqui/generate-tts';

@ -703,8 +729,11 @@ class CoquiTtsProvider {
    }

    // Dirty hack to say not implemented
-    async fetchTtsVoiceIds() {
-        return [{ name: "Voice samples not implemented for coqui TTS yet, search for the model samples online", voice_id: "", lang: "", }]
+    async fetchTtsVoiceObjects() {
+        const voiceIds = Object
+            .keys(this.settings.voiceMapDict)
+            .map(voice => ({ name: voice, voice_id: voice, preview_url: false }));
+        return voiceIds
    }

    // Do nothing
@ -717,13 +746,7 @@ class CoquiTtsProvider {
    }
 }

-//#############################//
-//  Module Worker              //
-//#############################//
-
-async function moduleWorker() {
-    updateCharactersList();
-
+async function initLocalModels() {
    if (!modules.includes('coqui-tts'))
        return

@ -748,9 +771,3 @@ async function moduleWorker() {
        coquiLocalModelsReceived = true;
    }
 }
-
-$(document).ready(function () {
-    const wrapper = new ModuleWorkerWrapper(moduleWorker);
-    setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL);
-    moduleWorker();
-})
--- a/public/scripts/extensions/tts/edge.js
+++ b/public/scripts/extensions/tts/edge.js
@ -2,6 +2,7 @@ import { getRequestHeaders } from "../../../script.js"
 import { getApiUrl } from "../../extensions.js"
 import { doExtrasFetch, modules } from "../../extensions.js"
 import { getPreviewString } from "./index.js"
+import { saveTtsProviderSettings } from "./index.js"

 export { EdgeTtsProvider }

@ -30,9 +31,10 @@ class EdgeTtsProvider {
    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
+        saveTtsProviderSettings()
    }

-    loadSettings(settings) {
+    async loadSettings(settings) {
        // Pupulate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
@ -51,12 +53,20 @@ class EdgeTtsProvider {

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
+        $('#edge_tts_rate').on("input", () => {this.onSettingsChange()})
+        await this.checkReady()

        console.info("Settings loaded")
    }


-    async onApplyClick() {
+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady(){
+        throwIfModuleMissing()
+        await this.fetchTtsVoiceObjects()
+    }
+
+    async onRefreshClick() {
        return
    }

@ -66,7 +76,7 @@ class EdgeTtsProvider {

    async getVoice(voiceName) {
        if (this.voices.length == 0) {
-            this.voices = await this.fetchTtsVoiceIds()
+            this.voices = await this.fetchTtsVoiceObjects()
        }
        const match = this.voices.filter(
            voice => voice.name == voiceName
@ -85,7 +95,7 @@ class EdgeTtsProvider {
    //###########//
    // API CALLS //
    //###########//
-    async fetchTtsVoiceIds() {
+    async fetchTtsVoiceObjects() {
        throwIfModuleMissing()

        const url = new URL(getApiUrl());
@ -144,8 +154,9 @@ class EdgeTtsProvider {
 }
 function throwIfModuleMissing() {
    if (!modules.includes('edge-tts')) {
-        toastr.error(`Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.`)
-        throw new Error(`Edge TTS module not loaded.`)
+        const message = `Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.`
+        // toastr.error(message)
+        throw new Error(message)
    }
 }

--- a/public/scripts/extensions/tts/elevenlabs.js
+++ b/public/scripts/extensions/tts/elevenlabs.js
@ -1,5 +1,4 @@
-import { deepClone } from "../../utils.js";
-
+import { saveTtsProviderSettings } from "./index.js"
 export { ElevenLabsTtsProvider }

 class ElevenLabsTtsProvider {
@ -25,16 +24,19 @@ class ElevenLabsTtsProvider {

    get settingsHtml() {
        let html = `
-        <label for="elevenlabs_tts_api_key">API Key</label>
-        <input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/>
-        <label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
-        <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.05" />
-        <label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
-        <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.05" />
-        <label class="checkbox_label" for="elevenlabs_tts_multilingual">
-            <input id="elevenlabs_tts_multilingual" type="checkbox" value="${this.defaultSettings.multilingual}" />
-            Enable Multilingual
-        </label>
+        <div class="elevenlabs_tts_settings">
+            <label for="elevenlabs_tts_api_key">API Key</label>
+            <input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/>
+            <input id="eleven_labs_connect" class="menu_button" type="button" value="Connect" />
+            <label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
+            <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.05" />
+            <label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
+            <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.05" />
+            <label class="checkbox_label" for="elevenlabs_tts_multilingual">
+                <input id="elevenlabs_tts_multilingual" type="checkbox" value="${this.defaultSettings.multilingual}" />
+                Enable Multilingual
+            </label>
+        </div>
        `
        return html
    }
@ -44,39 +46,49 @@ class ElevenLabsTtsProvider {
        this.settings.stability = $('#elevenlabs_tts_stability').val()
        this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val()
        this.settings.multilingual = $('#elevenlabs_tts_multilingual').prop('checked')
+        saveTtsProviderSettings()
    }


-    loadSettings(settings) {
+    async loadSettings(settings) {
        // Pupulate Provider UI given input settings
-        if (!settings || Object.keys(settings).length == 0) {
+        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
        }

        // Only accept keys defined in defaultSettings
-        this.settings = deepClone(this.defaultSettings);
+        this.settings = this.defaultSettings

-        if (settings) {
-            for (const key in settings) {
-                if (key in this.settings) {
-                    this.settings[key] = settings[key]
-                } else {
-                    throw `Invalid setting passed to TTS Provider: ${key}`
-                }
+        for (const key in settings){
+            if (key in this.settings){
+                this.settings[key] = settings[key]
+            } else {
+                throw `Invalid setting passed to TTS Provider: ${key}`
            }
        }
-
        $('#elevenlabs_tts_stability').val(this.settings.stability)
        $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost)
        $('#elevenlabs_tts_api_key').val(this.settings.apiKey)
        $('#tts_auto_generation').prop('checked', this.settings.multilingual)
+        $('#eleven_labs_connect').on('click', () => {this.onConnectClick()})
+        $('#elevenlabs_tts_settings').on('input',this.onSettingsChange)
+
+        await this.checkReady()
        console.info("Settings loaded")
    }

-    async onApplyClick() {
+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady(){
+        await this.fetchTtsVoiceObjects()
+    }
+
+    async onRefreshClick() {
+    }
+
+    async onConnectClick() {
        // Update on Apply click
        return await this.updateApiKey().catch( (error) => {
-            throw error
+            toastr.error(`ElevenLabs: ${error}`)
        })
    }

@ -85,11 +97,12 @@ class ElevenLabsTtsProvider {
        // Using this call to validate API key
        this.settings.apiKey = $('#elevenlabs_tts_api_key').val()

-        await this.fetchTtsVoiceIds().catch(error => {
+        await this.fetchTtsVoiceObjects().catch(error => {
            throw `TTS API key validation failed`
        })
        this.settings.apiKey = this.settings.apiKey
        console.debug(`Saved new API_KEY: ${this.settings.apiKey}`)
+        this.onSettingsChange()
    }

    //#################//
@ -98,7 +111,7 @@ class ElevenLabsTtsProvider {

    async getVoice(voiceName) {
        if (this.voices.length == 0) {
-            this.voices = await this.fetchTtsVoiceIds()
+            this.voices = await this.fetchTtsVoiceObjects()
        }
        const match = this.voices.filter(
            elevenVoice => elevenVoice.name == voiceName
@ -145,7 +158,7 @@ class ElevenLabsTtsProvider {
    //###########//
    // API CALLS //
    //###########//
-    async fetchTtsVoiceIds() {
+    async fetchTtsVoiceObjects() {
        const headers = {
            'xi-api-key': this.settings.apiKey
        }
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@ -13,6 +13,7 @@ export { talkingAnimation };

 const UPDATE_INTERVAL = 1000

+let voiceMapEntries = []
 let voiceMap = {} // {charName:voiceid, charName2:voiceid2}
 let audioControl
 let storedvalue = false;
@ -224,8 +225,8 @@ function debugTtsPlayback() {
    console.log(JSON.stringify(
        {
            "ttsProviderName": ttsProviderName,
+            "voiceMap": voiceMap,
            "currentMessageNumber": currentMessageNumber,
-            "isWorkerBusy": isWorkerBusy,
            "audioPaused": audioPaused,
            "audioJobQueue": audioJobQueue,
            "currentAudioJob": currentAudioJob,
@ -285,7 +286,7 @@ async function onTtsVoicesClick() {
    let popupText = ''

    try {
-        const voiceIds = await ttsProvider.fetchTtsVoiceIds()
+        const voiceIds = await ttsProvider.fetchTtsVoiceObjects()

        for (const voice of voiceIds) {
            popupText += `
@ -486,6 +487,12 @@ function loadSettings() {
    if (Object.keys(extension_settings.tts).length === 0) {
        Object.assign(extension_settings.tts, defaultSettings)
    }
+    for (const key in defaultSettings) {
+        if (!(key in extension_settings.tts)) {
+            extension_settings.tts[key] = defaultSettings[key]
+        }
+    }
+    $('#tts_provider').val(extension_settings.tts.currentProvider)
    $('#tts_enabled').prop(
        'checked',
        extension_settings.tts.enabled
@ -513,59 +520,17 @@ function setTtsStatus(status, success) {
    }
 }

-function parseVoiceMap(voiceMapString) {
-    let parsedVoiceMap = {}
-    for (const [charName, voiceId] of voiceMapString
-        .split(',')
-        .map(s => s.split(':'))) {
-        if (charName && voiceId) {
-            parsedVoiceMap[charName.trim()] = voiceId.trim()
-        }
-    }
-    return parsedVoiceMap
-}
-
-async function voicemapIsValid(parsedVoiceMap) {
-    let valid = true
-    for (const characterName in parsedVoiceMap) {
-        const parsedVoiceName = parsedVoiceMap[characterName]
-        try {
-            await ttsProvider.getVoice(parsedVoiceName)
-        } catch (error) {
-            console.error(error)
-            valid = false
-        }
-    }
-    return valid
-}
-
-async function updateVoiceMap() {
-    let isValidResult = false
-
-    const value = $('#tts_voice_map').val()
-    const parsedVoiceMap = parseVoiceMap(value)
-
-    isValidResult = await voicemapIsValid(parsedVoiceMap)
-    if (isValidResult) {
-        ttsProvider.settings.voiceMap = String(value)
-        // console.debug(`ttsProvider.voiceMap: ${ttsProvider.settings.voiceMap}`)
-        voiceMap = parsedVoiceMap
-        console.debug(`Saved new voiceMap: ${value}`)
-        saveSettingsDebounced()
-    } else {
-        throw 'Voice map is invalid, check console for errors'
-    }
-}
-
-function onApplyClick() {
+function onRefreshClick() {
    Promise.all([
-        ttsProvider.onApplyClick(),
-        updateVoiceMap()
+        ttsProvider.onRefreshClick(),
+        // updateVoiceMap()
    ]).then(() => {
        extension_settings.tts[ttsProviderName] = ttsProvider.settings
        saveSettingsDebounced()
        setTtsStatus('Successfully applied settings', true)
        console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`)
+        initVoiceMap()
+        updateVoiceMap()
    }).catch(error => {
        console.error(error)
        setTtsStatus(error, false)
@ -608,13 +573,14 @@ function onNarrateTranslatedOnlyClick() {
 // TTS Provider //
 //##############//

-function loadTtsProvider(provider) {
+async function loadTtsProvider(provider) {
    //Clear the current config and add new config
    $("#tts_provider_settings").html("")

    if (!provider) {
-        provider
+        return
    }
+
    // Init provider references
    extension_settings.tts.currentProvider = provider
    ttsProviderName = provider
@ -626,38 +592,210 @@ function loadTtsProvider(provider) {
        console.warn(`Provider ${ttsProviderName} not in Extension Settings, initiatilizing provider in settings`)
        extension_settings.tts[ttsProviderName] = {}
    }
-
-    // Load voicemap settings
-    let voiceMapFromSettings
-    if ("voiceMap" in extension_settings.tts[ttsProviderName]) {
-        voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap
-        voiceMap = parseVoiceMap(voiceMapFromSettings)
-    } else {
-        voiceMapFromSettings = ""
-        voiceMap = {}
-    }
-    $('#tts_voice_map').val(voiceMapFromSettings)
-    $('#tts_provider').val(ttsProviderName)
-
-    ttsProvider.loadSettings(extension_settings.tts[ttsProviderName])
+    await ttsProvider.loadSettings(extension_settings.tts[ttsProviderName])
+    await initVoiceMap()
 }

 function onTtsProviderChange() {
    const ttsProviderSelection = $('#tts_provider').val()
+    extension_settings.tts.currentProvider = ttsProviderSelection
    loadTtsProvider(ttsProviderSelection)
 }

-function onTtsProviderSettingsInput() {
-    ttsProvider.onSettingsChange()
-
-    // Persist changes to SillyTavern tts extension settings
-
+// Ensure that TTS provider settings are saved to extension settings.
+export function saveTtsProviderSettings() {
+    updateVoiceMap()
    extension_settings.tts[ttsProviderName] = ttsProvider.settings
    saveSettingsDebounced()
    console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`)
 }


+//###################//
+// voiceMap Handling //
+//###################//
+
+/**
+ * Event handler for chat/group changes: resets TTS playback first, then rebuilds
+ * the voice map UI for the characters of the new chat.
+ *
+ * @returns {Promise<void>}
+ */
+async function onChatChanged() {
+    await resetTtsPlayback()
+    await initVoiceMap()
+}
+
+/**
+ * Collect the names that need a voice assignment in the current chat.
+ *
+ * @returns {string[]} The user name (`context.name1`) followed by either the single
+ *   character name (`context.name2`) or, in a group chat, every group member entry
+ *   with a trailing `.png` removed.
+ */
+function getCharacters(){
+    const context = getContext()
+    // The user is listed first in both chat types
+    const characters = [context.name1]
+
+    if (context.groupId === null){
+        // Single char chat
+        characters.push(context.name2)
+        return characters
+    }
+
+    // Group chat
+    const group = context.groups.find(group => context.groupId == group.id)
+    if (!group){
+        // The selected group is not in the list, so there are no members to read.
+        // Offer only the user instead of throwing on `group.members`.
+        console.warn(`TTS: group ${context.groupId} not found, voice map will only list the user`)
+        return characters
+    }
+    for (const member of group.members) {
+        // Remove suffix
+        characters.push(member.endsWith('.png') ? member.slice(0, -4) : member)
+    }
+    return characters
+}
+
+/**
+ * Turn an arbitrary string into something usable inside an element id.
+ *
+ * @param {string} input - Raw text, e.g. a character name.
+ * @returns {string} `input` with everything except ASCII letters, digits, `_` and `-`
+ *   removed; prefixed with `element_` when the result does not start with a letter.
+ */
+function sanitizeId(input) {
+  const stripped = input.replace(/[^a-zA-Z0-9-_]/g, '');
+  const startsWithLetter = /^[a-zA-Z]/.test(stripped);
+  return startsWithLetter ? stripped : 'element_' + stripped;
+}
+
+/**
+ * Parse the legacy textual voice map (`charName:voiceId` pairs separated by commas).
+ *
+ * @param {string} voiceMapString - e.g. "Aqua:Bella,You:Josh,"
+ * @returns {Object<string, string>} Trimmed character name -> trimmed voice id. Pairs
+ *   with an empty name or an empty voice id are ignored; anything after a second `:`
+ *   in a pair is dropped.
+ */
+function parseVoiceMap(voiceMapString) {
+    const result = {}
+    voiceMapString.split(',').forEach(pair => {
+        const parts = pair.split(':')
+        const charName = parts[0]
+        const voiceId = parts[1]
+        if (!charName || !voiceId) {
+            return
+        }
+        result[charName.trim()] = voiceId.trim()
+    })
+    return result
+}
+
+
+
+/**
+ * Apply voiceMap based on current voiceMapEntries
+ *
+ * Rebuilds the in-memory `voiceMap` from every entry whose `voiceId` is not null, merges it
+ * into the active provider's saved `voiceMap` setting and schedules a settings save.
+ * The in-memory map is left untouched when no entry contributes a voice.
+ */
+function updateVoiceMap() {
+    const tempVoiceMap = {}
+    for (const voice of voiceMapEntries){
+        if (voice.voiceId === null){
+            continue
+        }
+        tempVoiceMap[voice.name] = voice.voiceId
+    }
+    if (Object.keys(tempVoiceMap).length !== 0){
+        voiceMap = tempVoiceMap
+        console.log(`Voicemap updated to ${JSON.stringify(voiceMap)}`)
+    }
+
+    // Object.assign needs a real object as its target: a missing voiceMap would throw and
+    // the legacy comma-separated string would swallow the update, so normalise it first.
+    const providerSettings = extension_settings.tts[ttsProviderName]
+    if (typeof providerSettings.voiceMap === 'string'){
+        providerSettings.voiceMap = parseVoiceMap(providerSettings.voiceMap)
+    } else if (typeof providerSettings.voiceMap !== 'object' || providerSettings.voiceMap === null){
+        providerSettings.voiceMap = {}
+    }
+    Object.assign(providerSettings.voiceMap, voiceMap)
+    saveSettingsDebounced()
+}
+
+/**
+ * One row of the voice map UI: a character name paired with the voice chosen for it.
+ *
+ * `voiceId` holds the value of the selected option, which is a voice `name` from the
+ * list passed to addUI(), or the literal 'disabled'.
+ */
+class VoiceMapEntry {
+    name
+    voiceId
+    selectElement
+    /**
+     * @param {string} name - Character name shown in the row and used as the voiceMap key.
+     * @param {string} [voiceId='disabled'] - Initially selected voice.
+     */
+    constructor (name, voiceId='disabled') {
+        this.name = name
+        this.voiceId = voiceId
+        this.selectElement = null
+    }
+
+    /**
+     * Append this entry's row to `#tts_voicemap_block` and wire up its select element.
+     *
+     * @param {Array<{name: string}>} voiceIds - Voice objects offered as options;
+     *   a missing list is treated as empty so only 'disabled' is offered.
+     */
+    addUI(voiceIds){
+        const sanitizedName = sanitizeId(this.name)
+
+        // Build the row from elements instead of an HTML string so that a character name
+        // containing markup is displayed as text rather than parsed as HTML.
+        const row = $(`<div class='tts_voicemap_block_char flex-container flexGap5'></div>`)
+        const label = $('<span></span>')
+            .attr('id', `tts_voicemap_char_${sanitizedName}`)
+            .text(this.name)
+        const select = $('<select></select>')
+            .attr('id', `tts_voicemap_char_${sanitizedName}_voice`)
+        select.append($('<option></option>').text('disabled'))
+
+        // Populate voice ID select list
+        for (const voiceId of voiceIds ?? []){
+            select.append($('<option></option>').val(voiceId.name).text(voiceId.name))
+        }
+
+        row.append(label, select)
+        $('#tts_voicemap_block').append(row)
+
+        // Keep the element created above rather than looking it up by id: different names
+        // can sanitize to the same id, and an id lookup would then return another row's select.
+        this.selectElement = select
+        this.selectElement.on('change', args => this.onSelectChange(args))
+        this.selectElement.val(this.voiceId)
+    }
+
+    /**
+     * Change handler of the select: stores the newly selected voice and applies the voice map.
+     *
+     * @param {*} args - Event object passed by jQuery (unused).
+     */
+    onSelectChange(args) {
+        this.voiceId = this.selectElement.find(':selected').val()
+        updateVoiceMap()
+    }
+}
+
+/**
+ * Init voiceMapEntries for character select list.
+ *
+ * Clears the voice map UI, then (only when TTS is enabled and the provider's checkReady()
+ * succeeds) rebuilds one VoiceMapEntry per character of the current chat, preselecting the
+ * voice saved in the provider settings, and finally applies the result via updateVoiceMap().
+ *
+ * @returns {Promise<void>}
+ */
+export async function initVoiceMap(){
+    // Clear existing voiceMap state
+    $('#tts_voicemap_block').empty()
+    voiceMapEntries = []
+
+    // Gate initialization if not enabled or TTS Provider not ready. Prevents error popups.
+    const enabled = $('#tts_enabled').is(':checked')
+    if (!enabled){
+        return
+    }
+
+    // Keep errors inside extension UI rather than toastr. Toastr errors for TTS are annoying.
+    try {
+        await ttsProvider.checkReady()
+    } catch (error) {
+        const message = `TTS Provider not ready. ${error}`
+        setTtsStatus(message, false)
+        return
+    }
+
+    setTtsStatus("TTS Provider Loaded", true)
+
+    // Get characters in current chat
+    const characters = getCharacters()
+
+    // Get saved voicemap from provider settings, handling new and old representations
+    let voiceMapFromSettings = {}
+    const savedVoiceMap = extension_settings.tts[ttsProviderName].voiceMap
+    if (typeof savedVoiceMap === "string"){
+        // Handle previous representation
+        voiceMapFromSettings = parseVoiceMap(savedVoiceMap)
+    } else if (typeof savedVoiceMap === "object" && savedVoiceMap !== null){
+        // Handle new representation (typeof null is "object", hence the extra check)
+        voiceMapFromSettings = savedVoiceMap
+    }
+
+    // Get voiceIds from provider. Fall back to an empty list on failure so the rows can
+    // still be built (offering only 'disabled') instead of crashing while iterating undefined.
+    let voiceIdsFromProvider = []
+    try {
+        voiceIdsFromProvider = (await ttsProvider.fetchTtsVoiceObjects()) ?? []
+    }
+    catch (error) {
+        console.error(error)
+        toastr.error("TTS Provider failed to return voice ids.")
+    }
+
+    // Build UI using VoiceMapEntry objects
+    for (const character of characters){
+        if (character === "SillyTavern System"){
+            continue
+        }
+        // Check provider settings for voiceIds. Own-property check so that a character
+        // named like an Object.prototype member (e.g. "constructor") is not matched.
+        const hasSavedVoice = Object.prototype.hasOwnProperty.call(voiceMapFromSettings, character)
+        const voiceId = hasSavedVoice ? voiceMapFromSettings[character] : 'disabled'
+
+        const voiceMapEntry = new VoiceMapEntry(character, voiceId)
+        voiceMapEntry.addUI(voiceIdsFromProvider)
+        voiceMapEntries.push(voiceMapEntry)
+    }
+    updateVoiceMap()
+}

 $(document).ready(function () {
    function addExtensionControls() {
@ -669,10 +807,13 @@ $(document).ready(function () {
                    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
                </div>
                <div class="inline-drawer-content">
-                    <div>
-                        <span>Select TTS Provider</span> </br>
-                        <select id="tts_provider">
+                    <div id="tts_status">
+                    </div>
+                    <span>Select TTS Provider</span> </br>
+                    <div class="tts_block">
+                        <select id="tts_provider" class="flex1">
                        </select>
+                        <input id="tts_refresh" class="menu_button" type="submit" value="Reload" />
                    </div>
                    <div>
                        <label class="checkbox_label" for="tts_enabled">
@ -696,16 +837,12 @@ $(document).ready(function () {
                            <small>Narrate only the translated text</small>
                        </label>
                    </div>
-                    <label>Voice Map</label>
-                    <textarea id="tts_voice_map" type="text" class="text_pole textarea_compact" rows="4"
-                        placeholder="Enter comma separated map of charName:ttsName. Example: \nAqua:Bella,\nYou:Josh,"></textarea>
-
-                    <div id="tts_status">
+                    <div id="tts_voicemap_block">
                    </div>
+                    <hr>
                    <form id="tts_provider_settings" class="inline-drawer-content">
                    </form>
                    <div class="tts_buttons">
-                        <input id="tts_apply" class="menu_button" type="submit" value="Apply" />
                        <input id="tts_voices" class="menu_button" type="submit" value="Available voices" />
                    </div>
                    </div>
@ -714,14 +851,13 @@ $(document).ready(function () {
        </div>
        `
        $('#extensions_settings').append(settingsHtml)
-        $('#tts_apply').on('click', onApplyClick)
+        $('#tts_refresh').on('click', onRefreshClick)
        $('#tts_enabled').on('click', onEnableClick)
        $('#tts_narrate_dialogues').on('click', onNarrateDialoguesClick);
        $('#tts_narrate_quoted').on('click', onNarrateQuotedClick);
        $('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick);
        $('#tts_auto_generation').on('click', onAutoGenerationClick);
        $('#tts_voices').on('click', onTtsVoicesClick)
-        $('#tts_provider_settings').on('input', onTtsProviderSettingsInput)
        for (const provider in ttsProviders) {
            $('#tts_provider').append($("<option />").val(provider).text(provider))
        }
@ -735,4 +871,6 @@ $(document).ready(function () {
    const wrapper = new ModuleWorkerWrapper(moduleWorker);
    setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL) // Init depends on all the things
    eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
+    eventSource.on(event_types.CHAT_CHANGED, onChatChanged)
+    eventSource.on(event_types.GROUP_UPDATED, onChatChanged)
 })
--- a/public/scripts/extensions/tts/novel.js
+++ b/public/scripts/extensions/tts/novel.js
@ -1,5 +1,6 @@
-import { getRequestHeaders } from "../../../script.js"
-import { getPreviewString } from "./index.js"
+import { getRequestHeaders, callPopup } from "../../../script.js"
+import { getPreviewString, saveTtsProviderSettings } from "./index.js"
+import { initVoiceMap } from "./index.js"

 export { NovelTtsProvider }

@ -14,24 +15,69 @@ class NovelTtsProvider {
    audioElement = document.createElement('audio')

    defaultSettings = {
-        voiceMap: {}
+        voiceMap: {},
+        customVoices: []
    }

    get settingsHtml() {
-        let html = `Use NovelAI's TTS engine.<br>
-        The Voice IDs in the preview list are only examples, as it can be any string of text. Feel free to try different options!<br>
-        <small><i>Hint: Save an API key in the NovelAI API settings to use it here.</i></small>`;
+        let html = `
+        <div class="novel_tts_hints">
+            <div>Use NovelAI's TTS engine.</div>
+            <div>
+                The default Voice IDs are only examples. Add custom voices and Novel will create a new random voice for it.
+                Feel free to try different options!
+            </div>
+            <i>Hint: Save an API key in the NovelAI API settings to use it here.</i>
+        </div>
+        <label for="tts-novel-custom-voices-add">Custom Voices</label>
+        <div class="tts_custom_voices">
+            <select id="tts-novel-custom-voices-select"><select>
+            <i id="tts-novel-custom-voices-add" class="tts-button fa-solid fa-plus fa-xl success" title="Add"></i>
+            <i id="tts-novel-custom-voices-delete" class="tts-button fa-solid fa-xmark fa-xl failure" title="Delete"></i>
+        </div>
+        `;
        return html;
    }

-    onSettingsChange() {
+
+    // Add a new Novel custom voice to provider
+    // Prompts for a name; nothing is stored when the prompt yields no name or the name
+    // is already in the custom voice list.
+    async addCustomVoice(){
+        const voiceName = await callPopup('<h3>Custom Voice name:</h3>', 'input')
+        // NOTE(review): callPopup presumably resolves to a falsy value when the prompt is
+        // cancelled or left empty - confirm. Storing that would add a blank voice entry.
+        if (!voiceName || this.settings.customVoices.includes(voiceName)) {
+            return
+        }
+        this.settings.customVoices.push(voiceName)
+        this.populateCustomVoices()
+        initVoiceMap() // Update TTS extension voiceMap
+        saveTtsProviderSettings()
    }

-    loadSettings(settings) {
+    // Delete selected custom voice from provider
+    // Removes the voice currently selected in the custom voices dropdown (if it is in the
+    // list), then refreshes the dropdown, the voice map UI and the saved settings.
+    deleteCustomVoice() {
+        const selectedVoice = $("#tts-novel-custom-voices-select").find(':selected').val();
+        const customVoices = this.settings.customVoices;
+        const position = customVoices.indexOf(selectedVoice);
+
+        const found = position !== -1;
+        if (found) {
+            customVoices.splice(position, 1);
+        }
+
+        this.populateCustomVoices()
+        initVoiceMap() // Update TTS extension voiceMap
+        saveTtsProviderSettings()
+    }
+
+    // Create the UI dropdown list of voices in provider
+    // Empties `#tts-novel-custom-voices-select` and adds one option per custom voice.
+    populateCustomVoices(){
+        const voiceSelect = $("#tts-novel-custom-voices-select")
+        voiceSelect.empty()
+        for (const voice of this.settings.customVoices) {
+            // Voice names are typed in by the user: set them as text so that markup in a
+            // name is shown literally instead of being parsed as HTML.
+            voiceSelect.append($('<option></option>').text(voice))
+        }
+    }
+
+    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
        }
+        $("#tts-novel-custom-voices-add").on('click', () => (this.addCustomVoice()))
+        $("#tts-novel-custom-voices-delete").on('click',() => (this.deleteCustomVoice()))

        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings
@ -44,11 +90,18 @@ class NovelTtsProvider {
            }
        }

+        this.populateCustomVoices()
+        await this.checkReady()
        console.info("Settings loaded")
    }

+    // Perform a simple readiness check by trying to fetch voiceIds
+    // Doesnt really do much for Novel, not seeing a good way to test this at the moment.
+    async checkReady(){
+        await this.fetchTtsVoiceObjects()
+    }

-    async onApplyClick() {
+    async onRefreshClick() {
        return
    }

@ -72,8 +125,8 @@ class NovelTtsProvider {
    //###########//
    // API CALLS //
    //###########//
-    async fetchTtsVoiceIds() {
-        const voices = [
+    async fetchTtsVoiceObjects() {
+        let voices = [
            { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false },
            { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false },
            { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false },
@ -89,6 +142,12 @@ class NovelTtsProvider {
            { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false },
        ];

+        // Add in custom voices to the map
+        let addVoices = this.settings.customVoices.map(voice =>
+            ({ name: voice, voice_id: voice, lang: 'en-US', preview_url: false })
+        )
+        voices = voices.concat(addVoices)
+
        return voices;
    }

--- a/public/scripts/extensions/tts/readme.md
+++ b/public/scripts/extensions/tts/readme.md
@ -0,0 +1,71 @@
+# Provider Requirements. 
+JavaScript has no formal interface construct, so this document lists what a provider must implement for the extension to operate.
+
+### class YourTtsProvider
+#### Required 
+Exported for use in extension index.js, and added to providers list in index.js
+1. generateTts(text, voiceId)
+2. fetchTtsVoiceObjects()
+3. getVoice(voiceName)
+4. onRefreshClick()
+5. checkReady()
+6. loadSettings(settingsObject)
+7. settings field
+8. settingsHtml field
+
+#### Optional
+1. previewTtsVoice() 
+2. separator field
+
+# Requirement Descriptions
+### generateTts(text, voiceId)
+Must return `audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']`
+Must take text to be rendered and the voiceId to identify the voice to be used
+
+### fetchTtsVoiceObjects()
+Required.
+Used by the TTS extension to get a list of voice objects from the provider.
+Must return a list of voice objects representing the available voices. Each voice object has:
+1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
+2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
+3. preview_url: a URL to a local audio file that will be used to sample voices
+4. lang: OPTIONAL language string
+
+### getVoice(voiceName)
+Required.
+Must return a single voice object matching the provided voiceName. The voice object must have the following at least:
+1. name: a friendly user facing name to assign to characters. Shows in dropdown list next to user.
+2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
+3. preview_url: a URL to a local audio file that will be used to sample voices
+4. lang: OPTIONAL language indicator
+
+### onRefreshClick()
+Required.
+Users click this button to reconnect/reinit the selected provider.
+Responds to the user clicking the refresh button, which is intended to re-initialize the Provider into a working state, like retrying connections or checking if everything is loaded.
+
+### checkReady()
+Required.
+Return without error to let TTS extension know that the provider is ready.
+Throw an error to block the main TTS extension from initializing the provider and UI. The error message will be shown directly in the TTS extension UI.
+
+### loadSettings(settingsObject)
+Required. 
+Handle the input settings from the TTS extension on provider load.
+Put code in here to load your provider settings.
+
+### settings field
+Required, used for storing any provider state that needs to be saved.
+Anything stored in this field is automatically persisted under `extension_settings.tts[providerName]` by the main extension in `saveTtsProviderSettings()`, and is passed back to `loadSettings()` when the provider is selected in `loadTtsProvider(provider)`.
+TTS extension doesn't expect any specific contents. 
+
+### settingsHtml field
+Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
+
+### previewTtsVoice()
+Optional.
+Function to handle playing previews of voice samples if no direct preview_url is available in fetchTtsVoiceObjects() response
+
+### separator field
+Optional.
+Used when narrate quoted text is enabled.
+Defines the string of characters used to introduce separation between the groups of extracted quoted text sent to the provider. The provider uses this to introduce pauses; the default is `...`.
--- a/public/scripts/extensions/tts/silerotts.js
+++ b/public/scripts/extensions/tts/silerotts.js
@ -1,4 +1,5 @@
 import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
+import { saveTtsProviderSettings } from "./index.js"

 export { SileroTtsProvider }

@ -8,6 +9,7 @@ class SileroTtsProvider {
    //########//

    settings
+    ready = false
    voices = []
    separator = ' .. '

@ -29,9 +31,10 @@ class SileroTtsProvider {
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#silero_tts_endpoint').val()
+        saveTtsProviderSettings()
    }

-    loadSettings(settings) {
+    async loadSettings(settings) {
        // Pupulate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
@ -60,11 +63,19 @@ class SileroTtsProvider {
        }, 2000);

        $('#silero_tts_endpoint').val(this.settings.provider_endpoint)
+        $('#silero_tts_endpoint').on("input", () => {this.onSettingsChange()})
+
+        await this.checkReady()
+
        console.info("Settings loaded")
    }

+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady(){
+        await this.fetchTtsVoiceObjects()
+    }

-    async onApplyClick() {
+    async onRefreshClick() {
        return
    }

@ -74,7 +85,7 @@ class SileroTtsProvider {

    async getVoice(voiceName) {
        if (this.voices.length == 0) {
-            this.voices = await this.fetchTtsVoiceIds()
+            this.voices = await this.fetchTtsVoiceObjects()
        }
        const match = this.voices.filter(
            sileroVoice => sileroVoice.name == voiceName
@ -93,7 +104,7 @@ class SileroTtsProvider {
    //###########//
    // API CALLS //
    //###########//
-    async fetchTtsVoiceIds() {
+    async fetchTtsVoiceObjects() {
        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`)
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.json()}`)
--- a/public/scripts/extensions/tts/style.css
+++ b/public/scripts/extensions/tts/style.css
@ -50,4 +50,41 @@

 .voice_preview .fa-play {
    cursor: pointer;
-}
+}
+
+.tts-button {
+    margin: 0;
+    outline: none;
+    border: none;
+    cursor: pointer;
+    transition: 0.3s;
+    opacity: 0.7;
+    align-items: center;
+    justify-content: center;
+
+}
+
+.tts-button:hover {
+    opacity: 1;
+}
+
+.tts_block {
+    display: flex;
+    align-items: baseline;
+    column-gap: 5px;
+    flex-wrap: wrap;
+}
+
+.tts_custom_voices {
+    display: flex;
+    align-items: baseline;
+    gap: 5px;
+}
+
+.novel_tts_hints {
+    font-size: calc(0.9 * var(--mainFontSize));
+    display: flex;
+    flex-direction: column;
+    gap: 5px;
+    margin-bottom: 5px;
+}
--- a/public/scripts/extensions/tts/system.js
+++ b/public/scripts/extensions/tts/system.js
@ -1,7 +1,7 @@
 import { isMobile } from "../../RossAscends-mods.js";
 import { getPreviewString } from "./index.js";
 import { talkingAnimation } from './index.js';
-
+import { saveTtsProviderSettings } from "./index.js"
 export { SystemTtsProvider }

 /**
@ -80,6 +80,7 @@ class SystemTtsProvider {
    //########//

    settings
+    ready = false
    voices = []
    separator = ' ... '

@ -106,10 +107,10 @@ class SystemTtsProvider {
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
-        console.log('Save changes');
+        saveTtsProviderSettings()
    }

-    loadSettings(settings) {
+    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings");
@ -143,19 +144,29 @@ class SystemTtsProvider {

        $('#system_tts_rate').val(this.settings.rate || this.defaultSettings.rate);
        $('#system_tts_pitch').val(this.settings.pitch || this.defaultSettings.pitch);
+
+        // Trigger updates
+        $('#system_tts_rate').on("input", () =>{this.onSettingsChange()})
+        $('#system_tts_rate').on("input", () => {this.onSettingsChange()})
+
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.info("Settings loaded");
    }

-    async onApplyClick() {
+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady(){
+        await this.fetchTtsVoiceObjects()
+    }
+
+    async onRefreshClick() {
        return
    }

    //#################//
    //  TTS Interfaces //
    //#################//
-    fetchTtsVoiceIds() {
+    fetchTtsVoiceObjects() {
        if (!('speechSynthesis' in window)) {
            return [];
        }
--- a/server.js
+++ b/server.js
@ -4894,7 +4894,7 @@ async function readAllChunks(readableStream) {
        });

        readableStream.on('end', () => {
-            console.log('Finished reading the stream.');
+            //console.log('Finished reading the stream.');
            resolve(chunks);
        });