Merge branch 'staging' into X-T-E-R/release

2025-06-05 21:59:27 +02:00 · 2024-05-18 19:52:33 +03:00
parent 6c44f5b3fd b93131ec7a
commit 93c3e9e1dd
402 changed files with 55418 additions and 23307 deletions
--- a/public/scripts/extensions/tts/alltalk.js
+++ b/public/scripts/extensions/tts/alltalk.js
@@ -433,8 +433,8 @@ class AllTalkTtsProvider {
    updateLanguageDropdown() {
        const languageSelect = document.getElementById('language_options');
        if (languageSelect) {
-            // Ensure default language is set
-            this.settings.language = this.settings.language;
+            // Disabled: this self-assignment was labelled "ensure default language is set" but appears to be a no-op.
+            // this.settings.language = this.settings.language;

            languageSelect.innerHTML = '';
            for (let language in this.languageLabels) {
--- a/public/scripts/extensions/tts/edge.js
+++ b/public/scripts/extensions/tts/edge.js
@@ -6,6 +6,11 @@ import { saveTtsProviderSettings } from './index.js';

 export { EdgeTtsProvider };

+const EDGE_TTS_PROVIDER = { // Selectable backends; the chosen value is stored in settings.provider
+    extras: 'extras', // requests are sent through doExtrasFetch to the Extras API URL
+    plugin: 'plugin', // requests go to the /api/plugins/edge-tts/* routes via fetch
+};
+
 class EdgeTtsProvider {
    //########//
    // Config //
@@ -19,18 +24,26 @@ class EdgeTtsProvider {
    defaultSettings = {
        voiceMap: {},
        rate: 0,
+        provider: EDGE_TTS_PROVIDER.extras,
    };

    get settingsHtml() {
-        let html = `Microsoft Edge TTS Provider<br>
+        let html = `Microsoft Edge TTS<br>
+        <label for="edge_tts_provider">Provider</label>
+        <select id="edge_tts_provider">
+            <option value="${EDGE_TTS_PROVIDER.extras}">Extras</option>
+            <option value="${EDGE_TTS_PROVIDER.plugin}">Plugin</option>
+        </select>
        <label for="edge_tts_rate">Rate: <span id="edge_tts_rate_output"></span></label>
-        <input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />`;
+        <input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />
+        `;
        return html;
    }

    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
+        this.settings.provider = String($('#edge_tts_provider').val());
        saveTtsProviderSettings();
    }

@@ -53,16 +66,19 @@ class EdgeTtsProvider {

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
-        $('#edge_tts_rate').on('input', () => {this.onSettingsChange();});
+        $('#edge_tts_rate').on('input', () => { this.onSettingsChange(); });
+        $('#edge_tts_provider').val(this.settings.provider || EDGE_TTS_PROVIDER.extras);
+        $('#edge_tts_provider').on('change', () => { this.onSettingsChange(); });
        await this.checkReady();

        console.debug('EdgeTTS: Settings loaded');
    }

-
-    // Perform a simple readiness check by trying to fetch voiceIds
-    async checkReady(){
-        throwIfModuleMissing();
+    /**
+    * Perform a simple readiness check by trying to fetch voiceIds
+    */
+    async checkReady() {
+        await this.throwIfModuleMissing();
        await this.fetchTtsVoiceObjects();
    }

@@ -74,6 +90,11 @@ class EdgeTtsProvider {
    //  TTS Interfaces //
    //#################//

+    /**
+     * Get a voice from the TTS provider.
+     * @param {string} voiceName Voice name to get
+     * @returns {Promise<Object>} Voice object
+     */
    async getVoice(voiceName) {
        if (this.voices.length == 0) {
            this.voices = await this.fetchTtsVoiceObjects();
@@ -87,6 +108,12 @@ class EdgeTtsProvider {
        return match;
    }

+    /**
+     * Generate TTS for a given text.
+     * @param {string} text Text to generate TTS for
+     * @param {string} voiceId Voice ID to use
+     * @returns {Promise<Response>} Fetch response
+     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
@@ -96,11 +123,10 @@ class EdgeTtsProvider {
    // API CALLS //
    //###########//
    async fetchTtsVoiceObjects() {
-        throwIfModuleMissing();
+        await this.throwIfModuleMissing();

-        const url = new URL(getApiUrl());
-        url.pathname = '/api/edge-tts/list';
-        const response = await doExtrasFetch(url);
+        const url = this.getVoicesUrl();
+        const response = await this.doFetch(url);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
@@ -111,7 +137,10 @@ class EdgeTtsProvider {
        return responseJson;
    }

-
+    /**
+     * Preview TTS for a given voice ID.
+     * @param {string} id Voice ID
+     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
@@ -128,13 +157,18 @@ class EdgeTtsProvider {
        this.audioElement.play();
    }

+    /**
+     * Fetch TTS generation from the API.
+     * @param {string} inputText Text to generate TTS for
+     * @param {string} voiceId Voice ID to use
+     * @returns {Promise<Response>} Fetch response
+     */
    async fetchTtsGeneration(inputText, voiceId) {
-        throwIfModuleMissing();
+        await this.throwIfModuleMissing();

        console.info(`Generating new TTS for voice_id ${voiceId}`);
-        const url = new URL(getApiUrl());
-        url.pathname = '/api/edge-tts/generate';
-        const response = await doExtrasFetch(url,
+        const url = this.getGenerateUrl();
+        const response = await this.doFetch(url,
            {
                method: 'POST',
                headers: getRequestHeaders(),
@@ -151,12 +185,85 @@ class EdgeTtsProvider {
        }
        return response;
    }
-}
-function throwIfModuleMissing() {
-    if (!modules.includes('edge-tts')) {
-        const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
-        // toastr.error(message)
-        throw new Error(message);
+
+    /**
+     * Dispatch a request through whichever backend `settings.provider` selects.
+     * @param {string} url Target URL
+     * @param {any} options Options forwarded unchanged to the underlying fetch call
+     * @returns {Promise<Response>} Response promise from doExtrasFetch or fetch
+     * @throws {Error} If the provider is neither `extras` nor `plugin`
+     */
+    doFetch(url, options) {
+        switch (this.settings.provider) {
+            case EDGE_TTS_PROVIDER.extras:
+                return doExtrasFetch(url, options);
+            case EDGE_TTS_PROVIDER.plugin:
+                return fetch(url, options);
+            default:
+                throw new Error('Invalid TTS Provider');
+        }
+    }
+
+    /**
+     * Resolve the speech-generation endpoint for the active provider; throws on an unknown provider.
+     * @returns {string} Absolute Extras URL, or the server-relative plugin path
+     */
+    getGenerateUrl() {
+        switch (this.settings.provider) {
+            case EDGE_TTS_PROVIDER.extras: {
+                const endpoint = new URL(getApiUrl());
+                endpoint.pathname = '/api/edge-tts/generate';
+                return endpoint.toString();
+            }
+            case EDGE_TTS_PROVIDER.plugin:
+                return '/api/plugins/edge-tts/generate';
+            default:
+                throw new Error('Invalid TTS Provider');
+        }
+    }
+
+    /**
+     * Resolve the voice-listing endpoint for the active provider; throws on an unknown provider.
+     * @returns {string} Absolute Extras URL, or the server-relative plugin path (always a string)
+     */
+    getVoicesUrl() {
+        switch (this.settings.provider) {
+            case EDGE_TTS_PROVIDER.extras: {
+                const endpoint = new URL(getApiUrl());
+                endpoint.pathname = '/api/edge-tts/list';
+                return endpoint.toString();
+            }
+            case EDGE_TTS_PROVIDER.plugin:
+                return '/api/plugins/edge-tts/list';
+            default:
+                throw new Error('Invalid TTS Provider');
+        }
+    }
+
+    async throwIfModuleMissing() { // Rejects with an Error when the selected backend is not available
+        if (this.settings.provider === EDGE_TTS_PROVIDER.extras && !modules.includes('edge-tts')) {
+            const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
+            // No toast here; the error propagates to the caller.
+            throw new Error(message);
+        }
+
+        if (this.settings.provider === EDGE_TTS_PROVIDER.plugin && !(await this.isPluginAvailable())) {
+            const message = 'Edge TTS Server plugin not loaded. Install it from https://github.com/SillyTavern/SillyTavern-EdgeTTS-Plugin and restart the SillyTavern server.';
+            // The probe above is awaited because isPluginAvailable() returns a Promise, which is always truthy.
+            throw new Error(message);
+        }
+    }
+
+    async isPluginAvailable() {
+        // Resolves true only when the probe route answers with an OK status
+        try {
+            const probeRequest = { method: 'POST', headers: getRequestHeaders() };
+            const probeResponse = await fetch('/api/plugins/edge-tts/probe', probeRequest);
+            return probeResponse.ok;
+        } catch {
+            // Any thrown error is treated as "plugin not available"
+            return false;
+        }
+    }
 }

--- a/public/scripts/extensions/tts/elevenlabs.js
+++ b/public/scripts/extensions/tts/elevenlabs.js
@@ -14,6 +14,8 @@ class ElevenLabsTtsProvider {
    defaultSettings = {
        stability: 0.75,
        similarity_boost: 0.75,
+        style_exaggeration: 0.00,
+        speaker_boost: true,
        apiKey: '',
        model: 'eleven_monolingual_v1',
        voiceMap: {},
@@ -26,27 +28,57 @@ class ElevenLabsTtsProvider {
            <input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/>
            <label for="elevenlabs_tts_model">Model</label>
            <select id="elevenlabs_tts_model" class="text_pole">
-                <option value="eleven_monolingual_v1">Monolingual</option>
+                <option value="eleven_monolingual_v1">English v1</option>
                <option value="eleven_multilingual_v1">Multilingual v1</option>
                <option value="eleven_multilingual_v2">Multilingual v2</option>
+                <option value="eleven_turbo_v2">Turbo v2</option>
            </select>
            <input id="eleven_labs_connect" class="menu_button" type="button" value="Connect" />
            <label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
-            <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.05" />
+            <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.01" />
            <label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
-            <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.05" />
+            <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.01" />
+            <div id="elevenlabs_tts_v2_options" style="display: none;">
+                <label for="elevenlabs_tts_style_exaggeration">Style Exaggeration: <span id="elevenlabs_tts_style_exaggeration_output"></span></label>
+                <input id="elevenlabs_tts_style_exaggeration" type="range" value="${this.defaultSettings.style_exaggeration}" min="0" max="1" step="0.01" />
+                <label for="elevenlabs_tts_speaker_boost">Speaker Boost:</label>
+                <input id="elevenlabs_tts_speaker_boost" style="display: inline-grid" type="checkbox" />
+            </div>
+            <hr>
+            <div id="elevenlabs_tts_voice_cloning">
+                <span>Instant Voice Cloning</span><br>
+                <input id="elevenlabs_tts_voice_cloning_name" type="text" class="text_pole" placeholder="Voice Name"/>
+                <input id="elevenlabs_tts_voice_cloning_description" type="text" class="text_pole" placeholder="Voice Description"/>
+                <input id="elevenlabs_tts_voice_cloning_labels" type="text" class="text_pole" placeholder="Labels"/>
+                <div class="menu_button menu_button_icon" id="upload_audio_file">
+                    <i class="fa-solid fa-file-import"></i>
+                    <span>Upload Audio Files</span>
+                </div>
+                <input id="elevenlabs_tts_audio_files" type="file" name="audio_files" accept="audio/*" style="display: none;" multiple>
+                <div id="elevenlabs_tts_selected_files_list"></div>
+                <input id="elevenlabs_tts_clone_voice_button" class="menu_button menu_button_icon" type="button" value="Clone Voice">
+            </div>
+            <hr>
        </div>
        `;
        return html;
    }

+    shouldInvolveExtendedSettings() { // Gates the v2-only options block and the extra voice settings in the request
+        return this.settings.model === 'eleven_multilingual_v2';
+    }
+
    onSettingsChange() {
        // Update dynamically
        this.settings.stability = $('#elevenlabs_tts_stability').val();
        this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val();
+        this.settings.style_exaggeration = $('#elevenlabs_tts_style_exaggeration').val();
+        this.settings.speaker_boost = $('#elevenlabs_tts_speaker_boost').is(':checked');
        this.settings.model = $('#elevenlabs_tts_model').find(':selected').val();
-        $('#elevenlabs_tts_stability_output').text(this.settings.stability);
-        $('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost);
+        $('#elevenlabs_tts_stability_output').text(Math.round(this.settings.stability * 100) + '%'); // rounded: 0.07 * 100 is 7.000000000000001
+        $('#elevenlabs_tts_similarity_boost_output').text(Math.round(this.settings.similarity_boost * 100) + '%');
+        $('#elevenlabs_tts_style_exaggeration_output').text(Math.round(this.settings.style_exaggeration * 100) + '%');
+        $('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
        saveTtsProviderSettings();
    }

@@ -75,21 +107,28 @@ class ElevenLabsTtsProvider {

        $('#elevenlabs_tts_stability').val(this.settings.stability);
        $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost);
+        $('#elevenlabs_tts_style_exaggeration').val(this.settings.style_exaggeration);
+        $('#elevenlabs_tts_speaker_boost').prop('checked', this.settings.speaker_boost);
        $('#elevenlabs_tts_api_key').val(this.settings.apiKey);
        $('#elevenlabs_tts_model').val(this.settings.model);
        $('#eleven_labs_connect').on('click', () => { this.onConnectClick(); });
        $('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this));
+        $('#elevenlabs_tts_style_exaggeration').on('input', this.onSettingsChange.bind(this));
+        $('#elevenlabs_tts_speaker_boost').on('change', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this));
        $('#elevenlabs_tts_stability_output').text(this.settings.stability);
        $('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost);
-
+        $('#elevenlabs_tts_style_exaggeration_output').text(this.settings.style_exaggeration);
+        $('#elevenlabs_tts_v2_options').toggle(this.shouldInvolveExtendedSettings());
        try {
            await this.checkReady();
            console.debug('ElevenLabs: Settings loaded');
        } catch {
            console.debug('ElevenLabs: Settings loaded, but not ready');
        }
+
+        this.setupVoiceCloningMenu();
    }

    // Perform a simple readiness check by trying to fetch voiceIds
@@ -107,6 +146,63 @@ class ElevenLabsTtsProvider {
        });
    }

+    setupVoiceCloningMenu() { // Wires up the "Instant Voice Cloning" controls looked up by element id below
+        const audioFilesInput = document.getElementById('elevenlabs_tts_audio_files');
+        const selectedFilesListElement = document.getElementById('elevenlabs_tts_selected_files_list');
+        const cloneVoiceButton = document.getElementById('elevenlabs_tts_clone_voice_button');
+        const uploadAudioFileButton = document.getElementById('upload_audio_file');
+        const voiceCloningNameInput = document.getElementById('elevenlabs_tts_voice_cloning_name');
+        const voiceCloningDescriptionInput = document.getElementById('elevenlabs_tts_voice_cloning_description');
+        const voiceCloningLabelsInput = document.getElementById('elevenlabs_tts_voice_cloning_labels');
+        // The Clone button is only shown while at least one audio file is selected.
+        const updateCloneVoiceButtonVisibility = () => {
+            cloneVoiceButton.style.display = audioFilesInput.files.length > 0 ? 'inline-block' : 'none';
+        };
+        // Reset the file picker and the rendered file list, then refresh the button visibility.
+        const clearSelectedFiles = () => {
+            audioFilesInput.value = '';
+            selectedFilesListElement.innerHTML = '';
+            updateCloneVoiceButtonVisibility();
+        };
+        // The visible upload button forwards its click to the file input element.
+        uploadAudioFileButton.addEventListener('click', () => {
+            audioFilesInput.click();
+        });
+        // Re-render the list of selected file names whenever the selection changes.
+        audioFilesInput.addEventListener('change', () => {
+            selectedFilesListElement.innerHTML = '';
+            for (const file of audioFilesInput.files) {
+                const listItem = document.createElement('div');
+                listItem.textContent = file.name;
+                selectedFilesListElement.appendChild(listItem);
+            }
+            updateCloneVoiceButtonVisibility();
+        });
+        // Require a name, upload through addVoice(), and clear the form only on success.
+        cloneVoiceButton.addEventListener('click', async () => {
+            const voiceName = voiceCloningNameInput.value.trim();
+            const voiceDescription = voiceCloningDescriptionInput.value.trim();
+            const voiceLabels = voiceCloningLabelsInput.value.trim();
+
+            if (!voiceName) {
+                toastr.error('Please provide a name for the cloned voice.');
+                return;
+            }
+
+            try {
+                await this.addVoice(voiceName, voiceDescription, voiceLabels);
+                toastr.success('Voice cloned successfully. Hit reload to see the new voice in the voice listing.');
+                clearSelectedFiles();
+                voiceCloningNameInput.value = '';
+                voiceCloningDescriptionInput.value = '';
+                voiceCloningLabelsInput.value = '';
+            } catch (error) {
+                toastr.error(`Failed to clone voice: ${error.message}`);
+            }
+        });
+        // Apply the initial visibility for the current selection.
+        updateCloneVoiceButtonVisibility();
+    }

    async updateApiKey() {
        // Using this call to validate API key
@@ -206,24 +302,26 @@ class ElevenLabsTtsProvider {
    async fetchTtsGeneration(text, voiceId) {
        let model = this.settings.model ?? 'eleven_monolingual_v1';
        console.info(`Generating new TTS for voice_id ${voiceId}, model ${model}`);
-        const response = await fetch(
-            `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`,
-            {
-                method: 'POST',
-                headers: {
-                    'xi-api-key': this.settings.apiKey,
-                    'Content-Type': 'application/json',
-                },
-                body: JSON.stringify({
-                    model_id: model,
-                    text: text,
-                    voice_settings: {
-                        stability: Number(this.settings.stability),
-                        similarity_boost: Number(this.settings.similarity_boost),
-                    },
-                }),
+        const request = {
+            model_id: model,
+            text: text,
+            voice_settings: {
+                stability: Number(this.settings.stability),
+                similarity_boost: Number(this.settings.similarity_boost),
            },
-        );
+        };
+        if (this.shouldInvolveExtendedSettings()) { // API field names differ from the local setting names
+            request.voice_settings.style = Number(this.settings.style_exaggeration);
+            request.voice_settings.use_speaker_boost = Boolean(this.settings.speaker_boost);
+        }
+        const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
+            method: 'POST',
+            headers: {
+                'xi-api-key': this.settings.apiKey,
+                'Content-Type': 'application/json',
+            },
+            body: JSON.stringify(request),
+        });
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
@@ -260,4 +358,33 @@ class ElevenLabsTtsProvider {
        const responseJson = await response.json();
        return responseJson.history;
    }
+
+    async addVoice(name, description, labels) { // Uploads the selected audio files as a new voice; resolves to the parsed JSON reply
+        const fileInputs = document.querySelectorAll('input[type="file"][name="audio_files"]');
+        const formData = new FormData();
+
+        formData.append('name', name);
+        formData.append('description', description);
+        formData.append('labels', labels);
+
+        for (const input of fileInputs) { // the picker is `multiple`, so send every chosen file, not just the first
+            for (const file of input.files) {
+                formData.append('files', file);
+            }
+        }
+
+        const response = await fetch('https://api.elevenlabs.io/v1/voices/add', {
+            method: 'POST',
+            headers: {
+                'xi-api-key': this.settings.apiKey, // no Content-Type: fetch derives the multipart boundary from the FormData body
+            },
+            body: formData,
+        });
+
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
+
+        return await response.json();
+    }
 }
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -1,4 +1,4 @@
-import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
+import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced, substituteParams } from '../../../script.js';
 import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
 import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
 import { EdgeTtsProvider } from './edge.js';
@@ -8,20 +8,23 @@ import { CoquiTtsProvider } from './coqui.js';
 import { SystemTtsProvider } from './system.js';
 import { NovelTtsProvider } from './novel.js';
 import { power_user } from '../../power-user.js';
-import { registerSlashCommand } from '../../slash-commands.js';
 import { OpenAITtsProvider } from './openai.js';
 import { XTTSTtsProvider } from './xtts.js';
 import { GSVITtsProvider } from './gsvi.js';
 import { AllTalkTtsProvider } from './alltalk.js';
 import { SpeechT5TtsProvider } from './speecht5.js';
+import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
+import { SlashCommand } from '../../slash-commands/SlashCommand.js';
+import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
 export { talkingAnimation };

 const UPDATE_INTERVAL = 1000;

 let voiceMapEntries = [];
 let voiceMap = {}; // {charName:voiceid, charName2:voiceid2}
-let storedvalue = false;
+let talkingHeadState = false;
 let lastChatId = null;
+let lastMessage = null;
 let lastMessageHash = null;

 const DEFAULT_VOICE_MARKER = '[Default Voice]';
@@ -68,7 +71,7 @@ export function getPreviewString(lang) {
    return previewStrings[lang] ?? fallbackPreview;
 }

-let ttsProviders = {
+const ttsProviders = {
    ElevenLabs: ElevenLabsTtsProvider,
    Silero: SileroTtsProvider,
    XTTSv2: XTTSTtsProvider,
@@ -84,7 +87,6 @@ let ttsProviders = {
 let ttsProvider;
 let ttsProviderName;

-let ttsLastMessage = null;

 async function onNarrateOneMessage() {
    audioElement.src = '/sounds/silence.mp3';
@@ -132,103 +134,13 @@ async function onNarrateText(args, text) {
 }

 async function moduleWorker() {
-    // Primarily determining when to add new chat to the TTS queue
-    const enabled = $('#tts_enabled').is(':checked');
-    $('body').toggleClass('tts', enabled);
-    if (!enabled) {
+    if (!extension_settings.tts.enabled) {
        return;
    }

-    const context = getContext();
-    const chat = context.chat;
-
    processTtsQueue();
    processAudioJobQueue();
    updateUiAudioPlayState();
-
-    // Auto generation is disabled
-    if (extension_settings.tts.auto_generation == false) {
-        return;
-    }
-
-    // no characters or group selected
-    if (!context.groupId && context.characterId === undefined) {
-        return;
-    }
-
-    // Chat changed
-    if (
-        context.chatId !== lastChatId
-    ) {
-        currentMessageNumber = context.chat.length ? context.chat.length : 0;
-        saveLastValues();
-
-        // Force to speak on the first message in the new chat
-        if (context.chat.length === 1) {
-            lastMessageHash = -1;
-        }
-
-        return;
-    }
-
-    // take the count of messages
-    let lastMessageNumber = context.chat.length ? context.chat.length : 0;
-
-    // There's no new messages
-    let diff = lastMessageNumber - currentMessageNumber;
-    let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
-
-    // if messages got deleted, diff will be < 0
-    if (diff < 0) {
-        // necessary actions will be taken by the onChatDeleted() handler
-        return;
-    }
-
-    // if no new messages, or same message, or same message hash, do nothing
-    if (diff == 0 && hashNew === lastMessageHash) {
-        return;
-    }
-
-    // If streaming, wait for streaming to finish before processing new messages
-    if (context.streamingProcessor && !context.streamingProcessor.isFinished) {
-        return;
-    }
-
-    // clone message object, as things go haywire if message object is altered below (it's passed by reference)
-    const message = structuredClone(chat[chat.length - 1]);
-
-    // if last message within current message, message got extended. only send diff to TTS.
-    if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) {
-        let tmp = message.mes;
-        message.mes = message.mes.replace(ttsLastMessage, '');
-        ttsLastMessage = tmp;
-    } else {
-        ttsLastMessage = message.mes;
-    }
-
-    // We're currently swiping. Don't generate voice
-    if (!message || message.mes === '...' || message.mes === '') {
-        return;
-    }
-
-    // Don't generate if message doesn't have a display text
-    if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
-        return;
-    }
-
-    // Don't generate if message is a user message and user message narration is disabled
-    if (message.is_user && !extension_settings.tts.narrate_user) {
-        return;
-    }
-
-    // New messages, add new chat to history
-    lastMessageHash = hashNew;
-    currentMessageNumber = lastMessageNumber;
-
-    console.debug(
-        `Adding message from ${message.name} for TTS processing: "${message.mes}"`,
-    );
-    ttsJobQueue.push(message);
 }

 function talkingAnimation(switchValue) {
@@ -240,11 +152,11 @@ function talkingAnimation(switchValue) {
    const apiUrl = getApiUrl();
    const animationType = switchValue ? 'start' : 'stop';

-    if (switchValue !== storedvalue) {
+    if (switchValue !== talkingHeadState) {
        try {
            console.log(animationType + ' Talking Animation');
            doExtrasFetch(`${apiUrl}/api/talkinghead/${animationType}_talking`);
-            storedvalue = switchValue; // Update the storedvalue to the current switchValue
+            talkingHeadState = switchValue;
        } catch (error) {
            // Handle the error here or simply ignore it to prevent logging
        }
@@ -291,7 +203,6 @@ function debugTtsPlayback() {
        {
            'ttsProviderName': ttsProviderName,
            'voiceMap': voiceMap,
-            'currentMessageNumber': currentMessageNumber,
            'audioPaused': audioPaused,
            'audioJobQueue': audioJobQueue,
            'currentAudioJob': currentAudioJob,
@@ -352,6 +263,7 @@ async function playAudioData(audioJob) {
    audioElement.addEventListener('ended', completeCurrentAudioJob);
    audioElement.addEventListener('canplay', () => {
        console.debug('Starting TTS playback');
+        audioElement.playbackRate = extension_settings.tts.playback_rate;
        audioElement.play();
    });
 }
@@ -467,6 +379,7 @@ async function processAudioJobQueue() {
        playAudioData(currentAudioJob);
        talkingAnimation(true);
    } catch (error) {
+        toastr.error(error.toString());
        console.error(error);
        audioQueueProcessorReady = true;
    }
@@ -478,21 +391,12 @@ async function processAudioJobQueue() {

 let ttsJobQueue = [];
 let currentTtsJob; // Null if nothing is currently being processed
-let currentMessageNumber = 0;

 function completeTtsJob() {
    console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
    currentTtsJob = null;
 }

-function saveLastValues() {
-    const context = getContext();
-    lastChatId = context.chatId;
-    lastMessageHash = getStringHash(
-        (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '',
-    );
-}
-
 async function tts(text, voiceId, char) {
    async function processResponse(response) {
        // RVC injection
@@ -526,11 +430,18 @@ async function processTtsQueue() {
    currentTtsJob = ttsJobQueue.shift();
    let text = extension_settings.tts.narrate_translated_only ? (currentTtsJob?.extra?.display_text || currentTtsJob.mes) : currentTtsJob.mes;

+    // Substitute macros
+    text = substituteParams(text);
+
    if (extension_settings.tts.skip_codeblocks) {
        text = text.replace(/^\s{4}.*$/gm, '').trim();
        text = text.replace(/```.*?```/gs, '').trim();
    }

+    if (extension_settings.tts.skip_tags) { // [\s\S] lets the tag body span line breaks, which `.` would not
+        text = text.replace(/<.*?>[\s\S]*?<\/.*?>/g, '').trim();
+    }
+
    if (!extension_settings.tts.pass_asterisks) {
        text = extension_settings.tts.narrate_dialogues_only
            ? text.replace(/\*[^*]*?(\*|$)/g, '').trim() // remove asterisks content
@@ -579,8 +490,9 @@ async function processTtsQueue() {
            toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`);
            throw `Unable to attain voiceId for ${char}`;
        }
-        tts(text, voiceId, char);
+        await tts(text, voiceId, char);
    } catch (error) {
+        toastr.error(error.toString());
        console.error(error);
        currentTtsJob = null;
    }
@@ -618,6 +530,12 @@ function loadSettings() {
    $('#tts_narrate_translated_only').prop('checked', extension_settings.tts.narrate_translated_only);
    $('#tts_narrate_user').prop('checked', extension_settings.tts.narrate_user);
    $('#tts_pass_asterisks').prop('checked', extension_settings.tts.pass_asterisks);
+    $('#tts_skip_codeblocks').prop('checked', extension_settings.tts.skip_codeblocks);
+    $('#tts_skip_tags').prop('checked', extension_settings.tts.skip_tags);
+    $('#playback_rate').val(extension_settings.tts.playback_rate);
+    $('#playback_rate_counter').val(Number(extension_settings.tts.playback_rate).toFixed(2));
+    $('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');
+
    $('body').toggleClass('tts', extension_settings.tts.enabled);
 }

@@ -627,6 +545,7 @@ const defaultSettings = {
    currentProvider: 'ElevenLabs',
    auto_generation: true,
    narrate_user: false,
+    playback_rate: 1,
 };

 function setTtsStatus(status, success) {
@@ -650,6 +569,7 @@ function onRefreshClick() {
        initVoiceMap();
        updateVoiceMap();
    }).catch(error => {
+        toastr.error(error.toString());
        console.error(error);
        setTtsStatus(error, false);
    });
@@ -696,6 +616,11 @@ function onSkipCodeblocksClick() {
    saveSettingsDebounced();
 }

+function onSkipTagsClick() { // Copies the #tts_skip_tags checkbox state into the TTS settings and schedules a save
+    extension_settings.tts.skip_tags = !!$('#tts_skip_tags').prop('checked');
+    saveSettingsDebounced();
+}
+
 function onPassAsterisksClick() {
    extension_settings.tts.pass_asterisks = !!$('#tts_pass_asterisks').prop('checked');
    saveSettingsDebounced();
@@ -732,6 +657,7 @@ async function loadTtsProvider(provider) {
 function onTtsProviderChange() {
    const ttsProviderSelection = $('#tts_provider').val();
    extension_settings.tts.currentProvider = ttsProviderSelection;
+    $('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');
    loadTtsProvider(ttsProviderSelection);
 }

@@ -752,26 +678,103 @@ async function onChatChanged() {
    await resetTtsPlayback();
    const voiceMapInit = initVoiceMap();
    await Promise.race([voiceMapInit, delay(1000)]);
-    ttsLastMessage = null;
+    lastMessage = null;
 }

-async function onChatDeleted() {
+/**
+ * Queues the rendered chat message at `messageId` for TTS narration when auto generation applies.
+ * @param {number} messageId Index of the rendered message in the current chat (context.chat)
+ * @returns {Promise<void>}
+ */
+async function onMessageEvent(messageId) {
+    // If TTS or auto generation is disabled, do nothing
+    if (!extension_settings.tts.enabled || !extension_settings.tts.auto_generation) {
+        return;
+    }
+
+    const context = getContext();
+
+    // no characters or group selected
+    if (!context.groupId && context.characterId === undefined) {
+        return;
+    }
+
+    // Chat changed
+    if (context.chatId !== lastChatId) {
+        lastChatId = context.chatId;
+        lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? '');
+
+        // Force to speak on the first message in the new chat
+        if (context.chat.length === 1) {
+            lastMessageHash = -1;
+        }
+    }
+
+    // clone message object, as things go haywire if message object is altered below (it's passed by reference)
+    const message = structuredClone(context.chat[messageId]);
+    const hashNew = getStringHash(message?.mes ?? '');
+
+    // if no new messages, or same message, or same message hash, do nothing
+    if (hashNew === lastMessageHash) {
+        return;
+    }
+
+    const isLastMessageInCurrent = () =>
+        message && lastMessage && // message is undefined when messageId has no chat entry
+        typeof lastMessage === 'object' &&
+        message.swipe_id === lastMessage.swipe_id &&
+        message.name === lastMessage.name  &&
+        message.is_user === lastMessage.is_user  &&
+        message.mes.indexOf(lastMessage.mes) !== -1;
+
+    // if last message within current message, message got extended. only send diff to TTS.
+    if (isLastMessageInCurrent()) {
+        const tmp = structuredClone(message);
+        message.mes = message.mes.replace(lastMessage.mes, '');
+        lastMessage = tmp;
+    } else {
+        lastMessage = structuredClone(message);
+    }
+
+    // Message is missing or empty, or we're currently swiping. Don't generate voice
+    if (!message || message.mes === '...' || message.mes === '') {
+        return;
+    }
+
+    // Don't generate if message doesn't have a display text
+    if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
+        return;
+    }
+
+    // Don't generate if message is a user message and user message narration is disabled
+    if (message.is_user && !extension_settings.tts.narrate_user) {
+        return;
+    }
+
+    // New messages, add new chat to history
+    lastMessageHash = hashNew;
+    lastChatId = context.chatId;
+
+    console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
+    ttsJobQueue.push(message);
+}
+
+async function onMessageDeleted() {
    const context = getContext();

    // update internal references to new last message
    lastChatId = context.chatId;
-    currentMessageNumber = context.chat.length ? context.chat.length : 0;

    // compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue
-    let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
+    const messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
    if (messageHash === lastMessageHash) {
        return;
    }
    lastMessageHash = messageHash;
-    ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '';
+    lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null;

    // stop any tts playback since message might not exist anymore
-    await resetTtsPlayback();
+    resetTtsPlayback();
 }

 /**
@@ -1019,11 +1022,29 @@ $(document).ready(function () {
                            <input type="checkbox" id="tts_skip_codeblocks">
                            <small>Skip codeblocks</small>
                        </label>
+                        <label class="checkbox_label" for="tts_skip_tags">
+                            <input type="checkbox" id="tts_skip_tags">
+                            <small>Skip &lt;tagged&gt; blocks</small>
+                        </label>
                        <label class="checkbox_label" for="tts_pass_asterisks">
                        <input type="checkbox" id="tts_pass_asterisks">
                        <small>Pass Asterisks to TTS Engine</small>
                        </label>
                    </div>
+                    <div id="playback_rate_block" class="range-block">
+                        <hr>
+                        <div class="range-block-title justifyLeft" data-i18n="Audio Playback Speed">
+                            <small>Audio Playback Speed</small>
+                        </div>
+                        <div class="range-block-range-and-counter">
+                            <div class="range-block-range">
+                                <input type="range" id="playback_rate" name="volume" min="0" max="3" step="0.05">
+                            </div>
+                            <div class="range-block-counter">
+                                <input type="number" min="0" max="3" step="0.05" data-for="playback_rate" id="playback_rate_counter">
+                            </div>
+                        </div>
+                    </div>
                    <div id="tts_voicemap_block">
                    </div>
                    <hr>
@@ -1044,9 +1065,19 @@ $(document).ready(function () {
        $('#tts_narrate_quoted').on('click', onNarrateQuotedClick);
        $('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick);
        $('#tts_skip_codeblocks').on('click', onSkipCodeblocksClick);
+        $('#tts_skip_tags').on('click', onSkipTagsClick);
        $('#tts_pass_asterisks').on('click', onPassAsterisksClick);
        $('#tts_auto_generation').on('click', onAutoGenerationClick);
        $('#tts_narrate_user').on('click', onNarrateUserClick);
+
+        $('#playback_rate').on('input', function () {
+            const value = $(this).val();
+            const formattedValue = Number(value).toFixed(2);
+            extension_settings.tts.playback_rate = value;
+            $('#playback_rate_counter').val(formattedValue);
+            saveSettingsDebounced();
+        });
+
        $('#tts_voices').on('click', onTtsVoicesClick);
        for (const provider in ttsProviders) {
            $('#tts_provider').append($('<option />').val(provider).text(provider));
@@ -1062,8 +1093,40 @@ $(document).ready(function () {
    setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
    eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
    eventSource.on(event_types.CHAT_CHANGED, onChatChanged);
-    eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted);
+    eventSource.on(event_types.MESSAGE_DELETED, onMessageDeleted);
    eventSource.on(event_types.GROUP_UPDATED, onChatChanged);
-    registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], '<span class="monospace">(text)</span>  – narrate any text using currently selected character\'s voice. Use voice="Character Name" argument to set other voice from the voice map, example: <tt>/speak voice="Donald Duck" Quack!</tt>', true, true);
+    eventSource.makeLast(event_types.CHARACTER_MESSAGE_RENDERED, onMessageEvent);
+    eventSource.makeLast(event_types.USER_MESSAGE_RENDERED, onMessageEvent);
+    SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'speak',
+        callback: onNarrateText,
+        aliases: ['narrate', 'tts'],
+        namedArgumentList: [
+            new SlashCommandNamedArgument(
+                'voice', 'character voice name', [ARGUMENT_TYPE.STRING], false,
+            ),
+        ],
+        unnamedArgumentList: [
+            new SlashCommandArgument(
+                'text', [ARGUMENT_TYPE.STRING], true,
+            ),
+        ],
+        helpString: `
+            <div>
+                Narrate any text using currently selected character's voice.
+            </div>
+            <div>
+                Use <code>voice="Character Name"</code> argument to set other voice from the voice map.
+            </div>
+            <div>
+                <strong>Example:</strong>
+                <ul>
+                    <li>
+                        <pre><code>/speak voice="Donald Duck" Quack!</code></pre>
+                    </li>
+                </ul>
+            </div>
+        `,
+    }));
+
    document.body.appendChild(audioElement);
 });
--- a/public/scripts/extensions/tts/novel.js
+++ b/public/scripts/extensions/tts/novel.js
@@ -28,6 +28,8 @@ class NovelTtsProvider {
    processText(text) {
        // Novel reads tilde as a word. Replace with full stop
        text = text.replace(/~/g, '.');
+        // Novel reads asterisk as a word. Remove it
+        text = text.replace(/\*/g, '');
        return text;
    }

--- a/public/scripts/extensions/tts/silerotts.js
+++ b/public/scripts/extensions/tts/silerotts.js
@@ -11,7 +11,7 @@ class SileroTtsProvider {
    settings;
    ready = false;
    voices = [];
-    separator = ' .. ';
+    separator = ' ';

    defaultSettings = {
        provider_endpoint: 'http://localhost:8001/tts',