diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 44b388cbc..7bca9c258 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -1,6 +1,6 @@ import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js'; import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js'; -import { delay, escapeRegex, getStringHash, onlyUnique } from '../../utils.js'; +import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js'; import { EdgeTtsProvider } from './edge.js'; import { ElevenLabsTtsProvider } from './elevenlabs.js'; import { SileroTtsProvider } from './silerotts.js'; @@ -316,12 +316,14 @@ async function playAudioData(audioBlob) { if (currentAudioJob == null) { console.log('Cancelled TTS playback because currentAudioJob was null'); } - const reader = new FileReader(); - reader.onload = function (e) { - const srcUrl = e.target.result; + if (audioBlob instanceof Blob) { + const srcUrl = await getBase64Async(audioBlob); audioElement.src = srcUrl; - }; - reader.readAsDataURL(audioBlob); + } else if (typeof audioBlob === 'string') { + audioElement.src = audioBlob; + } else { + throw `TTS received invalid audio data type ${typeof audioBlob}`; + } audioElement.addEventListener('ended', completeCurrentAudioJob); audioElement.addEventListener('canplay', () => { console.debug('Starting TTS playback'); @@ -417,11 +419,15 @@ function completeCurrentAudioJob() { * @param {Response} response */ async function addAudioJob(response) { - const audioData = await response.blob(); - if (!audioData.type.startsWith('audio/')) { - throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`; + if (typeof response === 'string') { + audioJobQueue.push(response); + } else { + const audioData = await response.blob(); + if (!audioData.type.startsWith('audio/')) { + throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`; + } + audioJobQueue.push(audioData); } - audioJobQueue.push(audioData); console.debug('Pushed audio job to queue.'); } @@ -432,7 +438,7 @@ async function processAudioJobQueue() { } try { audioQueueProcessorReady = false; - currentAudioJob = audioJobQueue.pop(); + currentAudioJob = audioJobQueue.shift(); playAudioData(currentAudioJob); talkingAnimation(true); } catch (error) { diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js index 7b271232a..1f54317c1 100644 --- a/public/scripts/extensions/tts/xtts.js +++ b/public/scripts/extensions/tts/xtts.js @@ -52,6 +52,7 @@ class XTTSTtsProvider { provider_endpoint: 'http://localhost:8020', language: 'en', voiceMap: {}, + streaming: false, }; get settingsHtml() { @@ -75,7 +76,10 @@ class XTTSTtsProvider { - + `; html += ` @@ -90,6 +94,7 @@ class XTTSTtsProvider { // Used when provider settings are updated from UI this.settings.provider_endpoint = $('#xtts_tts_endpoint').val(); this.settings.language = $('#xtts_api_language').val(); + this.settings.streaming = $('#xtts_tts_streaming').is(':checked'); saveTtsProviderSettings(); } @@ -125,6 +130,8 @@ class XTTSTtsProvider { $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); }); $('#xtts_api_language').val(this.settings.language); $('#xtts_api_language').on('change', () => { this.onSettingsChange(); }); + $('#xtts_tts_streaming').prop('checked', this.settings.streaming); + $('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); }); await this.checkReady(); @@ -176,6 +183,15 @@ class XTTSTtsProvider { async fetchTtsGeneration(inputText, voiceId) { console.info(`Generating new TTS for voice_id ${voiceId}`); + + if (this.settings.streaming) { + const params = new URLSearchParams(); + params.append('text', inputText); + params.append('speaker_wav', voiceId); + params.append('language', this.settings.language); + return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`; + } + const response = await doExtrasFetch( `${this.settings.provider_endpoint}/tts_to_audio/`, {