Add support for XTTS streaming

This commit is contained in:
Cohee
2024-01-02 07:04:32 +02:00
parent 9b24e7dc67
commit 99244a0c11
2 changed files with 34 additions and 12 deletions

View File

@@ -1,6 +1,6 @@
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js'; import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js'; import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
import { delay, escapeRegex, getStringHash, onlyUnique } from '../../utils.js'; import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
import { EdgeTtsProvider } from './edge.js'; import { EdgeTtsProvider } from './edge.js';
import { ElevenLabsTtsProvider } from './elevenlabs.js'; import { ElevenLabsTtsProvider } from './elevenlabs.js';
import { SileroTtsProvider } from './silerotts.js'; import { SileroTtsProvider } from './silerotts.js';
@@ -316,12 +316,14 @@ async function playAudioData(audioBlob) {
if (currentAudioJob == null) { if (currentAudioJob == null) {
console.log('Cancelled TTS playback because currentAudioJob was null'); console.log('Cancelled TTS playback because currentAudioJob was null');
} }
const reader = new FileReader(); if (audioBlob instanceof Blob) {
reader.onload = function (e) { const srcUrl = await getBase64Async(audioBlob);
const srcUrl = e.target.result;
audioElement.src = srcUrl; audioElement.src = srcUrl;
}; } else if (typeof audioBlob === 'string') {
reader.readAsDataURL(audioBlob); audioElement.src = audioBlob;
} else {
throw `TTS received invalid audio data type ${typeof audioBlob}`;
}
audioElement.addEventListener('ended', completeCurrentAudioJob); audioElement.addEventListener('ended', completeCurrentAudioJob);
audioElement.addEventListener('canplay', () => { audioElement.addEventListener('canplay', () => {
console.debug('Starting TTS playback'); console.debug('Starting TTS playback');
@@ -417,11 +419,15 @@ function completeCurrentAudioJob() {
* @param {Response} response * @param {Response} response
*/ */
async function addAudioJob(response) { async function addAudioJob(response) {
const audioData = await response.blob(); if (typeof response === 'string') {
if (!audioData.type.startsWith('audio/')) { audioJobQueue.push(response);
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`; } else {
const audioData = await response.blob();
if (!audioData.type.startsWith('audio/')) {
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
}
audioJobQueue.push(audioData);
} }
audioJobQueue.push(audioData);
console.debug('Pushed audio job to queue.'); console.debug('Pushed audio job to queue.');
} }
@@ -432,7 +438,7 @@ async function processAudioJobQueue() {
} }
try { try {
audioQueueProcessorReady = false; audioQueueProcessorReady = false;
currentAudioJob = audioJobQueue.pop(); currentAudioJob = audioJobQueue.shift();
playAudioData(currentAudioJob); playAudioData(currentAudioJob);
talkingAnimation(true); talkingAnimation(true);
} catch (error) { } catch (error) {

View File

@@ -52,6 +52,7 @@ class XTTSTtsProvider {
provider_endpoint: 'http://localhost:8020', provider_endpoint: 'http://localhost:8020',
language: 'en', language: 'en',
voiceMap: {}, voiceMap: {},
streaming: false,
}; };
get settingsHtml() { get settingsHtml() {
@@ -75,7 +76,10 @@ class XTTSTtsProvider {
</select> </select>
<label for="xtts_tts_endpoint">Provider Endpoint:</label> <label for="xtts_tts_endpoint">Provider Endpoint:</label>
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/> <input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<label for="xtts_tts_streaming" class="checkbox_label">
<input id="xtts_tts_streaming" type="checkbox" />
<span>Streaming <small>(RVC not supported)</small></span>
</label>
`; `;
html += ` html += `
@@ -90,6 +94,7 @@ class XTTSTtsProvider {
// Used when provider settings are updated from UI // Used when provider settings are updated from UI
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val(); this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
this.settings.language = $('#xtts_api_language').val(); this.settings.language = $('#xtts_api_language').val();
this.settings.streaming = $('#xtts_tts_streaming').is(':checked');
saveTtsProviderSettings(); saveTtsProviderSettings();
} }
@@ -125,6 +130,8 @@ class XTTSTtsProvider {
$('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); }); $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
$('#xtts_api_language').val(this.settings.language); $('#xtts_api_language').val(this.settings.language);
$('#xtts_api_language').on('change', () => { this.onSettingsChange(); }); $('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
$('#xtts_tts_streaming').prop('checked', this.settings.streaming);
$('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });
await this.checkReady(); await this.checkReady();
@@ -176,6 +183,15 @@ class XTTSTtsProvider {
async fetchTtsGeneration(inputText, voiceId) { async fetchTtsGeneration(inputText, voiceId) {
console.info(`Generating new TTS for voice_id ${voiceId}`); console.info(`Generating new TTS for voice_id ${voiceId}`);
if (this.settings.streaming) {
const params = new URLSearchParams();
params.append('text', inputText);
params.append('speaker_wav', voiceId);
params.append('language', this.settings.language);
return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
}
const response = await doExtrasFetch( const response = await doExtrasFetch(
`${this.settings.provider_endpoint}/tts_to_audio/`, `${this.settings.provider_endpoint}/tts_to_audio/`,
{ {