Merge pull request #1623 from SillyTavern/xtts-stream
Add support for XTTS streaming
This commit is contained in:
commit
223325e8bb
|
@ -1,6 +1,6 @@
|
|||
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
|
||||
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
|
||||
import { delay, escapeRegex, getStringHash, onlyUnique } from '../../utils.js';
|
||||
import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
|
||||
import { EdgeTtsProvider } from './edge.js';
|
||||
import { ElevenLabsTtsProvider } from './elevenlabs.js';
|
||||
import { SileroTtsProvider } from './silerotts.js';
|
||||
|
@ -316,12 +316,14 @@ async function playAudioData(audioBlob) {
|
|||
if (currentAudioJob == null) {
|
||||
console.log('Cancelled TTS playback because currentAudioJob was null');
|
||||
}
|
||||
const reader = new FileReader();
|
||||
reader.onload = function (e) {
|
||||
const srcUrl = e.target.result;
|
||||
if (audioBlob instanceof Blob) {
|
||||
const srcUrl = await getBase64Async(audioBlob);
|
||||
audioElement.src = srcUrl;
|
||||
};
|
||||
reader.readAsDataURL(audioBlob);
|
||||
} else if (typeof audioBlob === 'string') {
|
||||
audioElement.src = audioBlob;
|
||||
} else {
|
||||
throw `TTS received invalid audio data type ${typeof audioBlob}`;
|
||||
}
|
||||
audioElement.addEventListener('ended', completeCurrentAudioJob);
|
||||
audioElement.addEventListener('canplay', () => {
|
||||
console.debug('Starting TTS playback');
|
||||
|
@ -417,11 +419,15 @@ function completeCurrentAudioJob() {
|
|||
* @param {Response} response
|
||||
*/
|
||||
async function addAudioJob(response) {
|
||||
const audioData = await response.blob();
|
||||
if (!audioData.type.startsWith('audio/')) {
|
||||
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
|
||||
if (typeof response === 'string') {
|
||||
audioJobQueue.push(response);
|
||||
} else {
|
||||
const audioData = await response.blob();
|
||||
if (!audioData.type.startsWith('audio/')) {
|
||||
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
|
||||
}
|
||||
audioJobQueue.push(audioData);
|
||||
}
|
||||
audioJobQueue.push(audioData);
|
||||
console.debug('Pushed audio job to queue.');
|
||||
}
|
||||
|
||||
|
@ -432,7 +438,7 @@ async function processAudioJobQueue() {
|
|||
}
|
||||
try {
|
||||
audioQueueProcessorReady = false;
|
||||
currentAudioJob = audioJobQueue.pop();
|
||||
currentAudioJob = audioJobQueue.shift();
|
||||
playAudioData(currentAudioJob);
|
||||
talkingAnimation(true);
|
||||
} catch (error) {
|
||||
|
|
|
@ -52,6 +52,7 @@ class XTTSTtsProvider {
|
|||
provider_endpoint: 'http://localhost:8020',
|
||||
language: 'en',
|
||||
voiceMap: {},
|
||||
streaming: false,
|
||||
};
|
||||
|
||||
get settingsHtml() {
|
||||
|
@ -75,7 +76,10 @@ class XTTSTtsProvider {
|
|||
</select>
|
||||
<label for="xtts_tts_endpoint">Provider Endpoint:</label>
|
||||
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
||||
|
||||
<label for="xtts_tts_streaming" class="checkbox_label">
|
||||
<input id="xtts_tts_streaming" type="checkbox" />
|
||||
<span>Streaming <small>(RVC not supported)</small></span>
|
||||
</label>
|
||||
`;
|
||||
|
||||
html += `
|
||||
|
@ -90,6 +94,7 @@ class XTTSTtsProvider {
|
|||
// Used when provider settings are updated from UI
|
||||
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
|
||||
this.settings.language = $('#xtts_api_language').val();
|
||||
this.settings.streaming = $('#xtts_tts_streaming').is(':checked');
|
||||
saveTtsProviderSettings();
|
||||
}
|
||||
|
||||
|
@ -125,6 +130,8 @@ class XTTSTtsProvider {
|
|||
$('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||
$('#xtts_api_language').val(this.settings.language);
|
||||
$('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
|
||||
$('#xtts_tts_streaming').prop('checked', this.settings.streaming);
|
||||
$('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });
|
||||
|
||||
await this.checkReady();
|
||||
|
||||
|
@ -176,6 +183,15 @@ class XTTSTtsProvider {
|
|||
|
||||
async fetchTtsGeneration(inputText, voiceId) {
|
||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||
|
||||
if (this.settings.streaming) {
|
||||
const params = new URLSearchParams();
|
||||
params.append('text', inputText);
|
||||
params.append('speaker_wav', voiceId);
|
||||
params.append('language', this.settings.language);
|
||||
return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
|
||||
}
|
||||
|
||||
const response = await doExtrasFetch(
|
||||
`${this.settings.provider_endpoint}/tts_to_audio/`,
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue