mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add support for XTTS streaming
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
|
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
|
||||||
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
|
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
|
||||||
import { delay, escapeRegex, getStringHash, onlyUnique } from '../../utils.js';
|
import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
|
||||||
import { EdgeTtsProvider } from './edge.js';
|
import { EdgeTtsProvider } from './edge.js';
|
||||||
import { ElevenLabsTtsProvider } from './elevenlabs.js';
|
import { ElevenLabsTtsProvider } from './elevenlabs.js';
|
||||||
import { SileroTtsProvider } from './silerotts.js';
|
import { SileroTtsProvider } from './silerotts.js';
|
||||||
@@ -316,12 +316,14 @@ async function playAudioData(audioBlob) {
|
|||||||
if (currentAudioJob == null) {
|
if (currentAudioJob == null) {
|
||||||
console.log('Cancelled TTS playback because currentAudioJob was null');
|
console.log('Cancelled TTS playback because currentAudioJob was null');
|
||||||
}
|
}
|
||||||
const reader = new FileReader();
|
if (audioBlob instanceof Blob) {
|
||||||
reader.onload = function (e) {
|
const srcUrl = await getBase64Async(audioBlob);
|
||||||
const srcUrl = e.target.result;
|
|
||||||
audioElement.src = srcUrl;
|
audioElement.src = srcUrl;
|
||||||
};
|
} else if (typeof audioBlob === 'string') {
|
||||||
reader.readAsDataURL(audioBlob);
|
audioElement.src = audioBlob;
|
||||||
|
} else {
|
||||||
|
throw `TTS received invalid audio data type ${typeof audioBlob}`;
|
||||||
|
}
|
||||||
audioElement.addEventListener('ended', completeCurrentAudioJob);
|
audioElement.addEventListener('ended', completeCurrentAudioJob);
|
||||||
audioElement.addEventListener('canplay', () => {
|
audioElement.addEventListener('canplay', () => {
|
||||||
console.debug('Starting TTS playback');
|
console.debug('Starting TTS playback');
|
||||||
@@ -417,11 +419,15 @@ function completeCurrentAudioJob() {
|
|||||||
* @param {Response} response
|
* @param {Response} response
|
||||||
*/
|
*/
|
||||||
async function addAudioJob(response) {
|
async function addAudioJob(response) {
|
||||||
const audioData = await response.blob();
|
if (typeof response === 'string') {
|
||||||
if (!audioData.type.startsWith('audio/')) {
|
audioJobQueue.push(response);
|
||||||
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
|
} else {
|
||||||
|
const audioData = await response.blob();
|
||||||
|
if (!audioData.type.startsWith('audio/')) {
|
||||||
|
throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
|
||||||
|
}
|
||||||
|
audioJobQueue.push(audioData);
|
||||||
}
|
}
|
||||||
audioJobQueue.push(audioData);
|
|
||||||
console.debug('Pushed audio job to queue.');
|
console.debug('Pushed audio job to queue.');
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -432,7 +438,7 @@ async function processAudioJobQueue() {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
audioQueueProcessorReady = false;
|
audioQueueProcessorReady = false;
|
||||||
currentAudioJob = audioJobQueue.pop();
|
currentAudioJob = audioJobQueue.shift();
|
||||||
playAudioData(currentAudioJob);
|
playAudioData(currentAudioJob);
|
||||||
talkingAnimation(true);
|
talkingAnimation(true);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
@@ -52,6 +52,7 @@ class XTTSTtsProvider {
|
|||||||
provider_endpoint: 'http://localhost:8020',
|
provider_endpoint: 'http://localhost:8020',
|
||||||
language: 'en',
|
language: 'en',
|
||||||
voiceMap: {},
|
voiceMap: {},
|
||||||
|
streaming: false,
|
||||||
};
|
};
|
||||||
|
|
||||||
get settingsHtml() {
|
get settingsHtml() {
|
||||||
@@ -75,7 +76,10 @@ class XTTSTtsProvider {
|
|||||||
</select>
|
</select>
|
||||||
<label for="xtts_tts_endpoint">Provider Endpoint:</label>
|
<label for="xtts_tts_endpoint">Provider Endpoint:</label>
|
||||||
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
|
||||||
|
<label for="xtts_tts_streaming" class="checkbox_label">
|
||||||
|
<input id="xtts_tts_streaming" type="checkbox" />
|
||||||
|
<span>Streaming <small>(RVC not supported)</small></span>
|
||||||
|
</label>
|
||||||
`;
|
`;
|
||||||
|
|
||||||
html += `
|
html += `
|
||||||
@@ -90,6 +94,7 @@ class XTTSTtsProvider {
|
|||||||
// Used when provider settings are updated from UI
|
// Used when provider settings are updated from UI
|
||||||
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
|
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
|
||||||
this.settings.language = $('#xtts_api_language').val();
|
this.settings.language = $('#xtts_api_language').val();
|
||||||
|
this.settings.streaming = $('#xtts_tts_streaming').is(':checked');
|
||||||
saveTtsProviderSettings();
|
saveTtsProviderSettings();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,6 +130,8 @@ class XTTSTtsProvider {
|
|||||||
$('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
|
$('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
|
||||||
$('#xtts_api_language').val(this.settings.language);
|
$('#xtts_api_language').val(this.settings.language);
|
||||||
$('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
|
$('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
|
||||||
|
$('#xtts_tts_streaming').prop('checked', this.settings.streaming);
|
||||||
|
$('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });
|
||||||
|
|
||||||
await this.checkReady();
|
await this.checkReady();
|
||||||
|
|
||||||
@@ -176,6 +183,15 @@ class XTTSTtsProvider {
|
|||||||
|
|
||||||
async fetchTtsGeneration(inputText, voiceId) {
|
async fetchTtsGeneration(inputText, voiceId) {
|
||||||
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
console.info(`Generating new TTS for voice_id ${voiceId}`);
|
||||||
|
|
||||||
|
if (this.settings.streaming) {
|
||||||
|
const params = new URLSearchParams();
|
||||||
|
params.append('text', inputText);
|
||||||
|
params.append('speaker_wav', voiceId);
|
||||||
|
params.append('language', this.settings.language);
|
||||||
|
return `${this.settings.provider_endpoint}/tts_stream/?${params.toString()}`;
|
||||||
|
}
|
||||||
|
|
||||||
const response = await doExtrasFetch(
|
const response = await doExtrasFetch(
|
||||||
`${this.settings.provider_endpoint}/tts_to_audio/`,
|
`${this.settings.provider_endpoint}/tts_to_audio/`,
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user