diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js index 1f54317c1..1408cae2d 100644 --- a/public/scripts/extensions/tts/xtts.js +++ b/public/scripts/extensions/tts/xtts.js @@ -51,6 +51,14 @@ class XTTSTtsProvider { defaultSettings = { provider_endpoint: 'http://localhost:8020', language: 'en', + temperature: 0.75, + length_penalty: 1.0, + repetition_penalty: 5.0, + top_k: 50, + top_p: 0.85, + speed: 1, + enable_text_splitting: true, + stream_chunk_size: 100, voiceMap: {}, streaming: false, }; @@ -60,9 +68,7 @@ class XTTSTtsProvider { + XTTS Settings:
+ Use XTTSv2 TTS Server. - `; + + - html += ` + + - - Use XTTSv2 TTS Server. + + + + + + + + + + + + + + + + `; return html; } + onSettingsChange() { // Used when provider settings are updated from UI this.settings.provider_endpoint = $('#xtts_tts_endpoint').val(); this.settings.language = $('#xtts_api_language').val(); + + // Update the default TTS settings based on input fields + this.settings.speed = $('#xtts_speed').val(); + this.settings.temperature = $('#xtts_temperature').val(); + this.settings.length_penalty = $('#xtts_length_penalty').val(); + this.settings.repetition_penalty = $('#xtts_repetition_penalty').val(); + this.settings.top_k = $('#xtts_top_k').val(); + this.settings.top_p = $('#xtts_top_p').val(); + this.settings.stream_chunk_size = $('#xtts_stream_chunk_size').val(); + this.settings.enable_text_splitting = $('#xtts_enable_text_splitting').is(':checked'); this.settings.streaming = $('#xtts_tts_streaming').is(':checked'); + + // Update the UI to reflect changes + $('#xtts_tts_speed_output').text(this.settings.speed); + $('#xtts_tts_temperature_output').text(this.settings.temperature); + $('#xtts_length_penalty_output').text(this.settings.length_penalty); + $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty); + $('#xtts_top_k_output').text(this.settings.top_k); + $('#xtts_top_p_output').text(this.settings.top_p); + $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size); + saveTtsProviderSettings(); + this.changeTTSSettings(); } async loadSettings(settings) { @@ -126,11 +174,39 @@ class XTTSTtsProvider { } }, 2000); + // Set initial values from the settings $('#xtts_tts_endpoint').val(this.settings.provider_endpoint); - $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); }); $('#xtts_api_language').val(this.settings.language); - $('#xtts_api_language').on('change', () => { this.onSettingsChange(); }); + $('#xtts_speed').val(this.settings.speed); + $('#xtts_temperature').val(this.settings.temperature); + $('#xtts_length_penalty').val(this.settings.length_penalty); + $('#xtts_repetition_penalty').val(this.settings.repetition_penalty); + $('#xtts_top_k').val(this.settings.top_k); + $('#xtts_top_p').val(this.settings.top_p); + $('#xtts_enable_text_splitting').prop('checked', this.settings.enable_text_splitting); + $('#xtts_stream_chunk_size').val(this.settings.stream_chunk_size); $('#xtts_tts_streaming').prop('checked', this.settings.streaming); + + // Update the UI to reflect changes + $('#xtts_tts_speed_output').text(this.settings.speed); + $('#xtts_tts_temperature_output').text(this.settings.temperature); + $('#xtts_length_penalty_output').text(this.settings.length_penalty); + $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty); + $('#xtts_top_k_output').text(this.settings.top_k); + $('#xtts_top_p_output').text(this.settings.top_p); + $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size); + + // Register input/change event listeners to update settings on user interaction + $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); }); + $('#xtts_api_language').on('change', () => { this.onSettingsChange(); }); + $('#xtts_speed').on('input', () => { this.onSettingsChange(); }); + $('#xtts_temperature').on('input', () => { this.onSettingsChange(); }); + $('#xtts_length_penalty').on('input', () => { this.onSettingsChange(); }); + $('#xtts_repetition_penalty').on('input', () => { this.onSettingsChange(); }); + $('#xtts_top_k').on('input', () => { this.onSettingsChange(); }); + $('#xtts_top_p').on('input', () => { this.onSettingsChange(); }); + $('#xtts_enable_text_splitting').on('change', () => { this.onSettingsChange(); }); + $('#xtts_stream_chunk_size').on('input', () => { this.onSettingsChange(); }); $('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); }); await this.checkReady(); @@ -140,7 +216,7 @@ class XTTSTtsProvider { // Perform a simple readiness check by trying to fetch voiceIds async checkReady() { - await this.fetchTtsVoiceObjects(); + await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]); } async onRefreshClick() { @@ -181,6 +257,35 @@ class XTTSTtsProvider { return responseJson; } + // Each time a parameter is changed, we change the configuration + async changeTTSSettings() { + if (!this.settings.provider_endpoint) { + return; + } + + const response = await doExtrasFetch( + `${this.settings.provider_endpoint}/set_tts_settings`, + { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Cache-Control': 'no-cache', + }, + body: JSON.stringify({ + 'temperature': this.settings.temperature, + 'speed': this.settings.speed, + 'length_penalty': this.settings.length_penalty, + 'repetition_penalty': this.settings.repetition_penalty, + 'top_p': this.settings.top_p, + 'top_k': this.settings.top_k, + 'enable_text_splitting': this.settings.enable_text_splitting, + 'stream_chunk_size': this.settings.stream_chunk_size, + }), + }, + ); + return response; + } + async fetchTtsGeneration(inputText, voiceId) { console.info(`Generating new TTS for voice_id ${voiceId}`);