diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js
index 1f54317c1..1408cae2d 100644
--- a/public/scripts/extensions/tts/xtts.js
+++ b/public/scripts/extensions/tts/xtts.js
@@ -51,6 +51,14 @@ class XTTSTtsProvider {
defaultSettings = {
provider_endpoint: 'http://localhost:8020',
language: 'en',
+ temperature: 0.75, // this and the seven keys below are POSTed to `${provider_endpoint}/set_tts_settings` by changeTTSSettings()
+ length_penalty: 1.0, // bound to the #xtts_length_penalty input
+ repetition_penalty: 5.0, // bound to the #xtts_repetition_penalty input
+ top_k: 50, // bound to the #xtts_top_k input
+ top_p: 0.85, // bound to the #xtts_top_p input
+ speed: 1, // bound to the #xtts_speed input
+ enable_text_splitting: true, // boolean; bound to the #xtts_enable_text_splitting checkbox
+ stream_chunk_size: 100, // bound to the #xtts_stream_chunk_size input
voiceMap: {},
streaming: false,
};
@@ -60,9 +68,7 @@ class XTTSTtsProvider {
+
+ Use XTTSv2 TTS Server.
- `;
+
+
- html += `
+
+
-
- Use XTTSv2 TTS Server.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
`;
return html;
}
+
+ /**
+ * Copies every provider control from the settings UI into `this.settings`,
+ * mirrors the numeric values into their `*_output` labels, persists the
+ * settings and pushes the generation parameters to the server.
+ *
+ * Numeric controls are parsed into numbers so they keep the same type as the
+ * entries in `defaultSettings`; an unparsable control leaves the previous
+ * value untouched.
+ */
onSettingsChange() {
// Used when provider settings are updated from UI
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
this.settings.language = $('#xtts_api_language').val();
+
+ // jQuery's .val() yields a string for input elements, so parse it; keep the
+ // current setting when the control is missing or does not hold a finite number.
+ const readNumber = (selector, fallback) => {
+ const parsed = Number.parseFloat($(selector).val());
+ return Number.isFinite(parsed) ? parsed : fallback;
+ };
+
+ // Update the default TTS settings based on input fields
+ this.settings.speed = readNumber('#xtts_speed', this.settings.speed);
+ this.settings.temperature = readNumber('#xtts_temperature', this.settings.temperature);
+ this.settings.length_penalty = readNumber('#xtts_length_penalty', this.settings.length_penalty);
+ this.settings.repetition_penalty = readNumber('#xtts_repetition_penalty', this.settings.repetition_penalty);
+ this.settings.top_k = readNumber('#xtts_top_k', this.settings.top_k);
+ this.settings.top_p = readNumber('#xtts_top_p', this.settings.top_p);
+ this.settings.stream_chunk_size = readNumber('#xtts_stream_chunk_size', this.settings.stream_chunk_size);
+ this.settings.enable_text_splitting = $('#xtts_enable_text_splitting').is(':checked');
this.settings.streaming = $('#xtts_tts_streaming').is(':checked');
+
+ // Update the UI to reflect changes
+ $('#xtts_tts_speed_output').text(this.settings.speed);
+ $('#xtts_tts_temperature_output').text(this.settings.temperature);
+ $('#xtts_length_penalty_output').text(this.settings.length_penalty);
+ $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
+ $('#xtts_top_k_output').text(this.settings.top_k);
+ $('#xtts_top_p_output').text(this.settings.top_p);
+ $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);
+
saveTtsProviderSettings();
+ // This handler is synchronous, so the request is not awaited; attach a
+ // rejection handler so a failed request is logged instead of surfacing as
+ // an unhandled promise rejection on every input event.
+ this.changeTTSSettings().catch((error) => {
+ console.error('XTTS: failed to apply TTS settings', error);
+ });
}
async loadSettings(settings) {
@@ -126,11 +174,39 @@ class XTTSTtsProvider {
}
}, 2000);
+ // Set initial values from the settings
$('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
- $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
$('#xtts_api_language').val(this.settings.language);
- $('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
+ $('#xtts_speed').val(this.settings.speed);
+ $('#xtts_temperature').val(this.settings.temperature);
+ $('#xtts_length_penalty').val(this.settings.length_penalty);
+ $('#xtts_repetition_penalty').val(this.settings.repetition_penalty);
+ $('#xtts_top_k').val(this.settings.top_k);
+ $('#xtts_top_p').val(this.settings.top_p);
+ $('#xtts_enable_text_splitting').prop('checked', this.settings.enable_text_splitting);
+ $('#xtts_stream_chunk_size').val(this.settings.stream_chunk_size);
$('#xtts_tts_streaming').prop('checked', this.settings.streaming);
+
+ // Update the UI to reflect changes
+ $('#xtts_tts_speed_output').text(this.settings.speed);
+ $('#xtts_tts_temperature_output').text(this.settings.temperature);
+ $('#xtts_length_penalty_output').text(this.settings.length_penalty);
+ $('#xtts_repetition_penalty_output').text(this.settings.repetition_penalty);
+ $('#xtts_top_k_output').text(this.settings.top_k);
+ $('#xtts_top_p_output').text(this.settings.top_p);
+ $('#xtts_stream_chunk_size_output').text(this.settings.stream_chunk_size);
+
+ // Register input/change event listeners to update settings on user interaction
+ $('#xtts_tts_endpoint').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_api_language').on('change', () => { this.onSettingsChange(); });
+ $('#xtts_speed').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_temperature').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_length_penalty').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_repetition_penalty').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_top_k').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_top_p').on('input', () => { this.onSettingsChange(); });
+ $('#xtts_enable_text_splitting').on('change', () => { this.onSettingsChange(); });
+ $('#xtts_stream_chunk_size').on('input', () => { this.onSettingsChange(); });
$('#xtts_tts_streaming').on('change', () => { this.onSettingsChange(); });
await this.checkReady();
@@ -140,7 +216,7 @@ class XTTSTtsProvider {
// Perform a simple readiness check by trying to fetch voiceIds
+ /**
+ * Fetches the voice list and pushes the current generation settings to the
+ * server concurrently. Only the voice fetch decides readiness: a failure to
+ * push settings is tolerated, a failure to fetch voices is rethrown.
+ * @throws Whatever `fetchTtsVoiceObjects()` rejected with.
+ */
async checkReady() {
- await this.fetchTtsVoiceObjects();
+ const [voiceFetch] = await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
+ // Promise.allSettled never rejects, so the voice-fetch failure has to be
+ // surfaced explicitly; otherwise this check would always pass.
+ if (voiceFetch.status === 'rejected') {
+ throw voiceFetch.reason;
+ }
}
async onRefreshClick() {
@@ -181,6 +257,35 @@ class XTTSTtsProvider {
return responseJson;
}
+ /**
+ * Sends the current generation parameters to the server so that it uses
+ * them from now on. Called each time a parameter is changed.
+ *
+ * Does nothing (resolves to `undefined`) when no provider endpoint is set.
+ * @returns {Promise<*>} Whatever `doExtrasFetch` resolves to for the POST to
+ * `/set_tts_settings` (presumably a fetch Response; confirm against doExtrasFetch).
+ */
+ async changeTTSSettings() {
+ const { provider_endpoint: endpoint } = this.settings;
+ if (!endpoint) {
+ return;
+ }
+
+ const {
+ temperature,
+ speed,
+ length_penalty,
+ repetition_penalty,
+ top_p,
+ top_k,
+ enable_text_splitting,
+ stream_chunk_size,
+ } = this.settings;
+
+ // Key order is kept as-is so the serialized request body is unchanged.
+ const payload = JSON.stringify({
+ temperature,
+ speed,
+ length_penalty,
+ repetition_penalty,
+ top_p,
+ top_k,
+ enable_text_splitting,
+ stream_chunk_size,
+ });
+
+ const requestOptions = {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Cache-Control': 'no-cache',
+ },
+ body: payload,
+ };
+
+ return await doExtrasFetch(`${endpoint}/set_tts_settings`, requestOptions);
+ }
+
async fetchTtsGeneration(inputText, voiceId) {
console.info(`Generating new TTS for voice_id ${voiceId}`);