diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index 1ac1edd8b..c54458efe 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -11,6 +11,7 @@ import { power_user } from '../../power-user.js';
import { OpenAITtsProvider } from './openai.js';
import { XTTSTtsProvider } from './xtts.js';
import { GSVITtsProvider } from './gsvi.js';
+import { SBVits2TtsProvider } from './sbvits2.js';
import { AllTalkTtsProvider } from './alltalk.js';
import { SpeechT5TtsProvider } from './speecht5.js';
import { AzureTtsProvider } from './azure.js';
@@ -77,6 +78,7 @@ const ttsProviders = {
Silero: SileroTtsProvider,
XTTSv2: XTTSTtsProvider,
GSVI: GSVITtsProvider,
+ SBVits2: SBVits2TtsProvider,
System: SystemTtsProvider,
Coqui: CoquiTtsProvider,
Edge: EdgeTtsProvider,
diff --git a/public/scripts/extensions/tts/sbvits2.js b/public/scripts/extensions/tts/sbvits2.js
new file mode 100644
index 000000000..85fe39051
--- /dev/null
+++ b/public/scripts/extensions/tts/sbvits2.js
@@ -0,0 +1,339 @@
+import { getPreviewString, saveTtsProviderSettings } from './index.js';
+
+export { SBVits2TtsProvider };
+
+class SBVits2TtsProvider {
+ //########//
+ // Config //
+ //########//
+
+ // Active provider settings object; assigned by loadSettings().
+ settings;
+ // NOTE(review): not written anywhere in the visible part of this class — confirm who sets it.
+ ready = false;
+ // Starts empty; presumably filled by the voice-fetching code further down the file — verify.
+ voices = [];
+ // NOTE(review): looks like the delimiter the TTS core uses between text chunks — confirm against the caller.
+ separator = '. ';
+ // Detached <audio> element created once per provider instance.
+ audioElement = document.createElement('audio');
+
+ /**
+ * Perform any text processing before passing to TTS engine.
+ * @param {string} text Input text
+ * @returns {string} Processed text
+ */
+ processText(text) {
+ return text;
+ }
+
+ // Human-readable language name -> short language key ('ZH' / 'EN' / 'JP').
+ // NOTE(review): presumably used to build the language dropdown — confirm against the settings markup.
+ languageLabels = {
+ 'Chinese': 'ZH',
+ 'English': 'EN',
+ 'Japanese': 'JP',
+ };
+
+ // Short language key (the values of languageLabels) -> language-region code.
+ langKey2LangCode = {
+ 'ZH': 'zh-CN',
+ 'EN': 'en-US',
+ 'JP': 'ja-JP',
+ };
+
+ // Baseline provider settings. loadSettings() only accepts saved keys that exist
+ // in this object; any other key is logged and ignored.
+ // NOTE(review): the parameter names presumably mirror the Style-Bert-VITS2 API
+ // server's synthesis parameters — verify against that server's documentation.
+ defaultSettings = {
+ provider_endpoint: 'http://localhost:5000',
+ sdp_ratio: 0.2,
+ noise: 0.6,
+ noisew: 0.8,
+ length: 1,
+ language: 'JP',
+ auto_split: true,
+ split_interval: 0.5,
+ assist_text: '',
+ assist_text_weight: 1,
+ style: 'Neutral',
+ style_weight: 1,
+ reference_audio_path: '',
+ };
+
+ /**
+  * Returns the template string for this provider's settings UI.
+  * NOTE(review): loadSettings()/onSettingsChange() look up elements such as
+  * #sbvits_tts_endpoint and #sbvits_sdp_ratio, but the template visible here
+  * contains only a line of plain text — confirm the markup was not lost.
+  * @returns {string} Settings template string
+  */
+ get settingsHtml() {
+ let html = `
+
+
+
+
+
+ Use Style-Bert-VITS2 API Server.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ `;
+
+ return html;
+ }
+
+ onSettingsChange() {
+ // Used when provider settings are updated from UI
+ this.settings.provider_endpoint = $('#sbvits_tts_endpoint').val();
+ this.settings.language = $('#sbvits_api_language').val();
+ this.settings.assist_text = $('#sbvits_assist_text').val();
+ this.settings.reference_audio_path = $('#sbvits_reference_audio_path').val();
+
+ // Update the default TTS settings based on input fields
+ this.settings.sdp_ratio = $('#sbvits_sdp_ratio').val();
+ this.settings.noise = $('#sbvits_noise').val();
+ this.settings.noisew = $('#sbvits_noisew').val();
+ this.settings.length = $('#sbvits_length').val();
+ this.settings.auto_split = $('#sbvits_auto_split').is(':checked');
+ this.settings.split_interval = $('#sbvits_split_interval').val();
+ this.settings.assist_text_weight = $('#sbvits_assist_text_weight').val();
+ this.settings.style_weight = $('#sbvits_style_weight').val();
+
+ // Update the UI to reflect changes
+ $('#sbvits_sdp_ratio_output').text(this.settings.sdp_ratio);
+ $('#sbvits_noise_output').text(this.settings.noise);
+ $('#sbvits_noisew_output').text(this.settings.noisew);
+ $('#sbvits_length_output').text(this.settings.length);
+ $('#sbvits_split_interval_output').text(this.settings.split_interval);
+ $('#sbvits_assist_text_weight_output').text(this.settings.assist_text_weight);
+ $('#sbvits_style_weight_output').text(this.settings.style_weight);
+
+ saveTtsProviderSettings();
+ this.changeTTSSettings();
+ }
+
+ async loadSettings(settings) {
+ // Pupulate Provider UI given input settings
+ if (Object.keys(settings).length == 0) {
+ console.info('Using default TTS Provider settings');
+ }
+
+ // Only accept keys defined in defaultSettings
+ this.settings = this.defaultSettings;
+
+ for (const key in settings) {
+ if (key in this.settings) {
+ this.settings[key] = settings[key];
+ } else {
+ console.debug(`Ignoring non-user-configurable setting: ${key}`);
+ }
+ }
+
+ // Set initial values from the settings
+ $('#sbvits_tts_endpoint').val(this.settings.provider_endpoint);
+ $('#sbvits_api_language').val(this.settings.language);
+ $('#sbvits_assist_text').val(this.settings.assist_text);
+ $('#sbvits_reference_audio_path').val(this.settings.reference_audio_path);
+ $('#sbvits_sdp_ratio').val(this.settings.sdp_ratio);
+ $('#sbvits_noise').val(this.settings.noise);
+ $('#sbvits_noisew').val(this.settings.noisew);
+ $('#sbvits_length').val(this.settings.length);
+ $('#sbvits_auto_split').prop('checked', this.settings.auto_split);
+ $('#sbvits_split_interval').val(this.settings.split_interval);
+ $('#sbvits_assist_text_weight').val(this.settings.assist_text_weight);
+ $('#sbvits_style_weight').val(this.settings.style_weight);
+
+ // Update the UI to reflect changes
+ $('#sbvits_sdp_ratio_output').text(this.settings.sdp_ratio);
+ $('#sbvits_noise_output').text(this.settings.noise);
+ $('#sbvits_noisew_output').text(this.settings.noisew);
+ $('#sbvits_length_output').text(this.settings.length);
+ $('#sbvits_split_interval_output').text(this.settings.split_interval);
+ $('#sbvits_assist_text_weight_output').text(this.settings.assist_text_weight);
+ $('#sbvits_style_weight_output').text(this.settings.style_weight);
+
+ // Register input/change event listeners to update settings on user interaction
+ $('#sbvits_tts_endpoint').on('input', () => { this.onSettingsChange(); });
+ $('#sbvits_api_language').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_assist_text').on('input', () => { this.onSettingsChange(); });
+ $('#sbvits_reference_audio_path').on('input', () => { this.onSettingsChange(); });
+ $('#sbvits_sdp_ratio').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_noise').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_noisew').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_length').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_auto_split').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_split_interval').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_assist_text_weight').on('change', () => { this.onSettingsChange(); });
+ $('#sbvits_style_weight').on('change', () => { this.onSettingsChange(); });
+
+ await this.checkReady();
+
+ console.info('SBVits2: Settings loaded');
+ }
+
+ // Perform a simple readiness check by trying to fetch voiceIds
+ async checkReady() {
+ await Promise.allSettled([this.fetchTtsVoiceObjects(), this.changeTTSSettings()]);
+ }
+
+ async onRefreshClick() {
+ return;
+ }
+
+ //#################//
+ // TTS Interfaces //
+ //#################//
+
+ /**
+ * Get a voice from the TTS provider.
+ * @param {string} voiceName Voice name to get
+ * @returns {Promise