diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index 6736c5bb9..de3041013 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -10,6 +10,7 @@ import { NovelTtsProvider } from './novel.js'
import { power_user } from '../../power-user.js'
import { registerSlashCommand } from '../../slash-commands.js'
import { OpenAITtsProvider } from './openai.js'
+import {XTTSTtsProvider} from "./xtts.js"
export { talkingAnimation };
const UPDATE_INTERVAL = 1000
@@ -70,6 +71,7 @@ export function getPreviewString(lang) {
let ttsProviders = {
ElevenLabs: ElevenLabsTtsProvider,
Silero: SileroTtsProvider,
+ XTTSv2: XTTSTtsProvider,
System: SystemTtsProvider,
Coqui: CoquiTtsProvider,
Edge: EdgeTtsProvider,
diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js
new file mode 100644
index 000000000..2d3764ca9
--- /dev/null
+++ b/public/scripts/extensions/tts/xtts.js
@@ -0,0 +1,191 @@
+import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
+import { saveTtsProviderSettings } from "./index.js"
+
+export { XTTSTtsProvider }
+
+class XTTSTtsProvider {
+ //########//
+ // Config //
+ //########//
+ // settings is assigned by loadSettings and voices is filled by getVoice. NOTE(review): ready and separator are not touched by the visible methods - presumably read by index.js; confirm.
+ settings
+ ready = false
+ voices = []
+ separator = ' .. '
+ // Display name -> language code table. NOTE(review): no visible code reads it - presumably it feeds the #xtts_api_language options; confirm.
+ languageLabels = {
+ "Arabic": "ar",
+ "Brazilian Portuguese": "pt",
+ "Chinese": "zh-cn",
+ "Czech": "cs",
+ "Dutch": "nl",
+ "English": "en",
+ "French": "fr",
+ "German": "de",
+ "Italian": "it",
+ "Polish": "pl",
+ "Russian": "ru",
+ "Spanish": "es",
+ "Turkish": "tr",
+ "Japanese": "ja",
+ "Korean": "ko",
+ "Hungarian": "hu"
+ }
+ // Baseline settings; loadSettings throws on any incoming key that is not present here.
+ defaultSettings = {
+ provider_endpoint: "http://localhost:8020",
+ language: "en",
+ voiceMap: {}
+ }
+ // Getter: builds one string from two template-literal fragments and returns it.
+ get settingsHtml() {
+ let html = `
+
+
+
+
+
+ `;
+ // Second fragment, appended to the first; its only visible text is the "Use XTTSv2 TTS Server." notice.
+ html += `
+
+
+ Use XTTSv2 TTS Server.
+ `;
+ // NOTE(review): both fragments look nearly empty here, yet loadSettings binds #xtts_tts_endpoint and #xtts_api_language - confirm the markup for those inputs was not lost.
+ return html;
+ }
+ onSettingsChange() {
+     // Used when provider settings are updated from UI: copy each input's value into this.settings, then call saveTtsProviderSettings().
+     const inputBySetting = { provider_endpoint: '#xtts_tts_endpoint', language: '#xtts_api_language' }
+     for (const [key, selector] of Object.entries(inputBySetting)) this.settings[key] = $(selector).val()
+     saveTtsProviderSettings()
+ }
+
+ /**
+  * Apply saved settings over a copy of the defaults, sync the settings inputs, then await checkReady() (rejects if it does).
+  * @param {object} settings Saved provider settings; each key must already exist in defaultSettings.
+  * @throws {string} "Invalid setting passed to TTS Provider: <key>" when a key is not recognised.
+  */
+ async loadSettings(settings) {
+     if (Object.keys(settings).length === 0) {
+         console.info("Using default TTS Provider settings")
+     }
+     // Work on a copy so loading never rewrites defaultSettings (or its voiceMap) in place.
+     this.settings = { ...this.defaultSettings, voiceMap: { ...this.defaultSettings.voiceMap } }
+     // Only accept keys defined in defaultSettings; own-key check so inherited names are rejected too.
+     for (const [key, value] of Object.entries(settings)) {
+         if (!Object.hasOwn(this.settings, key)) {
+             throw `Invalid setting passed to TTS Provider: ${key}`
+         }
+         this.settings[key] = value
+     }
+
+     // Poll every 2s until the Extras modules list includes 'tts' or 'xtts-tts', then switch the endpoint to the Extras /api/tts URL and stop polling.
+     const apiCheckInterval = setInterval(() => {
+         if (modules.includes('tts') || modules.includes('xtts-tts')) {
+             const baseUrl = new URL(getApiUrl());
+             baseUrl.pathname = '/api/tts';
+             this.settings.provider_endpoint = baseUrl.toString();
+             $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
+             clearInterval(apiCheckInterval);
+         }
+     }, 2000);
+
+     $('#xtts_tts_endpoint').val(this.settings.provider_endpoint)
+     $('#xtts_tts_endpoint').on("input", () => {this.onSettingsChange()})
+     $('#xtts_api_language').val(this.settings.language)
+     $('#xtts_api_language').on("change", () => {this.onSettingsChange()})
+     await this.checkReady()
+     console.debug("XTTS: Settings loaded")
+ }
+
+ // Perform a simple readiness check by trying to fetch voiceIds.
+ // Resolves with no value on success; rejects with whatever fetchTtsVoiceObjects() throws.
+ async checkReady() {
+     await this.fetchTtsVoiceObjects()
+ }
+
+ // Refresh hook for this provider.
+ // Currently does nothing; the returned promise resolves to undefined.
+ async onRefreshClick() {}
+
+ //#################//
+ // TTS Interfaces //
+ //#################//
+
+ /**
+  * Resolve a voice object by name, fetching the voice list first if the cache is empty.
+  * @param voiceName Name compared (loosely, with ==) against each cached voice's `name`.
+  * @returns The first matching voice object.
+  * @throws {string} "TTS Voice name <voiceName> not found" when nothing matches.
+  */
+ async getVoice(voiceName) {
+     if (this.voices.length == 0) this.voices = await this.fetchTtsVoiceObjects()
+     const found = this.voices.find(candidate => candidate.name == voiceName)
+     if (found) return found
+     throw `TTS Voice name ${voiceName} not found`
+ }
+
+ // Thin wrapper: forwards text and voiceId to fetchTtsGeneration() and resolves to its result.
+ async generateTts(text, voiceId) {
+     return await this.fetchTtsGeneration(text, voiceId)
+ }
+
+ //###########//
+ // API CALLS //
+ //###########//
+ /** Fetch `<provider_endpoint>/speakers` and resolve to the parsed JSON body.
+  * @throws {Error} "HTTP <status>: <body text>" when response.ok is false. */
+ async fetchTtsVoiceObjects() {
+     const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`)
+     // Error bodies are read as text: a parsed object would print as "[object Object]" and a non-JSON body would throw.
+     if (!response.ok) throw new Error(`HTTP ${response.status}: ${await response.text()}`)
+     return await response.json()
+ }
+
+ /**
+  * POST a generation request to `<provider_endpoint>/tts_to_audio/`.
+  * @param inputText Sent as "text".
+  * @param voiceId Sent as "speaker_wav"; settings.language is sent as "language".
+  * @returns The response from doExtrasFetch when response.ok is truthy.
+  * @throws {Error} "HTTP <status>: <body text>" otherwise, after raising a toastr error.
+  */
+ async fetchTtsGeneration(inputText, voiceId) {
+     console.info(`Generating new TTS for voice_id ${voiceId}`)
+     const endpoint = `${this.settings.provider_endpoint}/tts_to_audio/`
+     const headers = {
+         'Content-Type': 'application/json',
+         'Cache-Control': 'no-cache' // disable caching so newly generated audio is always played
+     }
+
+     const payload = { "text": inputText, "speaker_wav": voiceId, "language": this.settings.language }
+     const result = await doExtrasFetch(endpoint, { method: 'POST', headers, body: JSON.stringify(payload) })
+     if (result.ok) {
+         return result
+     }
+     toastr.error(result.statusText, 'TTS Generation Failed');
+     throw new Error(`HTTP ${result.status}: ${await result.text()}`);
+ }
+
+ // Interface not used by XTTS TTS; resolves to the history_item_id it was given.
+ async fetchTtsFromHistory(history_item_id) {
+     return history_item_id
+ }
+
+}