From bcad0d4e512a4d58ba2d3a77ef419db737095fef Mon Sep 17 00:00:00 2001
From: Danil Boldyrev
Date: Tue, 21 Nov 2023 13:16:56 +0300
Subject: [PATCH] add XTTS

---
 public/scripts/extensions/tts/index.js |   2 +
 public/scripts/extensions/tts/xtts.js  | 230 +++++++++++++++++++++++++
 2 files changed, 232 insertions(+)
 create mode 100644 public/scripts/extensions/tts/xtts.js

diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index 6736c5bb9..de3041013 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -10,6 +10,7 @@ import { NovelTtsProvider } from './novel.js'
 import { power_user } from '../../power-user.js'
 import { registerSlashCommand } from '../../slash-commands.js'
 import { OpenAITtsProvider } from './openai.js'
+import {XTTSTtsProvider} from "./xtts.js"
 export { talkingAnimation };
 
 const UPDATE_INTERVAL = 1000
@@ -70,6 +71,7 @@ export function getPreviewString(lang) {
 let ttsProviders = {
     ElevenLabs: ElevenLabsTtsProvider,
     Silero: SileroTtsProvider,
+    XTTSv2: XTTSTtsProvider,
     System: SystemTtsProvider,
     Coqui: CoquiTtsProvider,
     Edge: EdgeTtsProvider,
diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js
new file mode 100644
index 000000000..2d3764ca9
--- /dev/null
+++ b/public/scripts/extensions/tts/xtts.js
@@ -0,0 +1,230 @@
+import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js";
+import { saveTtsProviderSettings } from "./index.js";
+
+export { XTTSTtsProvider };
+
+/**
+ * TTS provider for an XTTSv2 API server.
+ *
+ * The voice list is read from `<provider_endpoint>/speakers` and audio is
+ * requested from `<provider_endpoint>/tts_to_audio/`, both via doExtrasFetch.
+ */
+class XTTSTtsProvider {
+    //########//
+    // Config //
+    //########//
+
+    settings;
+    ready = false;
+    voices = [];
+    separator = ' .. ';
+
+    // Handle of the Extras polling timer started by loadSettings (null when idle)
+    apiCheckInterval = null;
+
+    // Display name -> language code stored in settings.language
+    languageLabels = {
+        "Arabic": "ar",
+        "Brazilian Portuguese": "pt",
+        "Chinese": "zh-cn",
+        "Czech": "cs",
+        "Dutch": "nl",
+        "English": "en",
+        "French": "fr",
+        "German": "de",
+        "Italian": "it",
+        "Polish": "pl",
+        "Russian": "ru",
+        "Spanish": "es",
+        "Turkish": "tr",
+        "Japanese": "ja",
+        "Korean": "ko",
+        "Hungarian": "hu"
+    };
+
+    defaultSettings = {
+        provider_endpoint: "http://localhost:8020",
+        language: "en",
+        voiceMap: {}
+    };
+
+    /**
+     * Markup for the provider settings panel.
+     *
+     * Renders the #xtts_api_language select and the #xtts_tts_endpoint input
+     * that loadSettings() and onSettingsChange() read and write.
+     * @returns {string} HTML fragment
+     */
+    get settingsHtml() {
+        const selectedCode = this.settings?.language ?? this.defaultSettings.language;
+        const options = Object.entries(this.languageLabels)
+            .map(([label, code]) => {
+                const selected = code === selectedCode ? ' selected="selected"' : '';
+                return `<option value="${code}"${selected}>${label}</option>`;
+            })
+            .join('');
+
+        return `
+        <label for="xtts_api_language">Language</label>
+        <select id="xtts_api_language">${options}</select>
+        <label for="xtts_tts_endpoint">Provider Endpoint:</label>
+        <input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
+        <span>Use XTTSv2 TTS Server.</span>
+        `;
+    }
+
+    /** Copies the current UI values into settings and persists them. */
+    onSettingsChange() {
+        // Used when provider settings are updated from UI
+        this.settings.provider_endpoint = $('#xtts_tts_endpoint').val();
+        this.settings.language = $('#xtts_api_language').val();
+        saveTtsProviderSettings();
+    }
+
+    /**
+     * Applies saved settings on top of the defaults and wires up the settings UI.
+     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
+     * @throws {Error} If settings contains an unknown key, or if the voice list cannot be fetched
+     */
+    async loadSettings(settings) {
+        // Populate Provider UI given input settings
+        if (Object.keys(settings).length === 0) {
+            console.info("Using default TTS Provider settings");
+        }
+
+        // Work on a copy so that applying saved values never mutates defaultSettings
+        this.settings = {
+            ...this.defaultSettings,
+            voiceMap: { ...this.defaultSettings.voiceMap },
+        };
+
+        // Only accept keys defined in defaultSettings
+        for (const key in settings) {
+            if (key in this.settings) {
+                this.settings[key] = settings[key];
+            } else {
+                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
+            }
+        }
+
+        // A repeated loadSettings() call must not leave an earlier poll running
+        if (this.apiCheckInterval !== null) {
+            clearInterval(this.apiCheckInterval);
+        }
+        this.apiCheckInterval = setInterval(() => {
+            // Use Extras API if TTS support is enabled
+            if (modules.includes('tts') || modules.includes('xtts-tts')) {
+                const baseUrl = new URL(getApiUrl());
+                baseUrl.pathname = '/api/tts';
+                this.settings.provider_endpoint = baseUrl.toString();
+                $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
+                clearInterval(this.apiCheckInterval);
+                this.apiCheckInterval = null;
+            }
+        }, 2000);
+
+        $('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
+        $('#xtts_tts_endpoint').on("input", () => { this.onSettingsChange(); });
+        $('#xtts_api_language').val(this.settings.language);
+        $('#xtts_api_language').on("change", () => { this.onSettingsChange(); });
+
+        await this.checkReady();
+
+        console.debug("XTTS: Settings loaded");
+    }
+
+    // Perform a simple readiness check by trying to fetch voiceIds
+    async checkReady() {
+        await this.fetchTtsVoiceObjects();
+    }
+
+    async onRefreshClick() {
+        return;
+    }
+
+    //#################//
+    //  TTS Interfaces //
+    //#################//
+
+    /**
+     * Looks up a voice object by name, fetching the voice list on first use.
+     * @param {string} voiceName Value compared against each voice's `name`
+     * @returns {Promise<object>} The first voice whose name matches
+     * @throws {Error} If no voice has that name
+     */
+    async getVoice(voiceName) {
+        if (this.voices.length === 0) {
+            this.voices = await this.fetchTtsVoiceObjects();
+        }
+        const match = this.voices.find((voice) => voice.name === voiceName);
+        if (!match) {
+            throw new Error(`TTS Voice name ${voiceName} not found`);
+        }
+        return match;
+    }
+
+    /**
+     * Requests speech audio for the given text.
+     * @param {string} text Text to synthesize
+     * @param {string} voiceId Sent to the server as `speaker_wav`
+     * @returns {Promise<object>} The response returned by fetchTtsGeneration
+     */
+    async generateTts(text, voiceId) {
+        return this.fetchTtsGeneration(text, voiceId);
+    }
+
+    //###########//
+    // API CALLS //
+    //###########//
+
+    /**
+     * Fetches the voice list from `<provider_endpoint>/speakers`.
+     * @returns {Promise<any>} Parsed JSON body of the response
+     * @throws {Error} On a non-OK HTTP status
+     */
+    async fetchTtsVoiceObjects() {
+        const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`);
+        if (!response.ok) {
+            // Read the body as text: an error body is not guaranteed to be JSON,
+            // and an interpolated object would only print "[object Object]"
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
+        return response.json();
+    }
+
+    /**
+     * POSTs a synthesis request to `<provider_endpoint>/tts_to_audio/`.
+     * @param {string} inputText Text to synthesize
+     * @param {string} voiceId Sent as `speaker_wav`
+     * @returns {Promise<object>} The response returned by doExtrasFetch
+     * @throws {Error} On a non-OK HTTP status (a toastr error is shown first)
+     */
+    async fetchTtsGeneration(inputText, voiceId) {
+        console.info(`Generating new TTS for voice_id ${voiceId}`);
+        const response = await doExtrasFetch(
+            `${this.settings.provider_endpoint}/tts_to_audio/`,
+            {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'Cache-Control': 'no-cache' // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
+                },
+                body: JSON.stringify({
+                    "text": inputText,
+                    "speaker_wav": voiceId,
+                    "language": this.settings.language
+                })
+            }
+        );
+        if (!response.ok) {
+            toastr.error(response.statusText, 'TTS Generation Failed');
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
+        return response;
+    }
+
+    // Interface not used by XTTS TTS
+    async fetchTtsFromHistory(history_item_id) {
+        return Promise.resolve(history_item_id);
+    }
+}