diff --git a/public/scripts/extensions/tts/elevenlabs.js b/public/scripts/extensions/tts/elevenlabs.js index f7dc9584b..0429f5ea8 100644 --- a/public/scripts/extensions/tts/elevenlabs.js +++ b/public/scripts/extensions/tts/elevenlabs.js @@ -5,48 +5,24 @@ class ElevenLabsTtsProvider { // Config // //########// - API_KEY - settings = this.defaultSettings + settings voices = [] - - set API_KEY(apiKey) { - this.API_KEY = apiKey - } - get API_KEY() { - return this.API_KEY - } + get settings() { return this.settings } - updateSettings(settings) { - console.info("Settings updated") - if("stability" in settings && "similarity_boost" in settings){ - this.settings = settings - $('#elevenlabs_tts_stability').val(this.settings.stability) - $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost) - this.onSettingsChange() - } else { - throw `Invalid settings passed to ElevenLabs: ${JSON.stringify(settings)}` - } - } - defaultSettings = { stability: 0.75, - similarity_boost: 0.75 - } - - onSettingsChange() { - this.settings = { - stability: $('#elevenlabs_tts_stability').val(), - similarity_boost: $('#elevenlabs_tts_similarity_boost').val() - } - $('#elevenlabs_tts_stability_output').text(this.settings.stability) - $('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost) + similarity_boost: 0.75, + apiKey: "", + voiceMap: {} } get settingsHtml() { let html = ` + + @@ -55,9 +31,58 @@ class ElevenLabsTtsProvider { return html } - //#############// - // Management // - //#############// + onSettingsChange() { + // Update dynamically + this.settings.stability = $('#elevenlabs_tts_stability').val() + this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val() + } + + + loadSettings(settings) { + // Pupulate Provider UI given input settings + if (Object.keys(settings).length == 0) { + console.info("Using default TTS Provider settings") + } + + // Only accept keys defined in defaultSettings + this.settings = this.defaultSettings + + for (const key in settings){ + if (key in this.settings){ + this.settings[key] = settings[key] + } else { + throw `Invalid setting passed to TTS Provider: ${key}` + } + } + + $('#elevenlabs_tts_stability').val(this.settings.stability) + $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost) + $('#elevenlabs_tts_api_key').val(this.settings.apiKey) + console.info("Settings loaded") + } + + async onApplyClick() { + // Update on Apply click + return await this.updateApiKey().catch( (error) => { + throw error + }) + } + + + async updateApiKey() { + // Using this call to validate API key + this.settings.apiKey = $('#elevenlabs_tts_api_key').val() + + await this.fetchTtsVoiceIds().catch(error => { + throw `TTS API key validation failed` + }) + this.settings.apiKey = this.settings.apiKey + console.debug(`Saved new API_KEY: ${this.settings.apiKey}`) + } + + //#################// + // TTS Interfaces // + //#################// async getVoice(voiceName) { if (this.voices.length == 0) { @@ -72,6 +97,25 @@ class ElevenLabsTtsProvider { return match } + + async generateTts(text, voiceId){ + const historyId = await this.findTtsGenerationInHistory(text, voiceId) + + let response + if (historyId) { + console.debug(`Found existing TTS generation with id ${historyId}`) + response = await this.fetchTtsFromHistory(historyId) + } else { + console.debug(`No existing TTS generation found, requesting new generation`) + response = await this.fetchTtsGeneration(text, voiceId) + } + return response + } + + //###################// + // Helper Functions // + //###################// + async findTtsGenerationInHistory(message, voiceId) { const ttsHistory = await this.fetchTtsHistory() for (const history of ttsHistory) { @@ -85,12 +129,13 @@ class ElevenLabsTtsProvider { return '' } + //###########// // API CALLS // //###########// async fetchTtsVoiceIds() { const headers = { - 'xi-api-key': this.API_KEY + 'xi-api-key': this.settings.apiKey } const response = await fetch(`https://api.elevenlabs.io/v1/voices`, { headers: headers @@ -104,7 +149,7 @@ class ElevenLabsTtsProvider { async fetchTtsVoiceSettings() { const headers = { - 'xi-api-key': this.API_KEY + 'xi-api-key': this.settings.apiKey } const response = await fetch( `https://api.elevenlabs.io/v1/voices/settings/default`, @@ -125,7 +170,7 @@ class ElevenLabsTtsProvider { { method: 'POST', headers: { - 'xi-api-key': this.API_KEY, + 'xi-api-key': this.settings.apiKey, 'Content-Type': 'application/json' }, body: JSON.stringify({ @@ -146,7 +191,7 @@ class ElevenLabsTtsProvider { `https://api.elevenlabs.io/v1/history/${history_item_id}/audio`, { headers: { - 'xi-api-key': this.API_KEY + 'xi-api-key': this.settings.apiKey } } ) @@ -158,7 +203,7 @@ class ElevenLabsTtsProvider { async fetchTtsHistory() { const headers = { - 'xi-api-key': this.API_KEY + 'xi-api-key': this.settings.apiKey } const response = await fetch(`https://api.elevenlabs.io/v1/history`, { headers: headers diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 3483d290f..b35e0b1ee 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -2,6 +2,7 @@ import { callPopup, saveSettingsDebounced } from '../../../script.js' import { extension_settings, getContext } from '../../extensions.js' import { getStringHash } from '../../utils.js' import { ElevenLabsTtsProvider } from './elevenlabs.js' +import { SileroTtsProvider } from './silerotts.js' const UPDATE_INTERVAL = 1000 @@ -15,7 +16,8 @@ let lastMessageHash = null let ttsProviders = { - elevenLabs: ElevenLabsTtsProvider + ElevenLabs: ElevenLabsTtsProvider, + Silero: SileroTtsProvider } let ttsProvider let ttsProviderName @@ -130,6 +132,30 @@ async function onTtsVoicesClick() { callPopup(popupText, 'text') } +function updateUiAudioPlayState() { + if (extension_settings.tts.enabled == true) { + audioControl.style.display = 'flex' + const img = !audioElement.paused + ? 'fa-solid fa-circle-pause' + : 'fa-solid fa-circle-play' + audioControl.className = img + } else { + audioControl.style.display = 'none' + } +} + +function onAudioControlClicked() { + audioElement.paused ? audioElement.play() : audioElement.pause() + updateUiAudioPlayState() +} + +function addAudioControl() { + $('#send_but_sheld').prepend('
') + $('#send_but_sheld').on('click', onAudioControlClicked) + audioControl = document.getElementById('tts_media_control') + updateUiAudioPlayState() +} + function completeCurrentAudioJob() { queueProcessorReady = true lastAudioPosition = 0 @@ -142,7 +168,7 @@ function completeCurrentAudioJob() { */ async function addAudioJob(response) { const audioData = await response.blob() - if (audioData.type != 'audio/mpeg') { + if (!audioData.type in ['audio/mpeg', 'audio/wav']) { throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` } audioJobQueue.push(audioData) @@ -188,16 +214,7 @@ function saveLastValues() { } async function tts(text, voiceId) { - const historyId = await ttsProvider.findTtsGenerationInHistory(text, voiceId) - - let response - if (historyId) { - console.debug(`Found existing TTS generation with id ${historyId}`) - response = await ttsProvider.fetchTtsFromHistory(historyId) - } else { - console.debug(`No existing TTS generation found, requesting new generation`) - response = await ttsProvider.fetchTtsGeneration(text, voiceId) - } + const response = await ttsProvider.generateTts(text, voiceId) addAudioJob(response) completeTtsJob() } @@ -242,32 +259,20 @@ window.playFullConversation = playFullConversation //#############################// function loadSettings() { - if (!(ttsProviderName in extension_settings.tts)){ - extension_settings.tts[ttsProviderName] = {} + if (Object.keys(extension_settings.tts).length === 0) { + Object.assign(extension_settings.tts, defaultSettings) } - if (Object.keys(extension_settings.tts[ttsProviderName]).length === 0) { - Object.assign(extension_settings.tts[ttsProviderName], defaultSettings) - extension_settings.tts[ttsProviderName].settings = Object.assign({}, ttsProvider.defaultSettings) - } - - $('#tts_api_key').val( - extension_settings.tts[ttsProviderName].apiKey - ) - $('#tts_voice_map').val( - extension_settings.tts[ttsProviderName].voiceMap - ) $('#tts_enabled').prop( 'checked', extension_settings.tts.enabled ) - ttsProvider.updateSettings(extension_settings.tts[ttsProviderName].settings) - onApplyClick() } const defaultSettings = { - apiKey: '', voiceMap: '', - ttsEnabled: false + ttsEnabled: false, + currentProvider: "ElevenLabs" + } function setTtsStatus(status, success) { @@ -279,21 +284,6 @@ function setTtsStatus(status, success) { } } -async function updateApiKey() { - const value = $('#tts_api_key').val() - - // Using this call to validate API key - ttsProvider.API_KEY = String(value) - await ttsProvider.fetchTtsVoiceIds().catch(error => { - ttsProvider.API_KEY = null - throw `TTS API key invalid` - }) - - extension_settings.tts[ttsProviderName].apiKey = String(value) - console.debug(`Saved new API_KEY: ${value}`) - saveSettingsDebounced() -} - function parseVoiceMap(voiceMapString) { let parsedVoiceMap = {} for (const [charName, voiceId] of voiceMapString @@ -323,13 +313,14 @@ async function voicemapIsValid(parsedVoiceMap) { async function updateVoiceMap() { let isValidResult = false const context = getContext() - // console.debug("onvoiceMapSubmit"); + const value = $('#tts_voice_map').val() const parsedVoiceMap = parseVoiceMap(value) + isValidResult = await voicemapIsValid(parsedVoiceMap) if (isValidResult) { - extension_settings.tts[ttsProviderName].voiceMap = String(value) - context.voiceMap = String(value) + ttsProvider.settings.voiceMap = String(value) + // console.debug(`ttsProvider.voiceMap: ${ttsProvider.settings.voiceMap}`) voiceMap = parsedVoiceMap console.debug(`Saved new voiceMap: ${value}`) saveSettingsDebounced() @@ -339,14 +330,18 @@ async function updateVoiceMap() { } function onApplyClick() { - Promise.all([updateApiKey(), updateVoiceMap()]) - .then(([result1, result2]) => { - updateUiAudioPlayState() - setTtsStatus('Successfully applied settings', true) - }) - .catch(error => { - setTtsStatus(error, false) - }) + Promise.all([ + ttsProvider.onApplyClick(), + updateVoiceMap() + ]).catch(error => { + console.error(error) + setTtsStatus(error, false) + }) + + extension_settings.tts[ttsProviderName] = ttsProvider.settings + saveSettingsDebounced() + setTtsStatus('Successfully applied settings', true) + console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`) } function onEnableClick() { @@ -357,49 +352,62 @@ function onEnableClick() { saveSettingsDebounced() } -function updateUiAudioPlayState() { - if (extension_settings.tts.enabled == true) { - audioControl.style.display = 'flex' - const img = !audioElement.paused - ? 'fa-solid fa-circle-pause' - : 'fa-solid fa-circle-play' - audioControl.className = img - } else { - audioControl.style.display = 'none' + +//##############// +// TTS Provider // +//##############// + +function loadTtsProvider(provider) { + //Clear the current config and add new config + $("#tts_provider_settings").html("") + + if (!provider) { + provider } -} - -function onAudioControlClicked() { - audioElement.paused ? audioElement.play() : audioElement.pause() - updateUiAudioPlayState() -} - -function addAudioControl() { - $('#send_but_sheld').prepend('
') - $('#send_but_sheld').on('click', onAudioControlClicked) - audioControl = document.getElementById('tts_media_control') - updateUiAudioPlayState() -} - -function addUiTtsProviderConfig() { - $('#tts_provider_settings').append(ttsProvider.settingsHtml) - ttsProvider.onSettingsChange() -} - -function loadTtsProvider(provider){ - // Set up provider references. No init dependencies + // Init provider references extension_settings.tts.currentProvider = provider ttsProviderName = provider ttsProvider = new ttsProviders[provider] - saveSettingsDebounced() + + // Init provider settings + $('#tts_provider_settings').append(ttsProvider.settingsHtml) + if (!(ttsProviderName in extension_settings.tts)) { + console.warn(`Provider ${ttsProviderName} not in Extension Settings, initiatilizing provider in settings`) + extension_settings.tts[ttsProviderName] = {} + } + + // Load voicemap settings + let voiceMapFromSettings + if ("voiceMap" in extension_settings.tts[ttsProviderName]) { + voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap + voiceMap = parseVoiceMap(voiceMapFromSettings) + } else { + voiceMapFromSettings = "" + voiceMap = {} + } + $('#tts_voice_map').val(voiceMapFromSettings) + $('#tts_provider').val(ttsProviderName) + + ttsProvider.loadSettings(extension_settings.tts[ttsProviderName]) } -function onTtsProviderSettingsInput(){ - ttsProvider.onSettingsChange() - extension_settings.tts[ttsProviderName].settings = ttsProvider.settings - saveSettingsDebounced() +function onTtsProviderChange() { + const ttsProviderSelection = $('#tts_provider').val() + loadTtsProvider(ttsProviderSelection) } +function onTtsProviderSettingsInput() { + ttsProvider.onSettingsChange() + + // Persist changes to SillyTavern tts extension settings + + extension_settings.tts[ttsProviderName] = ttsProvider.setttings + saveSettingsDebounced() + console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`) +} + + + $(document).ready(function () { function addExtensionControls() { const settingsHtml = ` @@ -410,14 +418,10 @@ $(document).ready(function () {
- - - - -
- - +
+ Select TTS Provider
+
+ + +
- -
-
- TTS Config -
+
+
+
+ +
-
-
@@ -445,11 +451,14 @@ $(document).ready(function () { $('#tts_enabled').on('click', onEnableClick) $('#tts_voices').on('click', onTtsVoicesClick) $('#tts_provider_settings').on('input', onTtsProviderSettingsInput) + for (const provider in ttsProviders) { + $('#tts_provider').append($("