diff --git a/public/scripts/extensions/elevenlabstts/elevenlabs.js b/public/scripts/extensions/elevenlabstts/elevenlabs.js new file mode 100644 index 000000000..e033de663 --- /dev/null +++ b/public/scripts/extensions/elevenlabstts/elevenlabs.js @@ -0,0 +1,102 @@ +export { ElevenLabsTtsProvider } + +class ElevenLabsTtsProvider { + API_KEY + set API_KEY(apiKey) { + this.API_KEY = apiKey + } + get API_KEY() { + return this.API_KEY + } + async fetchTtsVoiceIds() { + const headers = { + 'xi-api-key': this.API_KEY + } + const response = await fetch(`https://api.elevenlabs.io/v1/voices`, { + headers: headers + }) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.json()}`) + } + const responseJson = await response.json() + return responseJson.voices + } + + async fetchTtsVoiceSettings() { + const headers = { + 'xi-api-key': this.API_KEY + } + const response = await fetch( + `https://api.elevenlabs.io/v1/voices/settings/default`, + { + headers: headers + } + ) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.json()}`) + } + return response.json() + } + + async fetchTtsGeneration(text, voiceId) { + console.info(`Generating new TTS for voice_id ${voiceId}`) + const response = await fetch( + `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, + { + method: 'POST', + headers: { + 'xi-api-key': this.API_KEY, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ text: text }) + } + ) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.json()}`) + } + return response + } + + async fetchTtsFromHistory(history_item_id) { + console.info(`Fetched existing TTS with history_item_id ${history_item_id}`) + const response = await fetch( + `https://api.elevenlabs.io/v1/history/${history_item_id}/audio`, + { + headers: { + 'xi-api-key': this.API_KEY + } + } + ) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.json()}`) + } + return response + } + + async fetchTtsHistory() { + const headers = { + 'xi-api-key': this.API_KEY + } + const response = await fetch(`https://api.elevenlabs.io/v1/history`, { + headers: headers + }) + if (!response.ok) { + throw new Error(`HTTP ${response.status}: ${await response.json()}`) + } + const responseJson = await response.json() + return responseJson.history + } + + async findTtsGenerationInHistory(message, voiceId) { + const ttsHistory = await this.fetchTtsHistory() + for (const history of ttsHistory) { + const text = history.text + const itemId = history.history_item_id + if (message === text && history.voice_id == voiceId) { + console.info(`Existing TTS history item ${itemId} found: ${text} `) + return itemId + } + } + return '' + } +} diff --git a/public/scripts/extensions/elevenlabstts/index.js b/public/scripts/extensions/elevenlabstts/index.js index 9643142dc..03a390c09 100644 --- a/public/scripts/extensions/elevenlabstts/index.js +++ b/public/scripts/extensions/elevenlabstts/index.js @@ -1,160 +1,80 @@ -import { callPopup, saveSettingsDebounced } from "../../../script.js"; -import { extension_settings, getContext } from "../../extensions.js"; -import { getStringHash } from "../../utils.js"; +import { callPopup, saveSettingsDebounced } from '../../../script.js' +import { extension_settings, getContext } from '../../extensions.js' +import { getStringHash } from '../../utils.js' +import { ElevenLabsTtsProvider } from './elevenlabs.js' -const UPDATE_INTERVAL = 1000; -let API_KEY +const UPDATE_INTERVAL = 1000 let voiceMap = {} // {charName:voiceid, charName2:voiceid2} let elevenlabsTtsVoices = [] let audioControl +let lastCharacterId = null +let lastGroupId = null +let lastChatId = null +let lastMessageHash = null -let lastCharacterId = null; -let lastGroupId = null; -let lastChatId = null; -let lastMessageHash = null; - +let ttsProvider = new ElevenLabsTtsProvider() async function moduleWorker() { // Primarily determinign when to add new chat to the TTS queue - const enabled = $("#elevenlabs_enabled").is(':checked'); + const enabled = $('#elevenlabs_enabled').is(':checked') if (!enabled) { - return; + return } const context = getContext() - const chat = context.chat; + const chat = context.chat - processTtsQueue(); - processAudioJobQueue(); - updateUiAudioPlayState(); + processTtsQueue() + processAudioJobQueue() + updateUiAudioPlayState() - // no characters or group selected + // no characters or group selected if (!context.groupId && !context.characterId) { - return; + return } // Chat/character/group changed - if ((context.groupId && lastGroupId !== context.groupId) || (context.characterId !== lastCharacterId) || (context.chatId !== lastChatId)) { + if ( + (context.groupId && lastGroupId !== context.groupId) || + context.characterId !== lastCharacterId || + context.chatId !== lastChatId + ) { currentMessageNumber = context.chat.length ? context.chat.length : 0 - saveLastValues(); - return; + saveLastValues() + return } // take the count of messages - let lastMessageNumber = context.chat.length ? context.chat.length : 0; + let lastMessageNumber = context.chat.length ? context.chat.length : 0 // There's no new messages - let diff = lastMessageNumber - currentMessageNumber; - let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? ''); + let diff = lastMessageNumber - currentMessageNumber + let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '') if (diff == 0 && hashNew === lastMessageHash) { - return; + return } - const message = chat[chat.length - 1]; + const message = chat[chat.length - 1] // We're currently swiping or streaming. Don't generate voice - if (message.mes === '...' || (context.streamingProcessor && !context.streamingProcessor.isFinished)) { - return; + if ( + message.mes === '...' || + (context.streamingProcessor && !context.streamingProcessor.isFinished) + ) { + return } // New messages, add new chat to history - lastMessageHash = hashNew; - currentMessageNumber = lastMessageNumber; + lastMessageHash = hashNew + currentMessageNumber = lastMessageNumber - console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`); - ttsJobQueue.push(message); -} - - -//#################// -// TTS API Calls // -//#################// - -async function fetchTtsVoiceIds() { - const headers = { - 'xi-api-key': API_KEY - }; - const response = await fetch(`https://api.elevenlabs.io/v1/voices`, { - headers: headers - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${await response.json()}`); - } - const responseJson = await response.json(); - return responseJson.voices; -} - -async function fetchTtsVoiceSettings() { - const headers = { - 'xi-api-key': API_KEY - }; - const response = await fetch(`https://api.elevenlabs.io/v1/voices/settings/default`, { - headers: headers - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${await response.json()}`); - } - return response.json(); -} - -async function fetchTtsGeneration(text, voiceId) { - console.info(`Generating new TTS for voice_id ${voiceId}`); - const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, { - method: 'POST', - headers: { - 'xi-api-key': API_KEY, - 'Content-Type': 'application/json' - }, - body: JSON.stringify({ text: text }) - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${await response.json()}`); - } - return response; -} - -async function fetchTtsFromHistory(history_item_id) { - console.info(`Fetched existing TTS with history_item_id ${history_item_id}`); - const response = await fetch(`https://api.elevenlabs.io/v1/history/${history_item_id}/audio`, { - headers: { - 'xi-api-key': API_KEY - } - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${await response.json()}`); - } - return response; -} - -async function fetchTtsHistory() { - const headers = { - 'xi-api-key': API_KEY - }; - const response = await fetch(`https://api.elevenlabs.io/v1/history`, { - headers: headers - }); - if (!response.ok) { - throw new Error(`HTTP ${response.status}: ${await response.json()}`); - } - const responseJson = await response.json(); - return responseJson.history; -} - - -async function findTtsGenerationInHistory(message, voiceId) { - const ttsHistory = await fetchTtsHistory(); - for (const history of ttsHistory) { - const text = history.text; - const itemId = history.history_item_id; - if (message === text && history.voice_id == voiceId) { - console.info(`Existing TTS history item ${itemId} found: ${text} `) - return itemId; - } - } - return '' + console.debug( + `Adding message from ${message.name} for TTS processing: "${message.mes}"` + ) + ttsJobQueue.push(message) } //##################// @@ -170,14 +90,13 @@ let queueProcessorReady = true let lastAudioPosition = 0 - async function playAudioData(audioBlob) { - const reader = new FileReader(); + const reader = new FileReader() reader.onload = function (e) { - const srcUrl = e.target.result; - audioElement.src = srcUrl; - }; - reader.readAsDataURL(audioBlob); + const srcUrl = e.target.result + audioElement.src = srcUrl + } + reader.readAsDataURL(audioBlob) audioElement.addEventListener('ended', completeCurrentAudioJob) audioElement.addEventListener('canplay', () => { console.debug(`Starting TTS playback`) @@ -185,28 +104,26 @@ async function playAudioData(audioBlob) { }) } -window['elevenlabsPreview'] = function(id) { - const audio = document.getElementById(id); - audio.play(); +window['elevenlabsPreview'] = function (id) { + const audio = document.getElementById(id) + audio.play() } async function onElevenlabsVoicesClick() { - let popupText = ''; + let popupText = '' try { - const voiceIds = await fetchTtsVoiceIds(); + const voiceIds = await ttsProvider.fetchTtsVoiceIds() for (const voice of voiceIds) { - popupText += `
${voice.name}
`; - popupText += ``; + popupText += `
${voice.name}
` + popupText += `` } - } - catch { + } catch { popupText = 'Could not load voices list. Check your API key.' } - - callPopup(popupText, 'text'); + callPopup(popupText, 'text') } function completeCurrentAudioJob() { @@ -217,21 +134,21 @@ function completeCurrentAudioJob() { /** * Accepts an HTTP response containing audio/mpeg data, and puts the data as a Blob() on the queue for playback - * @param {*} response + * @param {*} response */ async function addAudioJob(response) { const audioData = await response.blob() - if (audioData.type != "audio/mpeg") { + if (audioData.type != 'audio/mpeg') { throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` } audioJobQueue.push(audioData) - console.debug("Pushed audio job to queue.") + console.debug('Pushed audio job to queue.') } async function processAudioJobQueue() { // Nothing to do, audio not completed, or audio paused - stop processing. if (audioJobQueue.length == 0 || !queueProcessorReady || audioPaused) { - return; + return } try { queueProcessorReady = false @@ -243,7 +160,6 @@ async function processAudioJobQueue() { } } - //################// // TTS Control // //################// @@ -259,22 +175,24 @@ function completeTtsJob() { function saveLastValues() { const context = getContext() - lastGroupId = context.groupId; - lastCharacterId = context.characterId; - lastChatId = context.chatId; - lastMessageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? ''); + lastGroupId = context.groupId + lastCharacterId = context.characterId + lastChatId = context.chatId + lastMessageHash = getStringHash( + (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '' + ) } async function tts(text, voiceId) { - const historyId = await findTtsGenerationInHistory(text, voiceId); + const historyId = await ttsProvider.findTtsGenerationInHistory(text, voiceId) - let response; + let response if (historyId) { console.debug(`Found existing TTS generation with id ${historyId}`) - response = await fetchTtsFromHistory(historyId); + response = await ttsProvider.fetchTtsFromHistory(historyId) } else { console.debug(`No existing TTS generation found, requesting new generation`) - response = await fetchTtsGeneration(text, voiceId); + response = await ttsProvider.fetchTtsGeneration(text, voiceId) } addAudioJob(response) completeTtsJob() @@ -283,12 +201,12 @@ async function tts(text, voiceId) { async function processTtsQueue() { // Called each moduleWorker iteration to pull chat messages from queue if (currentTtsJob || ttsJobQueue.length <= 0 || audioPaused) { - return; + return } - console.debug("New message found, running TTS") + console.debug('New message found, running TTS') currentTtsJob = ttsJobQueue.shift() - const text = currentTtsJob.mes.replaceAll('*', '...'); + const text = currentTtsJob.mes.replaceAll('*', '...') const char = currentTtsJob.name try { @@ -298,20 +216,19 @@ async function processTtsQueue() { const voice = await getTtsVoice(voiceMap[char]) const voiceId = voice.voice_id if (voiceId == null) { - throw (`Unable to attain voiceId for ${char}`) + throw `Unable to attain voiceId for ${char}` } tts(text, voiceId) } catch (error) { console.error(error) currentTtsJob = null } - } // Secret function for now async function playFullConversation() { const context = getContext() - const chat = context.chat; + const chat = context.chat ttsJobQueue = chat } window.playFullConversation = playFullConversation @@ -323,52 +240,60 @@ window.playFullConversation = playFullConversation function loadSettings() { const context = getContext() if (Object.keys(extension_settings.elevenlabstts).length === 0) { - Object.assign(extension_settings.elevenlabstts, defaultSettings); + Object.assign(extension_settings.elevenlabstts, defaultSettings) } - $('#elevenlabs_api_key').val(extension_settings.elevenlabstts.elevenlabsApiKey); - $('#elevenlabs_voice_map').val(extension_settings.elevenlabstts.elevenlabsVoiceMap); - $('#elevenlabs_enabled').prop('checked', extension_settings.elevenlabstts.enabled); + $('#elevenlabs_api_key').val( + extension_settings.elevenlabstts.elevenlabsApiKey + ) + $('#elevenlabs_voice_map').val( + extension_settings.elevenlabstts.elevenlabsVoiceMap + ) + $('#elevenlabs_enabled').prop( + 'checked', + extension_settings.elevenlabstts.enabled + ) onElevenlabsApplyClick() } const defaultSettings = { - elevenlabsApiKey: "", - elevenlabsVoiceMap: "", + elevenlabsApiKey: '', + elevenlabsVoiceMap: '', elevenlabsEnabed: false -}; - +} function setElevenLabsStatus(status, success) { $('#elevenlabs_status').text(status) if (success) { - $("#elevenlabs_status").removeAttr("style"); + $('#elevenlabs_status').removeAttr('style') } else { - $('#elevenlabs_status').css('color', 'red'); + $('#elevenlabs_status').css('color', 'red') } } async function updateApiKey() { - const context = getContext(); - const value = $('#elevenlabs_api_key').val(); + const context = getContext() + const value = $('#elevenlabs_api_key').val() // Using this call to validate API key - API_KEY = String(value) - await fetchTtsVoiceIds().catch((error => { - API_KEY = null + ttsProvider.API_KEY = String(value) + await ttsProvider.fetchTtsVoiceIds().catch(error => { + ttsProvider.API_KEY = null throw `ElevenLabs TTS API key invalid` - })) + }) - extension_settings.elevenlabstts.elevenlabsApiKey = String(value); - console.debug(`Saved new API_KEY: ${value}`); - saveSettingsDebounced(); + extension_settings.elevenlabstts.elevenlabsApiKey = String(value) + console.debug(`Saved new API_KEY: ${value}`) + saveSettingsDebounced() } function parseVoiceMap(voiceMapString) { let parsedVoiceMap = {} - for (const [charName, voiceId] of voiceMapString.split(",").map(s => s.split(":"))) { + for (const [charName, voiceId] of voiceMapString + .split(',') + .map(s => s.split(':'))) { if (charName && voiceId) { - parsedVoiceMap[charName.trim()] = voiceId.trim(); + parsedVoiceMap[charName.trim()] = voiceId.trim() } } return parsedVoiceMap @@ -377,24 +302,26 @@ function parseVoiceMap(voiceMapString) { async function getTtsVoice(name) { // We're caching the list of voice_ids. This might cause trouble if the user creates a new voice without restarting if (elevenlabsTtsVoices.length == 0) { - elevenlabsTtsVoices = await fetchTtsVoiceIds(); + elevenlabsTtsVoices = await ttsProvider.fetchTtsVoiceIds() } - const match = elevenlabsTtsVoices.filter((elevenVoice) => elevenVoice.name == name)[0]; + const match = elevenlabsTtsVoices.filter( + elevenVoice => elevenVoice.name == name + )[0] if (!match) { - throw `TTS Voice name ${name} not found in ElevenLabs account`; + throw `TTS Voice name ${name} not found in ElevenLabs account` } - return match; + return match } async function voicemapIsValid(parsedVoiceMap) { let valid = true for (const characterName in parsedVoiceMap) { - const parsedVoiceName = parsedVoiceMap[characterName]; + const parsedVoiceName = parsedVoiceMap[characterName] try { - await getTtsVoice(parsedVoiceName); + await getTtsVoice(parsedVoiceName) } catch (error) { console.error(error) - valid = false; + valid = false } } return valid @@ -402,19 +329,19 @@ async function voicemapIsValid(parsedVoiceMap) { async function updateVoiceMap() { let isValidResult = false - const context = getContext(); + const context = getContext() // console.debug("onElevenlabsVoiceMapSubmit"); - const value = $('#elevenlabs_voice_map').val(); - const parsedVoiceMap = parseVoiceMap(value); - isValidResult = await voicemapIsValid(parsedVoiceMap); + const value = $('#elevenlabs_voice_map').val() + const parsedVoiceMap = parseVoiceMap(value) + isValidResult = await voicemapIsValid(parsedVoiceMap) if (isValidResult) { - extension_settings.elevenlabstts.elevenlabsVoiceMap = String(value); + extension_settings.elevenlabstts.elevenlabsVoiceMap = String(value) context.elevenlabsVoiceMap = String(value) voiceMap = parsedVoiceMap console.debug(`Saved new voiceMap: ${value}`) - saveSettingsDebounced(); + saveSettingsDebounced() } else { - throw "Voice map is invalid, check console for errors" + throw 'Voice map is invalid, check console for errors' } } @@ -422,23 +349,27 @@ function onElevenlabsApplyClick() { Promise.all([updateApiKey(), updateVoiceMap()]) .then(([result1, result2]) => { updateUiAudioPlayState() - setElevenLabsStatus("Successfully applied settings", true) + setElevenLabsStatus('Successfully applied settings', true) }) - .catch((error) => { + .catch(error => { setElevenLabsStatus(error, false) - }); + }) } function onElevenlabsEnableClick() { - extension_settings.elevenlabstts.enabled = $("#elevenlabs_enabled").is(':checked'); + extension_settings.elevenlabstts.enabled = $('#elevenlabs_enabled').is( + ':checked' + ) updateUiAudioPlayState() - saveSettingsDebounced(); + saveSettingsDebounced() } function updateUiAudioPlayState() { if (extension_settings.elevenlabstts.enabled == true) { audioControl.style.display = 'flex' - const img = !audioElement.paused ? "fa-solid fa-circle-pause" : "fa-solid fa-circle-play" + const img = !audioElement.paused + ? 'fa-solid fa-circle-pause' + : 'fa-solid fa-circle-play' audioControl.className = img } else { audioControl.style.display = 'none' @@ -453,8 +384,8 @@ function onAudioControlClicked() { function addAudioControl() { $('#send_but_sheld').prepend('
') $('#send_but_sheld').on('click', onAudioControlClicked) - audioControl = document.getElementById('tts_media_control'); - updateUiAudioPlayState(); + audioControl = document.getElementById('tts_media_control') + updateUiAudioPlayState() } $(document).ready(function () { @@ -467,6 +398,8 @@ $(document).ready(function () {
+ + @@ -487,14 +420,14 @@ $(document).ready(function () {
- `; - $('#extensions_settings').append(settingsHtml); - $('#elevenlabs_apply').on('click', onElevenlabsApplyClick); - $('#elevenlabs_enabled').on('click', onElevenlabsEnableClick); - $('#elevenlabs_voices').on('click', onElevenlabsVoicesClick); + ` + $('#extensions_settings').append(settingsHtml) + $('#elevenlabs_apply').on('click', onElevenlabsApplyClick) + $('#elevenlabs_enabled').on('click', onElevenlabsEnableClick) + $('#elevenlabs_voices').on('click', onElevenlabsVoicesClick) } - addAudioControl(); - addExtensionControls(); - loadSettings(); - setInterval(moduleWorker, UPDATE_INTERVAL); -}); \ No newline at end of file + addAudioControl() + addExtensionControls() + loadSettings() + setInterval(moduleWorker, UPDATE_INTERVAL) +})