import { getStringHash, debounce } from "../../utils.js";
import { chat_metadata, saveSettingsDebounced } from "../../../script.js";
import { extension_settings, getContext } from "../../extensions.js";
export { MODULE_NAME };

const saveChatDebounced = debounce(async () => await getContext().saveChat(), 1000);

const MODULE_NAME = '3_elevenlabs_tts'; // <= Deliberate, for sorting lower than memory
const UPDATE_INTERVAL = 1000;
const ELEVENLABS_API_BASE = 'https://api.elevenlabs.io/v1';

let API_KEY;
let ttsJobQueue = [];
let currentMessageNumber = 0;
// {charName: voiceName} -- the values are ElevenLabs voice *names*; they are
// resolved to voice ids through getTtsVoice() right before speaking.
let voiceMap = {};
let currentTtsJob;
let elevenlabsTtsVoices = [];


//############//
// TTS Code   //
//############//

/**
 * Marks the job that is currently playing as finished so that the next call
 * to processTtsQueue() can pick up the following queued message.
 */
function completeTtsJob() {
    // Optional chaining: this runs from an audio "ended" callback and must not
    // throw if the current job was already cleared by an error path.
    console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
    currentTtsJob = null;
}

/**
 * Decodes an audio/mpeg HTTP response and starts playing it. Resolves as soon
 * as playback has started; completeTtsJob() runs when playback ends.
 *
 * @param {Response} response fetch() response whose body is the audio
 * @throws {Error} if the body is not audio/mpeg or cannot be decoded
 */
async function playAudioFromResponse(response) {
    const audioBlob = await response.blob();
    if (audioBlob.type !== "audio/mpeg") {
        throw new Error(`TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioBlob.type}`);
    }

    // Created only after validation so a rejected response allocates nothing.
    const audioContext = new AudioContext();
    const closeAudioContext = () => audioContext.close().catch((error) => console.error(error));

    try {
        const buffer = await audioContext.decodeAudioData(await audioBlob.arrayBuffer());
        const source = new AudioBufferSourceNode(audioContext);
        source.onended = () => {
            // One context is created per playback; close it so contexts do
            // not pile up over a long chat session.
            closeAudioContext();
            completeTtsJob();
        };
        source.buffer = buffer;
        source.connect(audioContext.destination);
        console.debug(`Starting TTS playback`);
        source.start(0);
    } catch (error) {
        closeAudioContext();
        throw error;
    }
}

/**
 * Performs an authenticated request against the ElevenLabs API.
 *
 * @param {string} path path below the API base, starting with "/"
 * @param {object} [options] extra fetch() options; headers are merged with the API key header
 * @returns {Promise<Response>} the successful response
 * @throws {Error} "HTTP <status>: <body>" when the response is not ok
 */
async function elevenLabsFetch(path, options = {}) {
    const response = await fetch(`${ELEVENLABS_API_BASE}${path}`, {
        ...options,
        headers: { 'xi-api-key': API_KEY, ...options.headers },
    });
    if (!response.ok) {
        // Read the body as text: interpolating a parsed JSON object would only
        // print "[object Object]", and json() throws on non-JSON error bodies.
        throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }
    return response;
}

/**
 * Fetches the voices available to the configured API key.
 *
 * @returns {Promise<Array>} the "voices" property of the API response
 */
async function fetchTtsVoiceIds() {
    const response = await elevenLabsFetch('/voices');
    const responseJson = await response.json();
    return responseJson.voices;
}

/**
 * Fetches the default voice settings.
 *
 * @returns {Promise<object>} the parsed JSON response
 */
async function fetchTtsVoiceSettings() {
    const response = await elevenLabsFetch('/voices/settings/default');
    return response.json();
}

/**
 * Requests a new speech generation.
 *
 * @param {string} text text to speak
 * @param {string} voiceId ElevenLabs voice id
 * @returns {Promise<Response>} response whose body is the generated audio
 */
async function fetchTtsGeneration(text, voiceId) {
    console.info(`Generating new TTS for voice_id ${voiceId}`);
    return elevenLabsFetch(`/text-to-speech/${voiceId}`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ text: text }),
    });
}

/**
 * Downloads the audio of a previously generated history item.
 *
 * @param {string} history_item_id id of the history item
 * @returns {Promise<Response>} response whose body is the stored audio
 */
async function fetchTtsFromHistory(history_item_id) {
    console.info(`Fetched existing TTS with history_item_id ${history_item_id}`);
    return elevenLabsFetch(`/history/${history_item_id}/audio`);
}

/**
 * Fetches the generation history of the account.
 *
 * @returns {Promise<Array>} the "history" property of the API response
 */
async function fetchTtsHistory() {
    const response = await elevenLabsFetch('/history');
    const responseJson = await response.json();
    return responseJson.history;
}

/**
 * Looks for an earlier generation of the same text with the same voice.
 *
 * @param {string} message text that is about to be spoken
 * @param {string} voiceId voice the text must have been generated with
 * @returns {Promise<string>} the history item id, or '' when there is none
 */
async function findTtsGenerationInHistory(message, voiceId) {
    const ttsHistory = (await fetchTtsHistory()) ?? [];
    for (const history of ttsHistory) {
        const text = history.text;
        const itemId = history.history_item_id;
        // The voice has to match too, otherwise a line that was once spoken by
        // another character would be replayed in that character's voice.
        // NOTE(review): relies on history items exposing `voice_id`, as
        // described in the ElevenLabs history API reference -- confirm.
        if (message === text && history.voice_id === voiceId) {
            console.info(`Existing TTS history item ${itemId} found: ${text} `);
            return itemId;
        }
    }
    return '';
}

/**
 * Plays text as ElevenLabs TTS using voiceId voice. Will check history for
 * previously generated speech just in case.
 *
 * @param {string} text text to speak
 * @param {string} voiceId ElevenLabs voice id
 * @returns {Promise<void>} resolves once playback has started
 */
async function tts(text, voiceId) {
    const historyId = await findTtsGenerationInHistory(text, voiceId);

    let response;
    if (historyId) {
        console.debug(`Found existing TTS generation with id ${historyId}`);
        response = await fetchTtsFromHistory(historyId);
    } else {
        console.debug(`No existing TTS generation found, requesting new generation`);
        response = await fetchTtsGeneration(text, voiceId);
    }
    await playAudioFromResponse(response);
}

/**
 * Starts speaking the next queued message unless one is already in progress.
 * A job that fails is logged and dropped so the queue keeps moving.
 */
async function processTtsQueue() {
    if (currentTtsJob || ttsJobQueue.length <= 0) {
        return;
    }

    console.debug("New message found, running TTS");
    currentTtsJob = ttsJobQueue.shift();

    try {
        const text = currentTtsJob.mes.replaceAll('*', '...');
        const char = currentTtsJob.name;

        if (!voiceMap[char]) {
            throw new Error(`${char} not in voicemap. Configure character in extension settings voice map`);
        }
        const voice = await getTtsVoice(voiceMap[char]);
        const voiceId = voice.voice_id;
        if (voiceId == null) {
            throw new Error(`Unable to attain voiceId for ${char}`);
        }
        // Awaited on purpose: a rejected request must reach the catch below,
        // otherwise currentTtsJob is never cleared and the queue stalls.
        await tts(text, voiceId);
    } catch (error) {
        console.error(error);
        currentTtsJob = null;
    }
}

/**
 * Debug helper, exposed on window: queues every message of the current chat.
 */
async function playFullConversation() {
    const context = getContext();
    // Copy the array: the queue is consumed with shift() and extended with
    // push(), which must never modify the chat itself.
    ttsJobQueue = Array.isArray(context.chat) ? [...context.chat] : [];
}
window.playFullConversation = playFullConversation;


//##################//
//  Extension Code  //
//##################//

const defaultSettings = {
    elevenlabsApiKey: "",
    elevenlabsVoiceMap: "",
    // Same key that loadSettings() and onElevenlabsEnableClick() read and write.
    enabled: false,
};

/**
 * Shows a status message in the settings panel.
 *
 * @param {string} status message to display
 * @param {boolean} success false renders the message in red
 */
function setElevenLabsStatus(status, success) {
    $('#elevenlabs_status').text(status);
    if (success) {
        $("#elevenlabs_status").removeAttr("style");
    } else {
        $('#elevenlabs_status').css('color', 'red');
    }
}

/**
 * Reads the API key from the settings form, validates it by listing the
 * account's voices and stores it in the extension settings.
 *
 * @throws {Error} "ElevenLabs TTS API key invalid" when validation fails
 */
async function updateApiKey() {
    const value = String($('#elevenlabs_api_key').val());

    // The voice list request doubles as the API key validation.
    API_KEY = value;
    let voices;
    try {
        voices = await fetchTtsVoiceIds();
    } catch (error) {
        console.error(error);
        API_KEY = null;
        throw new Error(`ElevenLabs TTS API key invalid`);
    }

    // Refresh the cache so voices of a previously used key do not linger.
    elevenlabsTtsVoices = voices ?? [];
    extension_settings.elevenlabstts.elevenlabsApiKey = value;
    // The key itself is a secret and is deliberately kept out of the log.
    console.debug(`Saved new API_KEY`);
    saveSettingsDebounced();
}

/**
 * Parses "Character:Voice,Character2:Voice2" into an object. Entries missing
 * either side are skipped; names are trimmed.
 *
 * @param {string} voiceMapString raw value of the voice map field
 * @returns {object} map of character name to voice name
 */
function parseVoiceMap(voiceMapString) {
    const parsedVoiceMap = {};
    const entries = String(voiceMapString ?? '').split(",").map((entry) => entry.split(":"));
    for (const [charName, voiceName] of entries) {
        if (charName && voiceName) {
            parsedVoiceMap[charName.trim()] = voiceName.trim();
        }
    }
    return parsedVoiceMap;
}

/**
 * Resolves a voice by its name.
 *
 * @param {string} name ElevenLabs voice name
 * @returns {Promise<object>} the matching entry of the voice list
 * @throws {Error} if the account has no voice with that name
 */
async function getTtsVoice(name) {
    // We're caching the list of voice_ids. This might cause trouble.
    if (elevenlabsTtsVoices.length === 0) {
        elevenlabsTtsVoices = (await fetchTtsVoiceIds()) ?? [];
    }
    const match = elevenlabsTtsVoices.find((elevenVoice) => elevenVoice.name == name);
    if (!match) {
        throw new Error(`TTS Voice name ${name} not found in ElevenLabs account`);
    }
    return match;
}

/**
 * Checks that every voice name of a parsed voice map exists in the account.
 * Each failure is logged to the console.
 *
 * @param {object} parsedVoiceMap result of parseVoiceMap()
 * @returns {Promise<boolean>} true when all voices were found
 */
async function voicemapIsValid(parsedVoiceMap) {
    let valid = true;
    for (const parsedVoiceName of Object.values(parsedVoiceMap)) {
        try {
            await getTtsVoice(parsedVoiceName);
        } catch (error) {
            console.error(error);
            valid = false;
        }
    }
    return valid;
}

/**
 * Reads the voice map from the settings form, validates it and, when valid,
 * activates it and stores it in the extension settings.
 *
 * @throws {Error} when at least one voice name is unknown
 */
async function updateVoiceMap() {
    const context = getContext();
    const value = $('#elevenlabs_voice_map').val();
    const parsedVoiceMap = parseVoiceMap(value);

    if (!(await voicemapIsValid(parsedVoiceMap))) {
        throw new Error("Voice map is invalid, check console for errors");
    }

    extension_settings.elevenlabstts.elevenlabsVoiceMap = String(value);
    context.elevenlabsVoiceMap = String(value);
    voiceMap = parsedVoiceMap;
    console.debug(`Saved new voiceMap: ${value}`);
    saveSettingsDebounced();
}

/**
 * Applies the API key and the voice map from the form and reports the outcome
 * in the status line.
 */
async function onElevenlabsConnectClick() {
    try {
        // Sequential on purpose: the voice map has to be validated against the
        // voices of the key that was just entered.
        await updateApiKey();
        await updateVoiceMap();
        setElevenLabsStatus("Successfully applied settings", true);
    } catch (error) {
        setElevenLabsStatus(error?.message ?? String(error), false);
    }
}

/**
 * Persists the state of the "enabled" checkbox.
 */
function onElevenlabsEnableClick() {
    extension_settings.elevenlabstts.enabled = $("#elevenlabs_enabled").is(':checked');
    saveSettingsDebounced();
}

/**
 * Fills the settings form from the stored extension settings (applying the
 * defaults when nothing is stored yet) and applies them.
 */
function loadSettings() {
    if (!extension_settings.elevenlabstts) {
        extension_settings.elevenlabstts = {};
    }
    if (Object.keys(extension_settings.elevenlabstts).length === 0) {
        Object.assign(extension_settings.elevenlabstts, defaultSettings);
    }

    $('#elevenlabs_api_key').val(extension_settings.elevenlabstts.elevenlabsApiKey);
    $('#elevenlabs_voice_map').val(extension_settings.elevenlabstts.elevenlabsVoiceMap);
    $('#elevenlabs_enabled').prop('checked', extension_settings.elevenlabstts.enabled);
    onElevenlabsConnectClick();
}

/**
 * Periodic worker: advances the TTS queue and queues the newest chat message
 * whenever the number of messages in the chat has changed.
 */
async function moduleWorker() {
    const enabled = $("#elevenlabs_enabled").is(':checked');
    if (!enabled) {
        return;
    }

    const context = getContext();
    const chat = context.chat;

    // Not awaited: processTtsQueue() handles its own errors and the worker
    // should not block on network requests.
    processTtsQueue();

    // no characters or group selected
    if (!context.groupId && !context.characterId) {
        return;
    }

    // take the count of messages
    const lastMessageNumber = Array.isArray(chat) ? chat.length : 0;

    // There's no new messages
    if (lastMessageNumber === currentMessageNumber) {
        return;
    }

    // New messages add to history
    currentMessageNumber = lastMessageNumber;

    // The chat was emptied or is unavailable: there is nothing to speak.
    if (lastMessageNumber === 0) {
        return;
    }

    const message = chat[chat.length - 1];
    console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
    ttsJobQueue.push(message);
}

$(document).ready(function () {
    /**
     * Appends the settings panel to the extensions settings container and
     * wires up its controls.
     */
    function addExtensionControls() {
        // NOTE(review): as visible here the template holds only the panel
        // title, while the code in this file addresses #elevenlabs_api_key,
        // #elevenlabs_voice_map, #elevenlabs_enabled, #elevenlabs_connect and
        // #elevenlabs_status. The markup looks stripped -- confirm against the
        // original file before shipping.
        const settingsHtml = `

ElevenLabs TTS


`;
        $('#extensions_settings').append(settingsHtml);
        $('#elevenlabs_connect').on('click', onElevenlabsConnectClick);
        $('#elevenlabs_enabled').on('click', onElevenlabsEnableClick);
    }

    addExtensionControls();
    loadSettings();
    setInterval(moduleWorker, UPDATE_INTERVAL);
});