diff --git a/public/scripts/extensions/elevenlabstts/elevenlabs.js b/public/scripts/extensions/elevenlabstts/elevenlabs.js
new file mode 100644
index 000000000..e033de663
--- /dev/null
+++ b/public/scripts/extensions/elevenlabstts/elevenlabs.js
@@ -0,0 +1,123 @@
+export { ElevenLabsTtsProvider }
+
+/**
+ * Thin client for the ElevenLabs text-to-speech REST API.
+ *
+ * Every request sends the key assigned to `API_KEY` in the `xi-api-key`
+ * header and rejects with an Error carrying the HTTP status and response body
+ * when the server answers with a non-2xx status.
+ */
+class ElevenLabsTtsProvider {
+    // Backing store for the API_KEY accessors. It must not share their name:
+    // a same-named public field shadows the accessors, and an accessor that
+    // touches `this.API_KEY` would call itself forever.
+    #apiKey
+
+    /** @param {string|null} apiKey - key sent in the `xi-api-key` header */
+    set API_KEY(apiKey) {
+        this.#apiKey = apiKey
+    }
+
+    /** @returns {string|null|undefined} the key currently in use */
+    get API_KEY() {
+        return this.#apiKey
+    }
+
+    /**
+     * Sends an authenticated request to the ElevenLabs v1 API.
+     * @param {string} path - path appended to the v1 base URL
+     * @param {RequestInit} [options] - extra fetch options; headers are merged
+     * @returns {Promise<Response>} the successful response
+     * @throws {Error} `HTTP <status>: <body>` when `response.ok` is false
+     */
+    async #request(path, options = {}) {
+        const response = await fetch(`https://api.elevenlabs.io/v1${path}`, {
+            ...options,
+            headers: { 'xi-api-key': this.#apiKey, ...options.headers }
+        })
+        if (!response.ok) {
+            // Read the body as text: interpolating a parsed JSON object prints
+            // "[object Object]", and a non-JSON error body would throw a
+            // SyntaxError that hides the HTTP status.
+            const detail = await response.text().catch(() => '')
+            throw new Error(`HTTP ${response.status}: ${detail}`)
+        }
+        return response
+    }
+
+    /**
+     * Lists the voices available to the account.
+     * @returns {Promise<object[]>} the `voices` member of the JSON response
+     */
+    async fetchTtsVoiceIds() {
+        const response = await this.#request('/voices')
+        const responseJson = await response.json()
+        return responseJson.voices
+    }
+
+    /**
+     * Fetches the default voice settings.
+     * @returns {Promise<object>} the parsed JSON response
+     */
+    async fetchTtsVoiceSettings() {
+        const response = await this.#request('/voices/settings/default')
+        return response.json()
+    }
+
+    /**
+     * Requests a new speech generation.
+     * @param {string} text - text to synthesize
+     * @param {string} voiceId - voice_id to speak with
+     * @returns {Promise<Response>} raw response; the caller reads its body
+     */
+    async fetchTtsGeneration(text, voiceId) {
+        console.info(`Generating new TTS for voice_id ${voiceId}`)
+        return this.#request(`/text-to-speech/${encodeURIComponent(voiceId)}`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ text: text })
+        })
+    }
+
+    /**
+     * Downloads the audio of an earlier generation.
+     * @param {string} history_item_id - id of the history entry
+     * @returns {Promise<Response>} raw response; the caller reads its body
+     */
+    async fetchTtsFromHistory(history_item_id) {
+        const itemId = encodeURIComponent(history_item_id)
+        const response = await this.#request(`/history/${itemId}/audio`)
+        // Logged only after the request succeeded, so the message is truthful.
+        console.info(`Fetched existing TTS with history_item_id ${history_item_id}`)
+        return response
+    }
+
+    /**
+     * Lists earlier generations.
+     * @returns {Promise<object[]>} the `history` member of the JSON response
+     */
+    async fetchTtsHistory() {
+        const response = await this.#request('/history')
+        const responseJson = await response.json()
+        return responseJson.history
+    }
+
+    /**
+     * Looks for an earlier generation of the same text with the same voice.
+     * @param {string} message - exact text to look for
+     * @param {string} voiceId - voice_id the generation must have used
+     * @returns {Promise<string>} the history_item_id, or '' when none matches
+     */
+    async findTtsGenerationInHistory(message, voiceId) {
+        const ttsHistory = await this.fetchTtsHistory()
+        const match = ttsHistory.find(
+            item => item.text === message && item.voice_id === voiceId
+        )
+        if (!match) {
+            return ''
+        }
+        const itemId = match.history_item_id
+        console.info(`Existing TTS history item ${itemId} found: ${match.text} `)
+        return itemId
+    }
+}
diff --git a/public/scripts/extensions/elevenlabstts/index.js b/public/scripts/extensions/elevenlabstts/index.js
index 9643142dc..03a390c09 100644
--- a/public/scripts/extensions/elevenlabstts/index.js
+++ b/public/scripts/extensions/elevenlabstts/index.js
@@ -1,160 +1,80 @@
-import { callPopup, saveSettingsDebounced } from "../../../script.js";
-import { extension_settings, getContext } from "../../extensions.js";
-import { getStringHash } from "../../utils.js";
+import { callPopup, saveSettingsDebounced } from '../../../script.js'
+import { extension_settings, getContext } from '../../extensions.js'
+import { getStringHash } from '../../utils.js'
+import { ElevenLabsTtsProvider } from './elevenlabs.js'
-const UPDATE_INTERVAL = 1000;
-let API_KEY
+const UPDATE_INTERVAL = 1000
let voiceMap = {} // {charName:voiceid, charName2:voiceid2}
let elevenlabsTtsVoices = []
let audioControl
+let lastCharacterId = null
+let lastGroupId = null
+let lastChatId = null
+let lastMessageHash = null
-let lastCharacterId = null;
-let lastGroupId = null;
-let lastChatId = null;
-let lastMessageHash = null;
-
+let ttsProvider = new ElevenLabsTtsProvider()
async function moduleWorker() {
// Primarily determinign when to add new chat to the TTS queue
- const enabled = $("#elevenlabs_enabled").is(':checked');
+ const enabled = $('#elevenlabs_enabled').is(':checked')
if (!enabled) {
- return;
+ return
}
const context = getContext()
- const chat = context.chat;
+ const chat = context.chat
- processTtsQueue();
- processAudioJobQueue();
- updateUiAudioPlayState();
+ processTtsQueue()
+ processAudioJobQueue()
+ updateUiAudioPlayState()
- // no characters or group selected
+ // no characters or group selected
if (!context.groupId && !context.characterId) {
- return;
+ return
}
// Chat/character/group changed
- if ((context.groupId && lastGroupId !== context.groupId) || (context.characterId !== lastCharacterId) || (context.chatId !== lastChatId)) {
+ if (
+ (context.groupId && lastGroupId !== context.groupId) ||
+ context.characterId !== lastCharacterId ||
+ context.chatId !== lastChatId
+ ) {
currentMessageNumber = context.chat.length ? context.chat.length : 0
- saveLastValues();
- return;
+ saveLastValues()
+ return
}
// take the count of messages
- let lastMessageNumber = context.chat.length ? context.chat.length : 0;
+ let lastMessageNumber = context.chat.length ? context.chat.length : 0
// There's no new messages
- let diff = lastMessageNumber - currentMessageNumber;
- let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
+ let diff = lastMessageNumber - currentMessageNumber
+ let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '')
if (diff == 0 && hashNew === lastMessageHash) {
- return;
+ return
}
- const message = chat[chat.length - 1];
+ const message = chat[chat.length - 1]
// We're currently swiping or streaming. Don't generate voice
- if (message.mes === '...' || (context.streamingProcessor && !context.streamingProcessor.isFinished)) {
- return;
+ if (
+ message.mes === '...' ||
+ (context.streamingProcessor && !context.streamingProcessor.isFinished)
+ ) {
+ return
}
// New messages, add new chat to history
- lastMessageHash = hashNew;
- currentMessageNumber = lastMessageNumber;
+ lastMessageHash = hashNew
+ currentMessageNumber = lastMessageNumber
- console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
- ttsJobQueue.push(message);
-}
-
-
-//#################//
-// TTS API Calls //
-//#################//
-
-async function fetchTtsVoiceIds() {
- const headers = {
- 'xi-api-key': API_KEY
- };
- const response = await fetch(`https://api.elevenlabs.io/v1/voices`, {
- headers: headers
- });
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${await response.json()}`);
- }
- const responseJson = await response.json();
- return responseJson.voices;
-}
-
-async function fetchTtsVoiceSettings() {
- const headers = {
- 'xi-api-key': API_KEY
- };
- const response = await fetch(`https://api.elevenlabs.io/v1/voices/settings/default`, {
- headers: headers
- });
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${await response.json()}`);
- }
- return response.json();
-}
-
-async function fetchTtsGeneration(text, voiceId) {
- console.info(`Generating new TTS for voice_id ${voiceId}`);
- const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`, {
- method: 'POST',
- headers: {
- 'xi-api-key': API_KEY,
- 'Content-Type': 'application/json'
- },
- body: JSON.stringify({ text: text })
- });
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${await response.json()}`);
- }
- return response;
-}
-
-async function fetchTtsFromHistory(history_item_id) {
- console.info(`Fetched existing TTS with history_item_id ${history_item_id}`);
- const response = await fetch(`https://api.elevenlabs.io/v1/history/${history_item_id}/audio`, {
- headers: {
- 'xi-api-key': API_KEY
- }
- });
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${await response.json()}`);
- }
- return response;
-}
-
-async function fetchTtsHistory() {
- const headers = {
- 'xi-api-key': API_KEY
- };
- const response = await fetch(`https://api.elevenlabs.io/v1/history`, {
- headers: headers
- });
- if (!response.ok) {
- throw new Error(`HTTP ${response.status}: ${await response.json()}`);
- }
- const responseJson = await response.json();
- return responseJson.history;
-}
-
-
-async function findTtsGenerationInHistory(message, voiceId) {
- const ttsHistory = await fetchTtsHistory();
- for (const history of ttsHistory) {
- const text = history.text;
- const itemId = history.history_item_id;
- if (message === text && history.voice_id == voiceId) {
- console.info(`Existing TTS history item ${itemId} found: ${text} `)
- return itemId;
- }
- }
- return ''
+ console.debug(
+ `Adding message from ${message.name} for TTS processing: "${message.mes}"`
+ )
+ ttsJobQueue.push(message)
}
//##################//
@@ -170,14 +90,13 @@ let queueProcessorReady = true
let lastAudioPosition = 0
-
async function playAudioData(audioBlob) {
- const reader = new FileReader();
+ const reader = new FileReader()
reader.onload = function (e) {
- const srcUrl = e.target.result;
- audioElement.src = srcUrl;
- };
- reader.readAsDataURL(audioBlob);
+ const srcUrl = e.target.result
+ audioElement.src = srcUrl
+ }
+ reader.readAsDataURL(audioBlob)
audioElement.addEventListener('ended', completeCurrentAudioJob)
audioElement.addEventListener('canplay', () => {
console.debug(`Starting TTS playback`)
@@ -185,28 +104,26 @@ async function playAudioData(audioBlob) {
})
}
-window['elevenlabsPreview'] = function(id) {
- const audio = document.getElementById(id);
- audio.play();
+window['elevenlabsPreview'] = function (id) {
+ const audio = document.getElementById(id)
+ audio.play()
}
async function onElevenlabsVoicesClick() {
- let popupText = '';
+ let popupText = ''
try {
- const voiceIds = await fetchTtsVoiceIds();
+ const voiceIds = await ttsProvider.fetchTtsVoiceIds()
for (const voice of voiceIds) {
- popupText += `
${voice.name}
`;
- popupText += ``;
+ popupText += `${voice.name}
`
+ popupText += ``
}
- }
- catch {
+ } catch {
popupText = 'Could not load voices list. Check your API key.'
}
-
- callPopup(popupText, 'text');
+ callPopup(popupText, 'text')
}
function completeCurrentAudioJob() {
@@ -217,21 +134,21 @@ function completeCurrentAudioJob() {
/**
* Accepts an HTTP response containing audio/mpeg data, and puts the data as a Blob() on the queue for playback
- * @param {*} response
+ * @param {*} response
*/
async function addAudioJob(response) {
const audioData = await response.blob()
- if (audioData.type != "audio/mpeg") {
+ if (audioData.type != 'audio/mpeg') {
throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}`
}
audioJobQueue.push(audioData)
- console.debug("Pushed audio job to queue.")
+ console.debug('Pushed audio job to queue.')
}
async function processAudioJobQueue() {
// Nothing to do, audio not completed, or audio paused - stop processing.
if (audioJobQueue.length == 0 || !queueProcessorReady || audioPaused) {
- return;
+ return
}
try {
queueProcessorReady = false
@@ -243,7 +160,6 @@ async function processAudioJobQueue() {
}
}
-
//################//
// TTS Control //
//################//
@@ -259,22 +175,24 @@ function completeTtsJob() {
function saveLastValues() {
const context = getContext()
- lastGroupId = context.groupId;
- lastCharacterId = context.characterId;
- lastChatId = context.chatId;
- lastMessageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
+ lastGroupId = context.groupId
+ lastCharacterId = context.characterId
+ lastChatId = context.chatId
+ lastMessageHash = getStringHash(
+ (context.chat.length && context.chat[context.chat.length - 1].mes) ?? ''
+ )
}
async function tts(text, voiceId) {
- const historyId = await findTtsGenerationInHistory(text, voiceId);
+ const historyId = await ttsProvider.findTtsGenerationInHistory(text, voiceId)
- let response;
+ let response
if (historyId) {
console.debug(`Found existing TTS generation with id ${historyId}`)
- response = await fetchTtsFromHistory(historyId);
+ response = await ttsProvider.fetchTtsFromHistory(historyId)
} else {
console.debug(`No existing TTS generation found, requesting new generation`)
- response = await fetchTtsGeneration(text, voiceId);
+ response = await ttsProvider.fetchTtsGeneration(text, voiceId)
}
addAudioJob(response)
completeTtsJob()
@@ -283,12 +201,12 @@ async function tts(text, voiceId) {
async function processTtsQueue() {
// Called each moduleWorker iteration to pull chat messages from queue
if (currentTtsJob || ttsJobQueue.length <= 0 || audioPaused) {
- return;
+ return
}
- console.debug("New message found, running TTS")
+ console.debug('New message found, running TTS')
currentTtsJob = ttsJobQueue.shift()
- const text = currentTtsJob.mes.replaceAll('*', '...');
+ const text = currentTtsJob.mes.replaceAll('*', '...')
const char = currentTtsJob.name
try {
@@ -298,20 +216,19 @@ async function processTtsQueue() {
const voice = await getTtsVoice(voiceMap[char])
const voiceId = voice.voice_id
if (voiceId == null) {
- throw (`Unable to attain voiceId for ${char}`)
+ throw `Unable to attain voiceId for ${char}`
}
tts(text, voiceId)
} catch (error) {
console.error(error)
currentTtsJob = null
}
-
}
// Secret function for now
async function playFullConversation() {
const context = getContext()
- const chat = context.chat;
+ const chat = context.chat
ttsJobQueue = chat
}
window.playFullConversation = playFullConversation
@@ -323,52 +240,60 @@ window.playFullConversation = playFullConversation
function loadSettings() {
const context = getContext()
if (Object.keys(extension_settings.elevenlabstts).length === 0) {
- Object.assign(extension_settings.elevenlabstts, defaultSettings);
+ Object.assign(extension_settings.elevenlabstts, defaultSettings)
}
- $('#elevenlabs_api_key').val(extension_settings.elevenlabstts.elevenlabsApiKey);
- $('#elevenlabs_voice_map').val(extension_settings.elevenlabstts.elevenlabsVoiceMap);
- $('#elevenlabs_enabled').prop('checked', extension_settings.elevenlabstts.enabled);
+ $('#elevenlabs_api_key').val(
+ extension_settings.elevenlabstts.elevenlabsApiKey
+ )
+ $('#elevenlabs_voice_map').val(
+ extension_settings.elevenlabstts.elevenlabsVoiceMap
+ )
+ $('#elevenlabs_enabled').prop(
+ 'checked',
+ extension_settings.elevenlabstts.enabled
+ )
onElevenlabsApplyClick()
}
const defaultSettings = {
- elevenlabsApiKey: "",
- elevenlabsVoiceMap: "",
+ elevenlabsApiKey: '',
+ elevenlabsVoiceMap: '',
elevenlabsEnabed: false
-};
-
+}
function setElevenLabsStatus(status, success) {
$('#elevenlabs_status').text(status)
if (success) {
- $("#elevenlabs_status").removeAttr("style");
+ $('#elevenlabs_status').removeAttr('style')
} else {
- $('#elevenlabs_status').css('color', 'red');
+ $('#elevenlabs_status').css('color', 'red')
}
}
async function updateApiKey() {
- const context = getContext();
- const value = $('#elevenlabs_api_key').val();
+ const context = getContext()
+ const value = $('#elevenlabs_api_key').val()
// Using this call to validate API key
- API_KEY = String(value)
- await fetchTtsVoiceIds().catch((error => {
- API_KEY = null
+ ttsProvider.API_KEY = String(value)
+ await ttsProvider.fetchTtsVoiceIds().catch(error => {
+ ttsProvider.API_KEY = null // drop the rejected key so later requests do not reuse it
throw `ElevenLabs TTS API key invalid`
- }))
+ })
- extension_settings.elevenlabstts.elevenlabsApiKey = String(value);
- console.debug(`Saved new API_KEY: ${value}`);
- saveSettingsDebounced();
+ extension_settings.elevenlabstts.elevenlabsApiKey = String(value)
+ console.debug('Saved new API_KEY') // the key is a credential: never print its value
+ saveSettingsDebounced()
}
function parseVoiceMap(voiceMapString) {
let parsedVoiceMap = {}
- for (const [charName, voiceId] of voiceMapString.split(",").map(s => s.split(":"))) {
+ for (const [charName, voiceId] of voiceMapString
+ .split(',')
+ .map(s => s.split(':'))) {
if (charName && voiceId) {
- parsedVoiceMap[charName.trim()] = voiceId.trim();
+ parsedVoiceMap[charName.trim()] = voiceId.trim()
}
}
return parsedVoiceMap
@@ -377,24 +302,26 @@ function parseVoiceMap(voiceMapString) {
async function getTtsVoice(name) {
// We're caching the list of voice_ids. This might cause trouble if the user creates a new voice without restarting
if (elevenlabsTtsVoices.length == 0) {
- elevenlabsTtsVoices = await fetchTtsVoiceIds();
+ elevenlabsTtsVoices = await ttsProvider.fetchTtsVoiceIds()
}
- const match = elevenlabsTtsVoices.filter((elevenVoice) => elevenVoice.name == name)[0];
+ const match = elevenlabsTtsVoices.filter(
+ elevenVoice => elevenVoice.name == name
+ )[0]
if (!match) {
- throw `TTS Voice name ${name} not found in ElevenLabs account`;
+ throw `TTS Voice name ${name} not found in ElevenLabs account`
}
- return match;
+ return match
}
async function voicemapIsValid(parsedVoiceMap) {
let valid = true
for (const characterName in parsedVoiceMap) {
- const parsedVoiceName = parsedVoiceMap[characterName];
+ const parsedVoiceName = parsedVoiceMap[characterName]
try {
- await getTtsVoice(parsedVoiceName);
+ await getTtsVoice(parsedVoiceName)
} catch (error) {
console.error(error)
- valid = false;
+ valid = false
}
}
return valid
@@ -402,19 +329,19 @@ async function voicemapIsValid(parsedVoiceMap) {
async function updateVoiceMap() {
let isValidResult = false
- const context = getContext();
+ const context = getContext()
// console.debug("onElevenlabsVoiceMapSubmit");
- const value = $('#elevenlabs_voice_map').val();
- const parsedVoiceMap = parseVoiceMap(value);
- isValidResult = await voicemapIsValid(parsedVoiceMap);
+ const value = $('#elevenlabs_voice_map').val()
+ const parsedVoiceMap = parseVoiceMap(value)
+ isValidResult = await voicemapIsValid(parsedVoiceMap)
if (isValidResult) {
- extension_settings.elevenlabstts.elevenlabsVoiceMap = String(value);
+ extension_settings.elevenlabstts.elevenlabsVoiceMap = String(value)
context.elevenlabsVoiceMap = String(value)
voiceMap = parsedVoiceMap
console.debug(`Saved new voiceMap: ${value}`)
- saveSettingsDebounced();
+ saveSettingsDebounced()
} else {
- throw "Voice map is invalid, check console for errors"
+ throw 'Voice map is invalid, check console for errors'
}
}
@@ -422,23 +349,27 @@ function onElevenlabsApplyClick() {
Promise.all([updateApiKey(), updateVoiceMap()])
.then(([result1, result2]) => {
updateUiAudioPlayState()
- setElevenLabsStatus("Successfully applied settings", true)
+ setElevenLabsStatus('Successfully applied settings', true)
})
- .catch((error) => {
+ .catch(error => {
setElevenLabsStatus(error, false)
- });
+ })
}
function onElevenlabsEnableClick() {
- extension_settings.elevenlabstts.enabled = $("#elevenlabs_enabled").is(':checked');
+ extension_settings.elevenlabstts.enabled = $('#elevenlabs_enabled').is(
+ ':checked'
+ )
updateUiAudioPlayState()
- saveSettingsDebounced();
+ saveSettingsDebounced()
}
function updateUiAudioPlayState() {
if (extension_settings.elevenlabstts.enabled == true) {
audioControl.style.display = 'flex'
- const img = !audioElement.paused ? "fa-solid fa-circle-pause" : "fa-solid fa-circle-play"
+ const img = !audioElement.paused
+ ? 'fa-solid fa-circle-pause'
+ : 'fa-solid fa-circle-play'
audioControl.className = img
} else {
audioControl.style.display = 'none'
@@ -453,8 +384,8 @@ function onAudioControlClicked() {
function addAudioControl() {
$('#send_but_sheld').prepend('')
$('#send_but_sheld').on('click', onAudioControlClicked)
- audioControl = document.getElementById('tts_media_control');
- updateUiAudioPlayState();
+ audioControl = document.getElementById('tts_media_control')
+ updateUiAudioPlayState()
}
$(document).ready(function () {
@@ -467,6 +398,8 @@ $(document).ready(function () {
+
+
@@ -487,14 +420,14 @@ $(document).ready(function () {
- `;
- $('#extensions_settings').append(settingsHtml);
- $('#elevenlabs_apply').on('click', onElevenlabsApplyClick);
- $('#elevenlabs_enabled').on('click', onElevenlabsEnableClick);
- $('#elevenlabs_voices').on('click', onElevenlabsVoicesClick);
+ `
+ $('#extensions_settings').append(settingsHtml)
+ $('#elevenlabs_apply').on('click', onElevenlabsApplyClick)
+ $('#elevenlabs_enabled').on('click', onElevenlabsEnableClick)
+ $('#elevenlabs_voices').on('click', onElevenlabsVoicesClick)
}
- addAudioControl();
- addExtensionControls();
- loadSettings();
- setInterval(moduleWorker, UPDATE_INTERVAL);
-});
\ No newline at end of file
+ addAudioControl()
+ addExtensionControls()
+ loadSettings()
+ setInterval(moduleWorker, UPDATE_INTERVAL)
+})