From fc1896dcff96907d78efe6e1eed5d7bdc8f2dab4 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Tue, 9 Apr 2024 17:50:27 +0300 Subject: [PATCH] #2047 (WIP) Refactor TTS worker to use event source --- public/scripts/extensions/tts/index.js | 205 +++++++++++-------------- 1 file changed, 92 insertions(+), 113 deletions(-) diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 556f6b967..f00641082 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -19,8 +19,9 @@ const UPDATE_INTERVAL = 1000; let voiceMapEntries = []; let voiceMap = {}; // {charName:voiceid, charName2:voiceid2} -let storedvalue = false; +let talkingHeadState = false; let lastChatId = null; +let lastMessage = null; let lastMessageHash = null; const DEFAULT_VOICE_MARKER = '[Default Voice]'; @@ -67,7 +68,7 @@ export function getPreviewString(lang) { return previewStrings[lang] ?? fallbackPreview; } -let ttsProviders = { +const ttsProviders = { ElevenLabs: ElevenLabsTtsProvider, Silero: SileroTtsProvider, XTTSv2: XTTSTtsProvider, @@ -82,7 +83,6 @@ let ttsProviders = { let ttsProvider; let ttsProviderName; -let ttsLastMessage = null; async function onNarrateOneMessage() { audioElement.src = '/sounds/silence.mp3'; @@ -130,103 +130,13 @@ async function onNarrateText(args, text) { } async function moduleWorker() { - // Primarily determining when to add new chat to the TTS queue - const enabled = $('#tts_enabled').is(':checked'); - $('body').toggleClass('tts', enabled); - if (!enabled) { + if (!extension_settings.tts.enabled) { return; } - const context = getContext(); - const chat = context.chat; - processTtsQueue(); processAudioJobQueue(); updateUiAudioPlayState(); - - // Auto generation is disabled - if (extension_settings.tts.auto_generation == false) { - return; - } - - // no characters or group selected - if (!context.groupId && context.characterId === undefined) { - return; - } - - // Chat changed - if ( - context.chatId !== lastChatId - ) { - currentMessageNumber = context.chat.length ? context.chat.length : 0; - saveLastValues(); - - // Force to speak on the first message in the new chat - if (context.chat.length === 1) { - lastMessageHash = -1; - } - - return; - } - - // take the count of messages - let lastMessageNumber = context.chat.length ? context.chat.length : 0; - - // There's no new messages - let diff = lastMessageNumber - currentMessageNumber; - let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? ''); - - // if messages got deleted, diff will be < 0 - if (diff < 0) { - // necessary actions will be taken by the onChatDeleted() handler - return; - } - - // if no new messages, or same message, or same message hash, do nothing - if (diff == 0 && hashNew === lastMessageHash) { - return; - } - - // If streaming, wait for streaming to finish before processing new messages - if (context.streamingProcessor && !context.streamingProcessor.isFinished) { - return; - } - - // clone message object, as things go haywire if message object is altered below (it's passed by reference) - const message = structuredClone(chat[chat.length - 1]); - - // if last message within current message, message got extended. only send diff to TTS. - if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) { - let tmp = message.mes; - message.mes = message.mes.replace(ttsLastMessage, ''); - ttsLastMessage = tmp; - } else { - ttsLastMessage = message.mes; - } - - // We're currently swiping. Don't generate voice - if (!message || message.mes === '...' || message.mes === '') { - return; - } - - // Don't generate if message doesn't have a display text - if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) { - return; - } - - // Don't generate if message is a user message and user message narration is disabled - if (message.is_user && !extension_settings.tts.narrate_user) { - return; - } - - // New messages, add new chat to history - lastMessageHash = hashNew; - currentMessageNumber = lastMessageNumber; - - console.debug( - `Adding message from ${message.name} for TTS processing: "${message.mes}"`, - ); - ttsJobQueue.push(message); } function talkingAnimation(switchValue) { @@ -238,11 +148,11 @@ function talkingAnimation(switchValue) { const apiUrl = getApiUrl(); const animationType = switchValue ? 'start' : 'stop'; - if (switchValue !== storedvalue) { + if (switchValue !== talkingHeadState) { try { console.log(animationType + ' Talking Animation'); doExtrasFetch(`${apiUrl}/api/talkinghead/${animationType}_talking`); - storedvalue = switchValue; // Update the storedvalue to the current switchValue + talkingHeadState = switchValue; } catch (error) { // Handle the error here or simply ignore it to prevent logging } @@ -289,7 +199,6 @@ function debugTtsPlayback() { { 'ttsProviderName': ttsProviderName, 'voiceMap': voiceMap, - 'currentMessageNumber': currentMessageNumber, 'audioPaused': audioPaused, 'audioJobQueue': audioJobQueue, 'currentAudioJob': currentAudioJob, @@ -477,21 +386,12 @@ async function processAudioJobQueue() { let ttsJobQueue = []; let currentTtsJob; // Null if nothing is currently being processed -let currentMessageNumber = 0; function completeTtsJob() { console.info(`Current TTS job for ${currentTtsJob?.name} completed.`); currentTtsJob = null; } -function saveLastValues() { - const context = getContext(); - lastChatId = context.chatId; - lastMessageHash = getStringHash( - (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '', - ); -} - async function tts(text, voiceId, char) { async function processResponse(response) { // RVC injection @@ -764,26 +664,103 @@ async function onChatChanged() { await resetTtsPlayback(); const voiceMapInit = initVoiceMap(); await Promise.race([voiceMapInit, delay(1000)]); - ttsLastMessage = null; + lastMessage = null; } -async function onChatDeleted() { +async function onMessageEvent(messageId) { + // If TTS is disabled, do nothing + if (!extension_settings.tts.enabled) { + return; + } + + // Auto generation is disabled + if (!extension_settings.tts.auto_generation) { + return; + } + + const context = getContext(); + + // no characters or group selected + if (!context.groupId && context.characterId === undefined) { + return; + } + + // Chat changed + if (context.chatId !== lastChatId) { + lastChatId = context.chatId; + lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? ''); + + // Force to speak on the first message in the new chat + if (context.chat.length === 1) { + lastMessageHash = -1; + } + } + + // clone message object, as things go haywire if message object is altered below (it's passed by reference) + const message = structuredClone(context.chat[messageId]); + const hashNew = getStringHash(message?.mes ?? ''); + + // if no new messages, or same message, or same message hash, do nothing + if (hashNew === lastMessageHash) { + return; + } + + const isLastMessageInCurrent = () => + lastMessage && + typeof lastMessage === 'object' && + message.swipe_id === lastMessage.swipe_id && + message.name === lastMessage.name && + message.is_user === lastMessage.is_user && + message.mes.indexOf(lastMessage.mes) !== -1; + + // if last message within current message, message got extended. only send diff to TTS. + if (isLastMessageInCurrent()) { + const tmp = structuredClone(message); + message.mes = message.mes.replace(lastMessage.mes, ''); + lastMessage = tmp; + } else { + lastMessage = structuredClone(message); + } + + // We're currently swiping. Don't generate voice + if (!message || message.mes === '...' || message.mes === '') { + return; + } + + // Don't generate if message doesn't have a display text + if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) { + return; + } + + // Don't generate if message is a user message and user message narration is disabled + if (message.is_user && !extension_settings.tts.narrate_user) { + return; + } + + // New messages, add new chat to history + lastMessageHash = hashNew; + lastChatId = context.chatId; + + console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`); + ttsJobQueue.push(message); +} + +async function onMessageDeleted() { const context = getContext(); // update internal references to new last message lastChatId = context.chatId; - currentMessageNumber = context.chat.length ? context.chat.length : 0; // compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue - let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? ''); + const messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? ''); if (messageHash === lastMessageHash) { return; } lastMessageHash = messageHash; - ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? ''; + lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null; // stop any tts playback since message might not exist anymore - await resetTtsPlayback(); + resetTtsPlayback(); } /** @@ -1079,8 +1056,10 @@ $(document).ready(function () { setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback); eventSource.on(event_types.CHAT_CHANGED, onChatChanged); - eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted); + eventSource.on(event_types.MESSAGE_DELETED, onMessageDeleted); eventSource.on(event_types.GROUP_UPDATED, onChatChanged); + eventSource.on(event_types.MESSAGE_SENT, onMessageEvent); + eventSource.on(event_types.MESSAGE_RECEIVED, onMessageEvent); registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], '(text) – narrate any text using currently selected character\'s voice. Use voice="Character Name" argument to set other voice from the voice map, example: /speak voice="Donald Duck" Quack!', true, true); document.body.appendChild(audioElement); });