From fc1896dcff96907d78efe6e1eed5d7bdc8f2dab4 Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Tue, 9 Apr 2024 17:50:27 +0300
Subject: [PATCH] #2047 (WIP) Refactor TTS worker to use event source
---
public/scripts/extensions/tts/index.js | 205 +++++++++++--------------
1 file changed, 92 insertions(+), 113 deletions(-)
diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index 556f6b967..f00641082 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -19,8 +19,9 @@ const UPDATE_INTERVAL = 1000;
let voiceMapEntries = [];
let voiceMap = {}; // {charName:voiceid, charName2:voiceid2}
-let storedvalue = false;
+let talkingHeadState = false;
let lastChatId = null;
+let lastMessage = null;
let lastMessageHash = null;
const DEFAULT_VOICE_MARKER = '[Default Voice]';
@@ -67,7 +68,7 @@ export function getPreviewString(lang) {
return previewStrings[lang] ?? fallbackPreview;
}
-let ttsProviders = {
+const ttsProviders = {
ElevenLabs: ElevenLabsTtsProvider,
Silero: SileroTtsProvider,
XTTSv2: XTTSTtsProvider,
@@ -82,7 +83,6 @@ let ttsProviders = {
let ttsProvider;
let ttsProviderName;
-let ttsLastMessage = null;
async function onNarrateOneMessage() {
audioElement.src = '/sounds/silence.mp3';
@@ -130,103 +130,13 @@ async function onNarrateText(args, text) {
}
async function moduleWorker() {
- // Primarily determining when to add new chat to the TTS queue
- const enabled = $('#tts_enabled').is(':checked');
- $('body').toggleClass('tts', enabled);
- if (!enabled) {
+ if (!extension_settings.tts.enabled) {
return;
}
- const context = getContext();
- const chat = context.chat;
-
processTtsQueue();
processAudioJobQueue();
updateUiAudioPlayState();
-
- // Auto generation is disabled
- if (extension_settings.tts.auto_generation == false) {
- return;
- }
-
- // no characters or group selected
- if (!context.groupId && context.characterId === undefined) {
- return;
- }
-
- // Chat changed
- if (
- context.chatId !== lastChatId
- ) {
- currentMessageNumber = context.chat.length ? context.chat.length : 0;
- saveLastValues();
-
- // Force to speak on the first message in the new chat
- if (context.chat.length === 1) {
- lastMessageHash = -1;
- }
-
- return;
- }
-
- // take the count of messages
- let lastMessageNumber = context.chat.length ? context.chat.length : 0;
-
- // There's no new messages
- let diff = lastMessageNumber - currentMessageNumber;
- let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
-
- // if messages got deleted, diff will be < 0
- if (diff < 0) {
- // necessary actions will be taken by the onChatDeleted() handler
- return;
- }
-
- // if no new messages, or same message, or same message hash, do nothing
- if (diff == 0 && hashNew === lastMessageHash) {
- return;
- }
-
- // If streaming, wait for streaming to finish before processing new messages
- if (context.streamingProcessor && !context.streamingProcessor.isFinished) {
- return;
- }
-
- // clone message object, as things go haywire if message object is altered below (it's passed by reference)
- const message = structuredClone(chat[chat.length - 1]);
-
- // if last message within current message, message got extended. only send diff to TTS.
- if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) {
- let tmp = message.mes;
- message.mes = message.mes.replace(ttsLastMessage, '');
- ttsLastMessage = tmp;
- } else {
- ttsLastMessage = message.mes;
- }
-
- // We're currently swiping. Don't generate voice
- if (!message || message.mes === '...' || message.mes === '') {
- return;
- }
-
- // Don't generate if message doesn't have a display text
- if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
- return;
- }
-
- // Don't generate if message is a user message and user message narration is disabled
- if (message.is_user && !extension_settings.tts.narrate_user) {
- return;
- }
-
- // New messages, add new chat to history
- lastMessageHash = hashNew;
- currentMessageNumber = lastMessageNumber;
-
- console.debug(
- `Adding message from ${message.name} for TTS processing: "${message.mes}"`,
- );
- ttsJobQueue.push(message);
}
function talkingAnimation(switchValue) {
@@ -238,11 +148,11 @@ function talkingAnimation(switchValue) {
const apiUrl = getApiUrl();
const animationType = switchValue ? 'start' : 'stop';
- if (switchValue !== storedvalue) {
+ if (switchValue !== talkingHeadState) {
try {
console.log(animationType + ' Talking Animation');
doExtrasFetch(`${apiUrl}/api/talkinghead/${animationType}_talking`);
- storedvalue = switchValue; // Update the storedvalue to the current switchValue
+ talkingHeadState = switchValue;
} catch (error) {
// Handle the error here or simply ignore it to prevent logging
}
@@ -289,7 +199,6 @@ function debugTtsPlayback() {
{
'ttsProviderName': ttsProviderName,
'voiceMap': voiceMap,
- 'currentMessageNumber': currentMessageNumber,
'audioPaused': audioPaused,
'audioJobQueue': audioJobQueue,
'currentAudioJob': currentAudioJob,
@@ -477,21 +386,12 @@ async function processAudioJobQueue() {
let ttsJobQueue = [];
let currentTtsJob; // Null if nothing is currently being processed
-let currentMessageNumber = 0;
function completeTtsJob() {
console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
currentTtsJob = null;
}
-function saveLastValues() {
- const context = getContext();
- lastChatId = context.chatId;
- lastMessageHash = getStringHash(
- (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '',
- );
-}
-
async function tts(text, voiceId, char) {
async function processResponse(response) {
// RVC injection
@@ -764,26 +664,103 @@ async function onChatChanged() {
await resetTtsPlayback();
const voiceMapInit = initVoiceMap();
await Promise.race([voiceMapInit, delay(1000)]);
- ttsLastMessage = null;
+ lastMessage = null;
}
-async function onChatDeleted() {
+/**
+ * Queues a newly sent or received chat message for automatic TTS narration.
+ * @param {number} messageId Index of the message in the current chat array.
+ */
+async function onMessageEvent(messageId) {
+    // Nothing to do when TTS or its auto generation is switched off
+    if (!extension_settings.tts.enabled || !extension_settings.tts.auto_generation) {
+        return;
+    }
+
+    const context = getContext();
+
+    // no characters or group selected
+    if (!context.groupId && context.characterId === undefined) {
+        return;
+    }
+
+    // Chat changed
+    // NOTE(review): the hash is seeded from this same message, so the hash check below returns early unless chat.length === 1 - confirm intended
+    if (context.chatId !== lastChatId) {
+        lastChatId = context.chatId;
+        lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? '');
+
+        // Force to speak on the first message in the new chat
+        if (context.chat.length === 1) {
+            lastMessageHash = -1;
+        }
+    }
+
+    // clone message object, as things go haywire if message object is altered below (it's passed by reference)
+    const message = structuredClone(context.chat[messageId]);
+
+    // No message at this index: bail out before anything below dereferences it
+    if (!message) {
+        return;
+    }
+
+    // if no new messages, or same message, or same message hash, do nothing
+    const hashNew = getStringHash(message.mes ?? '');
+    if (hashNew === lastMessageHash) {
+        return;
+    }
+
+    const isLastMessageInCurrent = () =>
+        lastMessage &&
+        typeof lastMessage === 'object' &&
+        message.swipe_id === lastMessage.swipe_id &&
+        message.name === lastMessage.name &&
+        message.is_user === lastMessage.is_user &&
+        message.mes.includes(lastMessage.mes);
+
+    // if last message within current message, message got extended. only send diff to TTS.
+    if (isLastMessageInCurrent()) {
+        const tmp = structuredClone(message);
+        message.mes = message.mes.replace(lastMessage.mes, '');
+        lastMessage = tmp;
+    } else {
+        lastMessage = structuredClone(message);
+    }
+
+    // We're currently swiping. Don't generate voice
+    if (message.mes === '...' || message.mes === '') {
+        return;
+    }
+
+    // Skip messages without display text in translated-only mode, and user messages when their narration is off
+    const lacksTranslation = extension_settings.tts.narrate_translated_only && !message.extra?.display_text;
+    if (lacksTranslation || (message.is_user && !extension_settings.tts.narrate_user)) {
+        return;
+    }
+
+    // New messages, add new chat to history
+    lastMessageHash = hashNew;
+    lastChatId = context.chatId;
+    console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
+    ttsJobQueue.push(message);
+}
+
+async function onMessageDeleted() {
const context = getContext();
// update internal references to new last message
lastChatId = context.chatId;
- currentMessageNumber = context.chat.length ? context.chat.length : 0;
// compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue
- let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
+ const messageHash = getStringHash(context.chat[context.chat.length - 1]?.mes ?? '');
if (messageHash === lastMessageHash) {
return;
}
lastMessageHash = messageHash;
- ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '';
+ lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null;
// stop any tts playback since message might not exist anymore
- await resetTtsPlayback();
+ resetTtsPlayback();
}
/**
@@ -1079,8 +1056,10 @@ $(document).ready(function () {
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
eventSource.on(event_types.CHAT_CHANGED, onChatChanged);
- eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted);
+ eventSource.on(event_types.MESSAGE_DELETED, onMessageDeleted);
eventSource.on(event_types.GROUP_UPDATED, onChatChanged);
+ eventSource.on(event_types.MESSAGE_SENT, onMessageEvent);
+ eventSource.on(event_types.MESSAGE_RECEIVED, onMessageEvent);
registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], '(text) – narrate any text using currently selected character\'s voice. Use voice="Character Name" argument to set other voice from the voice map, example: /speak voice="Donald Duck" Quack!', true, true);
document.body.appendChild(audioElement);
});