#2047 (WIP) Refactor TTS worker to use event source

This commit is contained in:
Cohee 2024-04-09 17:50:27 +03:00
parent 877824a4f9
commit fc1896dcff
1 changed file with 92 additions and 113 deletions

View File

@ -19,8 +19,9 @@ const UPDATE_INTERVAL = 1000;
let voiceMapEntries = [];
let voiceMap = {}; // {charName:voiceid, charName2:voiceid2}
let storedvalue = false;
let talkingHeadState = false;
let lastChatId = null;
let lastMessage = null;
let lastMessageHash = null;
const DEFAULT_VOICE_MARKER = '[Default Voice]';
@ -67,7 +68,7 @@ export function getPreviewString(lang) {
return previewStrings[lang] ?? fallbackPreview;
let ttsProviders = {
const ttsProviders = {
ElevenLabs: ElevenLabsTtsProvider,
Silero: SileroTtsProvider,
XTTSv2: XTTSTtsProvider,
@ -82,7 +83,6 @@ let ttsProviders = {
let ttsProvider;
let ttsProviderName;
let ttsLastMessage = null;
async function onNarrateOneMessage() {
audioElement.src = '/sounds/silence.mp3';
@ -130,103 +130,13 @@ async function onNarrateText(args, text) {
async function moduleWorker() {
// Primarily determining when to add new chat to the TTS queue
const enabled = $('#tts_enabled').is(':checked');
$('body').toggleClass('tts', enabled);
if (!enabled) {
if (!extension_settings.tts.enabled) {
const context = getContext();
const chat = context.chat;
// Auto generation is disabled
if (extension_settings.tts.auto_generation == false) {
// no characters or group selected
if (!context.groupId && context.characterId === undefined) {
// Chat changed
if (
context.chatId !== lastChatId
) {
currentMessageNumber = context.chat.length ? context.chat.length : 0;
// Force to speak on the first message in the new chat
if (context.chat.length === 1) {
lastMessageHash = -1;
// take the count of messages
let lastMessageNumber = context.chat.length ? context.chat.length : 0;
// There are no new messages
let diff = lastMessageNumber - currentMessageNumber;
let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
// if messages got deleted, diff will be < 0
if (diff < 0) {
// necessary actions will be taken by the onChatDeleted() handler
// if no new messages, or same message, or same message hash, do nothing
if (diff == 0 && hashNew === lastMessageHash) {
// If streaming, wait for streaming to finish before processing new messages
if (context.streamingProcessor && !context.streamingProcessor.isFinished) {
// clone message object, as things go haywire if message object is altered below (it's passed by reference)
const message = structuredClone(chat[chat.length - 1]);
// if last message within current message, message got extended. only send diff to TTS.
if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) {
let tmp = message.mes;
message.mes = message.mes.replace(ttsLastMessage, '');
ttsLastMessage = tmp;
} else {
ttsLastMessage = message.mes;
// We're currently swiping. Don't generate voice
if (!message || message.mes === '...' || message.mes === '') {
// Don't generate if message doesn't have a display text
if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
// Don't generate if message is a user message and user message narration is disabled
if (message.is_user && !extension_settings.tts.narrate_user) {
// New messages, add new chat to history
lastMessageHash = hashNew;
currentMessageNumber = lastMessageNumber;
`Adding message from ${message.name} for TTS processing: "${message.mes}"`,
function talkingAnimation(switchValue) {
@ -238,11 +148,11 @@ function talkingAnimation(switchValue) {
const apiUrl = getApiUrl();
const animationType = switchValue ? 'start' : 'stop';
if (switchValue !== storedvalue) {
if (switchValue !== talkingHeadState) {
try {
console.log(animationType + ' Talking Animation');
storedvalue = switchValue; // Update the storedvalue to the current switchValue
talkingHeadState = switchValue;
} catch (error) {
// Handle the error here or simply ignore it to prevent logging
@ -289,7 +199,6 @@ function debugTtsPlayback() {
'ttsProviderName': ttsProviderName,
'voiceMap': voiceMap,
'currentMessageNumber': currentMessageNumber,
'audioPaused': audioPaused,
'audioJobQueue': audioJobQueue,
'currentAudioJob': currentAudioJob,
@ -477,21 +386,12 @@ async function processAudioJobQueue() {
let ttsJobQueue = [];
let currentTtsJob; // Null if nothing is currently being processed
let currentMessageNumber = 0;
function completeTtsJob() {
console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
currentTtsJob = null;
function saveLastValues() {
const context = getContext();
lastChatId = context.chatId;
lastMessageHash = getStringHash(
(context.chat.length && context.chat[context.chat.length - 1].mes) ?? '',
async function tts(text, voiceId, char) {
async function processResponse(response) {
// RVC injection
@ -764,26 +664,103 @@ async function onChatChanged() {
await resetTtsPlayback();
const voiceMapInit = initVoiceMap();
await Promise.race([voiceMapInit, delay(1000)]);
ttsLastMessage = null;
lastMessage = null;
async function onChatDeleted() {
async function onMessageEvent(messageId) {
// If TTS is disabled, do nothing
if (!extension_settings.tts.enabled) {
// Auto generation is disabled
if (!extension_settings.tts.auto_generation) {
const context = getContext();
// no characters or group selected
if (!context.groupId && context.characterId === undefined) {
// Chat changed
if (context.chatId !== lastChatId) {
lastChatId = context.chatId;
lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? '');
// Force to speak on the first message in the new chat
if (context.chat.length === 1) {
lastMessageHash = -1;
// clone message object, as things go haywire if message object is altered below (it's passed by reference)
const message = structuredClone(context.chat[messageId]);
const hashNew = getStringHash(message?.mes ?? '');
// if the message hash is unchanged (same message text as last time), do nothing
if (hashNew === lastMessageHash) {
const isLastMessageInCurrent = () =>
lastMessage &&
typeof lastMessage === 'object' &&
message.swipe_id === lastMessage.swipe_id &&
message.name === lastMessage.name &&
message.is_user === lastMessage.is_user &&
message.mes.indexOf(lastMessage.mes) !== -1;
// If the last message is contained within the current message, the message got extended; only send the difference to TTS.
if (isLastMessageInCurrent()) {
const tmp = structuredClone(message);
message.mes = message.mes.replace(lastMessage.mes, '');
lastMessage = tmp;
} else {
lastMessage = structuredClone(message);
// We're currently swiping. Don't generate voice
if (!message || message.mes === '...' || message.mes === '') {
// Don't generate if message doesn't have a display text
if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
// Don't generate if message is a user message and user message narration is disabled
if (message.is_user && !extension_settings.tts.narrate_user) {
// New messages, add new chat to history
lastMessageHash = hashNew;
lastChatId = context.chatId;
console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
async function onMessageDeleted() {
const context = getContext();
// update internal references to new last message
lastChatId = context.chatId;
currentMessageNumber = context.chat.length ? context.chat.length : 0;
// compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue
let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
const messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
if (messageHash === lastMessageHash) {
lastMessageHash = messageHash;
ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '';
lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null;
// stop any tts playback since message might not exist anymore
await resetTtsPlayback();
@ -1079,8 +1056,10 @@ $(document).ready(function () {
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
eventSource.on(event_types.CHAT_CHANGED, onChatChanged);
eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted);
eventSource.on(event_types.MESSAGE_DELETED, onMessageDeleted);
eventSource.on(event_types.GROUP_UPDATED, onChatChanged);
eventSource.on(event_types.MESSAGE_SENT, onMessageEvent);
eventSource.on(event_types.MESSAGE_RECEIVED, onMessageEvent);
registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], '<span class="monospace">(text)</span> narrate any text using currently selected character\'s voice. Use voice="Character Name" argument to set other voice from the voice map, example: <tt>/speak voice="Donald Duck" Quack!</tt>', true, true);