import { cancelTtsPlay, eventSource, event_types, getCurrentChatId, isStreamingEnabled, name2, saveSettingsDebounced, substituteParams } from '../../../script.js';
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules, renderExtensionTemplateAsync } from '../../extensions.js';
import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
import { EdgeTtsProvider } from './edge.js';
import { ElevenLabsTtsProvider } from './elevenlabs.js';
import { SileroTtsProvider } from './silerotts.js';
import { CoquiTtsProvider } from './coqui.js';
import { SystemTtsProvider } from './system.js';
import { NovelTtsProvider } from './novel.js';
import { power_user } from '../../power-user.js';
import { OpenAITtsProvider } from './openai.js';
import { OpenAICompatibleTtsProvider } from './openai-compatible.js';
import { XTTSTtsProvider } from './xtts.js';
import { VITSTtsProvider } from './vits.js';
import { GSVITtsProvider } from './gsvi.js';
import { SBVits2TtsProvider } from './sbvits2.js';
import { AllTalkTtsProvider } from './alltalk.js';
import { SpeechT5TtsProvider } from './speecht5.js';
import { AzureTtsProvider } from './azure.js';
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
import { debounce_timeout } from '../../constants.js';
import { SlashCommandEnumValue, enumTypes } from '../../slash-commands/SlashCommandEnumValue.js';
import { enumIcons } from '../../slash-commands/SlashCommandCommonEnumsProvider.js';
import { POPUP_TYPE, callGenericPopup } from '../../popup.js';
export { talkingAnimation };

// Interval (ms) used for the periodic generation timer started in onGenerationStarted.
const UPDATE_INTERVAL = 1000;

let voiceMapEntries = [];
let voiceMap = {}; // {charName:voiceid, charName2:voiceid2}
let talkingHeadState = false;
let lastChatId = null;
let lastMessage = null;
let lastMessageHash = null;
let periodicMessageGenerationTimer = null;
let lastPositionOfParagraphEnd = -1;
let currentInitVoiceMapPromise = null;

const DEFAULT_VOICE_MARKER = '[Default Voice]';
const DISABLED_VOICE_MARKER = 'disabled';

/**
 * Returns a sample sentence for previewing a voice in the given locale.
 * @param {string} lang Locale code used as lookup key (e.g. 'en-US')
 * @returns {string} The locale-specific sample, or a Latin fallback sentence when the locale is unknown
 */
export function getPreviewString(lang) {
    const previewStrings = {
        'en-US': 'The quick brown fox jumps over the lazy dog',
        'en-GB': 'Sphinx of black quartz, judge my vow',
        'fr-FR': 'Portez ce vieux whisky au juge blond qui fume',
        'de-DE': 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich',
        'it-IT': 'Pranzo d\'acqua fa volti sghembi',
        'es-ES': 'Quiere la boca exhausta vid, kiwi, piña y fugaz jamón',
        'es-MX': 'Fabio me exige, sin tapujos, que añada cerveza al whisky',
        'ru-RU': 'В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!',
        'pt-BR': 'Vejo xá gritando que fez show sem playback.',
        // 'pt-PT' is the code for European Portuguese; the historical 'pt-PR' key is
        // kept so that any caller relying on it keeps getting the same sentence.
        'pt-PT': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'pt-PR': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'uk-UA': 'Фабрикуймо гідність, лящім їжею, ґав хапаймо, з\'єднавці чаш!',
        'pl-PL': 'Pchnąć w tę łódź jeża lub ośm skrzyń fig',
        'cs-CZ': 'Příliš žluťoučký kůň úpěl ďábelské ódy',
        'sk-SK': 'Vyhŕňme si rukávy a vyprážajme čínske ryžové cestoviny',
        'hu-HU': 'Árvíztűrő tükörfúrógép',
        'tr-TR': 'Pijamalı hasta yağız şoföre çabucak güvendi',
        'nl-NL': 'De waard heeft een kalfje en een pinkje opgegeten',
        'sv-SE': 'Yxskaftbud, ge vårbygd, zinkqvarn',
        'da-DK': 'Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon',
        'ja-JP': 'いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす',
        'ko-KR': '가나다라마바사아자차카타파하',
        'zh-CN': '我能吞下玻璃而不伤身体',
        'ro-RO': 'Muzicologă în bej vând whisky și tequila, preț fix',
        'bg-BG': 'Щъркелите се разпръснаха по цялото небе',
        'el-GR': 'Ταχίστη αλώπηξ βαφής ψημένη γη, δρασκελίζει υπέρ νωθρού κυνός',
        'fi-FI': 'Voi veljet, miksi juuri teille myin nämä vehkeet?',
        'he-IL': 'הקצינים צעקו: "כל הכבוד לצבא הצבאות!"',
        'id-ID': 'Jangkrik itu memang enak, apalagi kalau digoreng',
        'ms-MY': 'Muzik penyanyi wanita itu menggambarkan kehidupan yang penuh dengan duka nestapa',
        'th-TH': 'เป็นไงบ้างครับ ผมชอบกินข้าวผัดกระเพราหมูกรอบ',
        'vi-VN': 'Cô bé quàng khăn đỏ đang ngồi trên bãi cỏ xanh',
        'ar-SA': 'أَبْجَدِيَّة عَرَبِيَّة',
        'hi-IN': 'श्वेता ने श्वेता के श्वेते हाथों में श्वेता का श्वेता चावल पकड़ा',
    };
    const fallbackPreview = 'Neque porro quisquam est qui dolorem ipsum quia dolor sit amet';

    return previewStrings[lang] ?? fallbackPreview;
}

// Registry of selectable providers: display name -> provider class.
const ttsProviders = {
    AllTalk: AllTalkTtsProvider,
    Azure: AzureTtsProvider,
    Coqui: CoquiTtsProvider,
    Edge: EdgeTtsProvider,
    ElevenLabs: ElevenLabsTtsProvider,
    GSVI: GSVITtsProvider,
    Novel: NovelTtsProvider,
    OpenAI: OpenAITtsProvider,
    'OpenAI Compatible': OpenAICompatibleTtsProvider,
    SBVits2: SBVits2TtsProvider,
    Silero: SileroTtsProvider,
    SpeechT5: SpeechT5TtsProvider,
    System: SystemTtsProvider,
    VITS: VITSTtsProvider,
    XTTSv2: XTTSTtsProvider,
};
let ttsProvider;
let ttsProviderName;

/**
 * Click handler: narrates the chat message that contains the clicked element.
 * `this` is the clicked element; the message index is read from the closest `.mes` element's `mesid` attribute.
 */
async function onNarrateOneMessage() {
    audioElement.src = '/sounds/silence.mp3';
    const context = getContext();
    const id = $(this).closest('.mes').attr('mesid');
    const message = context.chat[id];

    if (!message) {
        return;
    }

    resetTtsPlayback();
    ttsJobQueue.push(message);
    moduleWorker();
}

/**
 * Narrates arbitrary text with the voice mapped to a character name.
 * @param {{voice?: string}} args Optional `voice` names the character whose voice is used; defaults to `name2`
 * @param {string} text Text to narrate
 * @returns {Promise<string>} Always resolves to an empty string
 */
async function onNarrateText(args, text) {
    if (!text) {
        return '';
    }

    audioElement.src = '/sounds/silence.mp3';

    // To load all characters in the voice map, set unrestricted to true
    await initVoiceMap(true);

    try {
        const baseName = args?.voice || name2;
        const name = (baseName === 'SillyTavern System' ? DEFAULT_VOICE_MARKER : baseName) || DEFAULT_VOICE_MARKER;

        const voiceMapEntry = voiceMap[name] === DEFAULT_VOICE_MARKER ? voiceMap[DEFAULT_VOICE_MARKER] : voiceMap[name];

        if (!voiceMapEntry || voiceMapEntry === DISABLED_VOICE_MARKER) {
            toastr.info(`Specified voice for ${name} was not found. Check the TTS extension settings.`);
            return '';
        }

        resetTtsPlayback();
        ttsJobQueue.push({ mes: text, name: name });
        await moduleWorker();
    } finally {
        // Return back to the chat voices. Done in `finally` so that the "voice not found"
        // path does not leave the unrestricted voice map in place.
        await initVoiceMap(false);
    }

    return '';
}

/**
 * One worker iteration: starts the next TTS job, starts the next audio job and refreshes the play/stop icon.
 * Does nothing while TTS is disabled.
 */
async function moduleWorker() {
    if (!extension_settings.tts.enabled) {
        return;
    }

    // Intentionally not awaited: generation may take a long time (or stream chunks), and
    // the audio queue has to keep being served in the meantime. Both functions handle
    // their own errors.
    processTtsQueue();
    processAudioJobQueue();
    updateUiAudioPlayState();
}

/**
 * Starts or stops the talking animation of the `talkinghead` extras module, if that module is loaded.
 * @param {boolean} switchValue `true` to start the animation, `false` to stop it
 */
function talkingAnimation(switchValue) {
    if (!modules.includes('talkinghead')) {
        console.debug('Talking Animation module not loaded');
        return;
    }

    const apiUrl = getApiUrl();
    const animationType = switchValue ? 'start' : 'stop';

    if (switchValue !== talkingHeadState) {
        // Best effort: a failing animation request must never break playback.
        const onFailure = (error) => console.debug(`Talking Animation ${animationType} request failed`, error);

        try {
            console.log(animationType + ' Talking Animation');
            // Promise.resolve() so that an asynchronous failure is handled as well; the
            // surrounding try/catch only sees synchronous throws.
            Promise.resolve(doExtrasFetch(`${apiUrl}/api/talkinghead/${animationType}_talking`)).catch(onFailure);
            talkingHeadState = switchValue;
        } catch (error) {
            onFailure(error);
        }
    }
    updateUiAudioPlayState();
}

/**
 * Hard-stops everything TTS is doing: cancels system TTS, drops current and queued jobs
 * and resets the audio element.
 */
function resetTtsPlayback() {
    // Stop system TTS utterance
    cancelTtsPlay();

    // Clear currently processing jobs
    currentTtsJob = null;
    currentAudioJob = null;

    // Reset audio element
    audioElement.currentTime = 0;
    audioElement.src = '';

    // Clear any queue items
    ttsJobQueue.splice(0, ttsJobQueue.length);
    audioJobQueue.splice(0, audioJobQueue.length);

    // Set audio ready to process again
    audioQueueProcessorReady = true;
}

/**
 * @returns {boolean} `true` if any TTS or audio job is queued or currently being processed
 */
function isTtsProcessing() {
    const hasQueuedJobs = ttsJobQueue.length > 0 || audioJobQueue.length > 0;
    const hasCurrentJob = currentTtsJob != null || currentAudioJob != null;
    return hasQueuedJobs || hasCurrentJob;
}

/**
 * Logs the current playback state as JSON. Exposed as `window.debugTtsPlayback`.
 */
function debugTtsPlayback() {
    console.log(JSON.stringify(
        {
            'ttsProviderName': ttsProviderName,
            'voiceMap': voiceMap,
            'audioPaused': audioPaused,
            'audioJobQueue': audioJobQueue,
            'currentAudioJob': currentAudioJob,
            'audioQueueProcessorReady': audioQueueProcessorReady,
            'ttsJobQueue': ttsJobQueue,
            'currentTtsJob': currentTtsJob,
            'ttsConfig': extension_settings.tts,
        },
    ));
}
window['debugTtsPlayback'] = debugTtsPlayback;

//##################//
//   Audio Control  //
//##################//

let audioElement = new Audio();
audioElement.id = 'tts_audio';
audioElement.autoplay = true;

/**
 * @type AudioJob[] Audio job queue
 * @typedef {{audioBlob: Blob | string, char: string}} AudioJob Audio job object
 */
let audioJobQueue = [];
/**
 * @type AudioJob Current audio job
 */
let currentAudioJob;
let audioPaused = false;
let audioQueueProcessorReady = true;

/**
 * `canplay` handler of the shared audio element: applies the configured playback rate and starts playback.
 * Kept as a single named function so that registering it repeatedly does not stack up listeners.
 */
function onAudioCanPlay() {
    console.debug('Starting TTS playback');
    audioElement.playbackRate = extension_settings.tts.playback_rate;
    // play() rejects when the source is swapped or cleared mid-load (e.g. by resetTtsPlayback).
    Promise.resolve(audioElement.play()).catch(error => console.debug('TTS playback did not start', error));
}

/**
 * Play audio data from audio job object.
 * @param {AudioJob} audioJob Audio job object
 * @returns {Promise} Promise that resolves when audio playback is started
 */
async function playAudioData(audioJob) {
    const { audioBlob, char } = audioJob;
    // Since current audio job can be cancelled, don't playback if it is null
    if (currentAudioJob == null) {
        console.log('Cancelled TTS playback because currentAudioJob was null');
        return;
    }
    if (audioBlob instanceof Blob) {
        const srcUrl = await getBase64Async(audioBlob);

        // VRM lip sync
        if (extension_settings.vrm?.enabled && typeof window['vrmLipSync'] === 'function') {
            await window['vrmLipSync'](audioBlob, char);
        }

        // The job may have been cancelled while the blob was being converted
        if (currentAudioJob == null) {
            console.log('Cancelled TTS playback because currentAudioJob was null');
            return;
        }

        audioElement.src = srcUrl;
    } else if (typeof audioBlob === 'string') {
        audioElement.src = audioBlob;
    } else {
        throw `TTS received invalid audio data type ${typeof audioBlob}`;
    }
    // Registering the same function reference again is a no-op, so these do not accumulate.
    audioElement.addEventListener('ended', completeCurrentAudioJob);
    audioElement.addEventListener('canplay', onAudioCanPlay);
}

/**
 * Plays a voice preview: uses the audio element with the given id when there is an enabled one,
 * otherwise asks the current provider to preview the voice.
 * @param {string} id Element id / voice id
 */
window['tts_preview'] = function (id) {
    const audio = document.getElementById(id);

    if (audio instanceof HTMLAudioElement && !$(audio).data('disabled')) {
        audio.play();
    } else {
        ttsProvider.previewTtsVoice(id);
    }
};

/**
 * Shows a popup listing the voices offered by the current provider.
 */
async function onTtsVoicesClick() {
    let popupText = '';

    try {
        const voiceIds = await ttsProvider.fetchTtsVoiceObjects();

        for (const voice of voiceIds) {
            // NOTE(review): the markup of the two templates below looks stripped in this copy of
            // the file (the second one is empty) - confirm against the upstream source.
            popupText += `
${voice.lang || ''} ${voice.name}
`;
            if (voice.preview_url) {
                popupText += ``;
            }
        }
    } catch {
        popupText = 'Could not load voices list. Check your API key.';
    }

    callGenericPopup(popupText, POPUP_TYPE.TEXT, '', { allowVerticalScrolling: true });
}

/**
 * Shows/hides the TTS menu item depending on the enabled flag and switches its icon
 * between "stop" (playing or processing) and "play".
 */
function updateUiAudioPlayState() {
    if (extension_settings.tts.enabled == true) {
        $('#ttsExtensionMenuItem').show();
        let img;
        // Give user feedback that TTS is active by setting the stop icon if processing or playing
        if (!audioElement.paused || isTtsProcessing()) {
            img = 'fa-solid fa-stop-circle extensionsMenuExtensionButton';
        } else {
            img = 'fa-solid fa-circle-play extensionsMenuExtensionButton';
        }
        $('#tts_media_control').attr('class', img);
    } else {
        $('#ttsExtensionMenuItem').hide();
    }
}

/**
 * Click handler of the play/stop control: stops everything when TTS is active,
 * otherwise queues the last chat message for narration.
 */
function onAudioControlClicked() {
    audioElement.src = '/sounds/silence.mp3';
    const context = getContext();
    // Not pausing, doing a full stop to anything TTS is doing. Better UX as pause is not as useful
    if (!audioElement.paused || isTtsProcessing()) {
        resetTtsPlayback();
        talkingAnimation(false);
    } else {
        // Default play behavior if not processing or playing is to play the last message.
        const lastChatMessage = context.chat[context.chat.length - 1];
        // An empty chat has no last message; queueing `undefined` would break the TTS worker.
        if (lastChatMessage) {
            ttsJobQueue.push(lastChatMessage);
        }
    }
    updateUiAudioPlayState();
}

/**
 * Adds the playback controls to the wand menu container and wires their click handlers.
 */
function addAudioControl() {
    // NOTE(review): the markup of both templates looks stripped in this copy of the file - the
    // elements #ttsExtensionMenuItem / #ttsExtensionNarrateAll / #tts_media_control used below
    // are not created by the visible strings. Confirm against the upstream source.
    $('#tts_wand_container').append(`
TTS Playback
`);
    $('#tts_wand_container').append(`
Narrate All Chat
`);
    $('#ttsExtensionMenuItem').attr('title', 'TTS play/pause').on('click', onAudioControlClicked);
    $('#ttsExtensionNarrateAll').attr('title', 'Narrate all messages in the current chat. Includes user messages, excludes hidden comments.').on('click', playFullConversation);
    updateUiAudioPlayState();
}

/**
 * `ended` handler of the audio element: frees the audio queue processor for the next job.
 */
function completeCurrentAudioJob() {
    audioQueueProcessorReady = true;
    currentAudioJob = null;
    talkingAnimation(false); //stop lip animation
    // updateUiPlayState();
}

/**
 * Accepts an HTTP response containing audio/mpeg data, and puts the data as a Blob() on the queue for playback
 * @param {Response|string} response Response whose body is audio data, or a string that is queued as-is and later used as the audio source
 * @param {string} char Character name the audio belongs to
 */
async function addAudioJob(response, char) {
    if (typeof response === 'string') {
        audioJobQueue.push({ audioBlob: response, char: char });
    } else {
        const audioData = await response.blob();
        if (!audioData.type.startsWith('audio/')) {
            throw `TTS received HTTP response with invalid data format. Expecting audio/*, got ${audioData.type}`;
        }
        audioJobQueue.push({ audioBlob: audioData, char: char });
    }
    console.debug('Pushed audio job to queue.');
}

/**
 * Starts playback of the next queued audio job, unless one is still playing or audio is paused.
 */
async function processAudioJobQueue() {
    // Nothing to do, audio not completed, or audio paused - stop processing.
    if (audioJobQueue.length == 0 || !audioQueueProcessorReady || audioPaused) {
        return;
    }
    try {
        audioQueueProcessorReady = false;
        currentAudioJob = audioJobQueue.shift();
        const playback = playAudioData(currentAudioJob);
        talkingAnimation(true);
        // Awaited so that a failure inside playAudioData reaches the catch below instead of
        // becoming an unhandled rejection that leaves the queue locked forever.
        await playback;
    } catch (error) {
        toastr.error(String(error));
        console.error(error);
        currentAudioJob = null;
        audioQueueProcessorReady = true;
        talkingAnimation(false);
    }
}

//################//
//  TTS Control   //
//################//

let ttsJobQueue = [];
let currentTtsJob; // Null if nothing is currently being processed

/**
 * Marks the current TTS job as finished so that the next one can start.
 */
function completeTtsJob() {
    console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
    currentTtsJob = null;
}

/**
 * Generates speech for the text with the current provider and queues the resulting audio.
 * @param {string} text Text to speak
 * @param {string} voiceId Provider voice id
 * @param {string} char Character name the audio belongs to
 */
async function tts(text, voiceId, char) {
    async function processResponse(response) {
        let audioResponse = response;

        // RVC injection
        if (typeof window['rvcVoiceConversion'] === 'function' && extension_settings.rvc?.enabled) {
            audioResponse = await window['rvcVoiceConversion'](response, char, text);
        }

        await addAudioJob(audioResponse, char);
    }

    const response = await ttsProvider.generateTts(text, voiceId);

    // If async generator, process every chunk as it comes in
    if (typeof response?.[Symbol.asyncIterator] === 'function') {
        for await (const chunk of response) {
            await processResponse(chunk);
        }
    } else {
        await processResponse(response);
    }

    completeTtsJob();
}

/**
 * Builds the text that is sent to the provider for a queued job, applying the
 * skip/narrate filters from the extension settings.
 * @param {object} job Queued chat message (reads `mes`, `name` and `extra.display_text`)
 * @returns {Promise<string>} Filtered text; may be empty
 */
async function prepareTtsText(job) {
    let text = extension_settings.tts.narrate_translated_only
        ? (job?.extra?.display_text || job.mes)
        : job.mes;

    // Substitute macros
    text = substituteParams(text);

    if (extension_settings.tts.skip_codeblocks) {
        text = text.replace(/^\s{4}.*$/gm, '').trim();
        text = text.replace(/```.*?```/gs, '').trim();
    }

    if (extension_settings.tts.skip_tags) {
        text = text.replace(/<.*?>.*?<\/.*?>/g, '').trim();
    }

    if (!extension_settings.tts.pass_asterisks) {
        text = extension_settings.tts.narrate_dialogues_only
            ? text.replace(/\*[^*]*?(\*|$)/g, '').trim() // remove asterisks content
            : text.replaceAll('*', '').trim(); // remove just the asterisks
    }

    if (extension_settings.tts.narrate_quoted_only) {
        const special_quotes = /[“”«»]/g; // Extend this regex to include other special quotes
        text = text.replace(special_quotes, '"');
        const matches = text.match(/".*?"/g); // Matches text inside double quotes, non-greedily
        const partJoiner = (ttsProvider?.separator || ' ... ');
        text = matches ? matches.join(partJoiner) : text;
    }

    if (typeof ttsProvider?.processText === 'function') {
        text = await ttsProvider.processText(text);
    }

    // Collapse newlines and spaces into single space
    text = text.replace(/\s+/g, ' ').trim();

    console.log(`TTS: ${text}`);
    const char = job.name;

    // Remove character name from start of the line if power user setting is disabled
    if (char && !power_user.allow_name2_display) {
        const escapedChar = escapeRegex(char);
        text = text.replace(new RegExp(`^${escapedChar}:`, 'gm'), '');
    }

    return text;
}

/**
 * Pulls the next chat message from the TTS queue, resolves its voice and generates the audio.
 * Errors are shown as a toast and release the current job so that the queue cannot stall.
 */
async function processTtsQueue() {
    // Called each moduleWorker iteration to pull chat messages from queue
    if (currentTtsJob || ttsJobQueue.length <= 0 || audioPaused) {
        return;
    }

    console.debug('New message found, running TTS');
    const job = ttsJobQueue.shift();
    currentTtsJob = job;

    if (!job) {
        console.warn('Got empty job in TTS queue.');
        currentTtsJob = null;
        return;
    }

    try {
        const char = job.name;
        // Look the voice up before the first await: onNarrateText switches the voice map back
        // to the chat characters right after kicking off the worker.
        const voiceMapEntry = voiceMap[char] === DEFAULT_VOICE_MARKER ? voiceMap[DEFAULT_VOICE_MARKER] : voiceMap[char];

        // Text preparation runs inside the try block: a throw here used to leave
        // currentTtsJob set, which blocked every following job.
        const text = await prepareTtsText(job);

        if (!text) {
            console.warn('Got empty text in TTS queue job.');
            completeTtsJob();
            return;
        }

        if (!voiceMapEntry || voiceMapEntry === DISABLED_VOICE_MARKER) {
            throw `${char} not in voicemap. Configure character in extension settings voice map`;
        }

        const voice = await ttsProvider.getVoice(voiceMapEntry);
        const voiceId = voice.voice_id;
        if (voiceId == null) {
            toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`);
            throw `Unable to attain voiceId for ${char}`;
        }
        await tts(text, voiceId, char);
    } catch (error) {
        toastr.error(String(error));
        console.error(error);
        currentTtsJob = null;
    }
}

/**
 * Queues every non-system, non-empty message of the current chat for narration.
 * Exposed as `window.playFullConversation`.
 */
async function playFullConversation() {
    resetTtsPlayback();

    if (!extension_settings.tts.enabled) {
        return toastr.warning('TTS is disabled. Please enable it in the extension settings.');
    }

    const context = getContext();
    const chat = context.chat.filter(x => !x.is_system && x.mes !== '...' && x.mes !== '');

    if (chat.length === 0) {
        return toastr.info('No messages to narrate.');
    }

    ttsJobQueue = chat;
}

window['playFullConversation'] = playFullConversation;

//#############################//
//  Extension UI and Settings  //
//#############################//

/**
 * Fills missing settings from `defaultSettings` and syncs the settings UI with them.
 */
function loadSettings() {
    if (Object.keys(extension_settings.tts).length === 0) {
        Object.assign(extension_settings.tts, defaultSettings);
    }
    for (const [key, value] of Object.entries(defaultSettings)) {
        if (!(key in extension_settings.tts)) {
            extension_settings.tts[key] = value;
        }
    }
    // Must be a real boolean: jQuery's toggleClass() flips the class when the state is undefined.
    const isEnabled = Boolean(extension_settings.tts.enabled);

    $('#tts_provider').val(extension_settings.tts.currentProvider);
    $('#tts_enabled').prop('checked', isEnabled);
    $('#tts_narrate_dialogues').prop('checked', extension_settings.tts.narrate_dialogues_only);
    $('#tts_narrate_quoted').prop('checked', extension_settings.tts.narrate_quoted_only);
    $('#tts_auto_generation').prop('checked', extension_settings.tts.auto_generation);
    $('#tts_periodic_auto_generation').prop('checked', extension_settings.tts.periodic_auto_generation);
    $('#tts_narrate_translated_only').prop('checked', extension_settings.tts.narrate_translated_only);
    $('#tts_narrate_user').prop('checked', extension_settings.tts.narrate_user);
    $('#tts_pass_asterisks').prop('checked', extension_settings.tts.pass_asterisks);
    $('#tts_skip_codeblocks').prop('checked', extension_settings.tts.skip_codeblocks);
    $('#tts_skip_tags').prop('checked', extension_settings.tts.skip_tags);
    $('#playback_rate').val(extension_settings.tts.playback_rate);
    $('#playback_rate_counter').val(Number(extension_settings.tts.playback_rate).toFixed(2));
    $('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');

    $('body').toggleClass('tts', isEnabled);
}

const defaultSettings = {
    voiceMap: '',
    // `enabled` is the flag the extension actually reads; `ttsEnabled` is kept so that
    // settings written by earlier versions keep their shape.
    enabled: false,
    ttsEnabled: false,
    currentProvider: 'ElevenLabs',
    auto_generation: true,
    narrate_user: false,
    playback_rate: 1,
};

/**
 * Shows a status line in the extension settings.
 * @param {string} status Text to show
 * @param {boolean} success `false` renders the text in red
 */
function setTtsStatus(status, success) {
    $('#tts_status').text(status);
    if (success) {
        $('#tts_status').removeAttr('style');
    } else {
        $('#tts_status').css('color', 'red');
    }
}

/**
 * Click handler of the refresh button: lets the provider re-apply its settings,
 * stores them and rebuilds the voice map.
 */
async function onRefreshClick() {
    try {
        await ttsProvider.onRefreshClick();
        extension_settings.tts[ttsProviderName] = ttsProvider.settings;
        saveSettingsDebounced();
        setTtsStatus('Successfully applied settings', true);
        console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`);
        // Awaited so that the voice map is applied after it has been rebuilt and so that
        // failures end up in the catch below.
        await initVoiceMap();
        updateVoiceMap();
    } catch (error) {
        toastr.error(String(error));
        console.error(error);
        setTtsStatus(error, false);
    }
}

function onEnableClick() {
    extension_settings.tts.enabled = $('#tts_enabled').is(':checked');
    updateUiAudioPlayState();
    saveSettingsDebounced();
    $('body').toggleClass('tts', extension_settings.tts.enabled);
}

function onAutoGenerationClick() {
    extension_settings.tts.auto_generation = !!$('#tts_auto_generation').prop('checked');
    saveSettingsDebounced();
}

function onPeriodicAutoGenerationClick() {
    extension_settings.tts.periodic_auto_generation = !!$('#tts_periodic_auto_generation').prop('checked');
    saveSettingsDebounced();
}

function onNarrateDialoguesClick() {
    extension_settings.tts.narrate_dialogues_only = !!$('#tts_narrate_dialogues').prop('checked');
    saveSettingsDebounced();
}

function onNarrateUserClick() {
    extension_settings.tts.narrate_user = !!$('#tts_narrate_user').prop('checked');
    saveSettingsDebounced();
}

function onNarrateQuotedClick() {
    extension_settings.tts.narrate_quoted_only = !!$('#tts_narrate_quoted').prop('checked');
    saveSettingsDebounced();
}

function onNarrateTranslatedOnlyClick() {
    extension_settings.tts.narrate_translated_only = !!$('#tts_narrate_translated_only').prop('checked');
    saveSettingsDebounced();
}

function onSkipCodeblocksClick() {
    extension_settings.tts.skip_codeblocks = !!$('#tts_skip_codeblocks').prop('checked');
    saveSettingsDebounced();
}

function onSkipTagsClick() {
    extension_settings.tts.skip_tags = !!$('#tts_skip_tags').prop('checked');
    saveSettingsDebounced();
}

function onPassAsterisksClick() {
    extension_settings.tts.pass_asterisks = !!$('#tts_pass_asterisks').prop('checked');
    saveSettingsDebounced();
    console.log('setting pass asterisks', extension_settings.tts.pass_asterisks);
}

//##############//
// TTS Provider //
//##############//

/**
 * Instantiates the named provider, renders its settings UI, loads its stored settings
 * and rebuilds the voice map.
 * @param {string} provider Key of `ttsProviders`
 */
async function loadTtsProvider(provider) {
    //Clear the current config and add new config
    $('#tts_provider_settings').html('');

    if (!provider) {
        return;
    }

    // A provider name that is not registered (e.g. a stale value in the stored settings)
    // cannot be constructed.
    if (typeof ttsProviders[provider] !== 'function') {
        console.warn(`Unknown TTS provider: ${provider}`);
        setTtsStatus(`Unknown TTS provider: ${provider}`, false);
        return;
    }

    // Init provider references
    extension_settings.tts.currentProvider = provider;
    ttsProviderName = provider;
    ttsProvider = new ttsProviders[provider];

    // Init provider settings
    $('#tts_provider_settings').append(ttsProvider.settingsHtml);
    if (!(ttsProviderName in extension_settings.tts)) {
        console.warn(`Provider ${ttsProviderName} not in Extension Settings, initiatilizing provider in settings`);
        extension_settings.tts[ttsProviderName] = {};
    }
    await ttsProvider.loadSettings(extension_settings.tts[ttsProviderName]);
    await initVoiceMap();
}

/**
 * Change handler of the provider dropdown.
 */
function onTtsProviderChange() {
    const ttsProviderSelection = $('#tts_provider').val();
    extension_settings.tts.currentProvider = ttsProviderSelection;
    $('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');
    loadTtsProvider(ttsProviderSelection);
}

// Ensure that TTS provider settings are saved to extension settings.
export function saveTtsProviderSettings() { extension_settings.tts[ttsProviderName] = ttsProvider.settings; updateVoiceMap(); saveSettingsDebounced(); console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`); } //###################// // voiceMap Handling // //###################// async function onChatChanged() { await onGenerationEnded(); resetTtsPlayback(); const voiceMapInit = initVoiceMap(); await Promise.race([voiceMapInit, delay(debounce_timeout.relaxed)]); lastMessage = null; } async function onMessageEvent(messageId, lastCharIndex) { // If TTS is disabled, do nothing if (!extension_settings.tts.enabled) { return; } // Auto generation is disabled if (!extension_settings.tts.auto_generation) { return; } const context = getContext(); // no characters or group selected if (!context.groupId && context.characterId === undefined) { return; } // Chat changed if (context.chatId !== lastChatId) { lastChatId = context.chatId; lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? ''); // Force to speak on the first message in the new chat if (context.chat.length === 1) { lastMessageHash = -1; } } // clone message object, as things go haywire if message object is altered below (it's passed by reference) const message = structuredClone(context.chat[messageId]); const hashNew = getStringHash(message?.mes ?? 
''); // Ignore prompt-hidden messages if (message.is_system) { return; } // if no new messages, or same message, or same message hash, do nothing if (hashNew === lastMessageHash) { return; } // if we only want to process part of the message if (lastCharIndex) { message.mes = message.mes.substring(0, lastCharIndex); } const isLastMessageInCurrent = () => lastMessage && typeof lastMessage === 'object' && message.swipe_id === lastMessage.swipe_id && message.name === lastMessage.name && message.is_user === lastMessage.is_user && message.mes.indexOf(lastMessage.mes) !== -1; // if last message within current message, message got extended. only send diff to TTS. if (isLastMessageInCurrent()) { const tmp = structuredClone(message); message.mes = message.mes.replace(lastMessage.mes, ''); lastMessage = tmp; } else { lastMessage = structuredClone(message); } // We're currently swiping. Don't generate voice if (!message || message.mes === '...' || message.mes === '') { return; } // Don't generate if message doesn't have a display text if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) { return; } // Don't generate if message is a user message and user message narration is disabled if (message.is_user && !extension_settings.tts.narrate_user) { return; } // New messages, add new chat to history lastMessageHash = hashNew; lastChatId = context.chatId; console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`); ttsJobQueue.push(message); } async function onMessageDeleted() { const context = getContext(); // update internal references to new last message lastChatId = context.chatId; // compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue const messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? 
''); if (messageHash === lastMessageHash) { return; } lastMessageHash = messageHash; lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null; // stop any tts playback since message might not exist anymore resetTtsPlayback(); } async function onGenerationStarted(generationType, _args, isDryRun) { // If dry running or quiet mode, do nothing if (isDryRun || ['quiet', 'impersonate'].includes(generationType)) { return; } // If TTS is disabled, do nothing if (!extension_settings.tts.enabled) { return; } // Auto generation is disabled if (!extension_settings.tts.auto_generation) { return; } // Periodic auto generation is disabled if (!extension_settings.tts.periodic_auto_generation) { return; } // If the reply is not being streamed if (!isStreamingEnabled()) { return; } // start the timer if (!periodicMessageGenerationTimer) { periodicMessageGenerationTimer = setInterval(onPeriodicMessageGenerationTick, UPDATE_INTERVAL); } } async function onGenerationEnded() { if (periodicMessageGenerationTimer) { clearInterval(periodicMessageGenerationTimer); periodicMessageGenerationTimer = null; } lastPositionOfParagraphEnd = -1; } async function onPeriodicMessageGenerationTick() { const context = getContext(); // no characters or group selected if (!context.groupId && context.characterId === undefined) { return; } const lastMessageId = context.chat.length - 1; // the last message was from the user if (context.chat[lastMessageId].is_user) { return; } const lastMessage = structuredClone(context.chat[lastMessageId]); const lastMessageText = lastMessage?.mes ?? 
''; // look for double ending lines which should indicate the end of a paragraph let newLastPositionOfParagraphEnd = lastMessageText .indexOf('\n\n', lastPositionOfParagraphEnd + 1); // if not found, look for a single ending line which should indicate the end of a paragraph if (newLastPositionOfParagraphEnd === -1) { newLastPositionOfParagraphEnd = lastMessageText .indexOf('\n', lastPositionOfParagraphEnd + 1); } // send the message to the tts module if we found the new end of a paragraph if (newLastPositionOfParagraphEnd > -1) { onMessageEvent(lastMessageId, newLastPositionOfParagraphEnd); if (periodicMessageGenerationTimer) { lastPositionOfParagraphEnd = newLastPositionOfParagraphEnd; } } } /** * Get characters in current chat * @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat. * @returns {string[]} - Array of character names */ function getCharacters(unrestricted) { const context = getContext(); if (unrestricted) { const names = context.characters.map(char => char.name); names.unshift(DEFAULT_VOICE_MARKER); return names.filter(onlyUnique); } let characters = []; if (context.groupId === null) { // Single char chat characters.push(DEFAULT_VOICE_MARKER); characters.push(context.name1); characters.push(context.name2); } else { // Group chat characters.push(DEFAULT_VOICE_MARKER); characters.push(context.name1); const group = context.groups.find(group => context.groupId == group.id); for (let member of group.members) { const character = context.characters.find(char => char.avatar == member); if (character) { characters.push(character.name); } } } return characters.filter(onlyUnique); } function sanitizeId(input) { // Remove any non-alphanumeric characters except underscore (_) and hyphen (-) let sanitized = encodeURIComponent(input).replace(/[^a-zA-Z0-9-_]/g, ''); // Ensure first character is always a letter if (!/^[a-zA-Z]/.test(sanitized)) { sanitized = 'element_' + sanitized; } return 
sanitized; } function parseVoiceMap(voiceMapString) { let parsedVoiceMap = {}; for (const [charName, voiceId] of voiceMapString .split(',') .map(s => s.split(':'))) { if (charName && voiceId) { parsedVoiceMap[charName.trim()] = voiceId.trim(); } } return parsedVoiceMap; } /** * Apply voiceMap based on current voiceMapEntries */ function updateVoiceMap() { const tempVoiceMap = {}; for (const voice of voiceMapEntries) { if (voice.voiceId === null) { continue; } tempVoiceMap[voice.name] = voice.voiceId; } if (Object.keys(tempVoiceMap).length !== 0) { voiceMap = tempVoiceMap; console.log(`Voicemap updated to ${JSON.stringify(voiceMap)}`); } if (!extension_settings.tts[ttsProviderName].voiceMap) { extension_settings.tts[ttsProviderName].voiceMap = {}; } Object.assign(extension_settings.tts[ttsProviderName].voiceMap, voiceMap); saveSettingsDebounced(); } class VoiceMapEntry { name; voiceId; selectElement; constructor(name, voiceId = DEFAULT_VOICE_MARKER) { this.name = name; this.voiceId = voiceId; this.selectElement = null; } addUI(voiceIds) { let sanitizedName = sanitizeId(this.name); let defaultOption = this.name === DEFAULT_VOICE_MARKER ? `` : ``; let template = `
${this.name}
`; $('#tts_voicemap_block').append(template); // Populate voice ID select list for (const voiceId of voiceIds) { const option = document.createElement('option'); option.innerText = voiceId.name; option.value = voiceId.name; $(`#tts_voicemap_char_${sanitizedName}_voice`).append(option); } this.selectElement = $(`#tts_voicemap_char_${sanitizedName}_voice`); this.selectElement.on('change', args => this.onSelectChange(args)); this.selectElement.val(this.voiceId); } onSelectChange(args) { this.voiceId = this.selectElement.find(':selected').val(); updateVoiceMap(); } } /** * Init voiceMapEntries for character select list. * If an initialization is already in progress, it returns the existing Promise instead of starting a new one. * @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat. * @returns {Promise} A promise that resolves when the initialization is complete. */ export async function initVoiceMap(unrestricted = false) { // Preventing parallel execution if (currentInitVoiceMapPromise) { return currentInitVoiceMapPromise; } currentInitVoiceMapPromise = (async () => { const initialChatId = getCurrentChatId(); try { await initVoiceMapInternal(unrestricted); } finally { currentInitVoiceMapPromise = null; } const currentChatId = getCurrentChatId(); if (initialChatId !== currentChatId) { // Chat changed during initialization, reinitialize await initVoiceMap(unrestricted); } })(); return currentInitVoiceMapPromise; } /** * Init voiceMapEntries for character select list. * @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat. */ async function initVoiceMapInternal(unrestricted) { // Gate initialization if not enabled or TTS Provider not ready. Prevents error popups. const enabled = $('#tts_enabled').is(':checked'); if (!enabled) { return; } // Keep errors inside extension UI rather than toastr. 
Toastr errors for TTS are annoying. try { await ttsProvider.checkReady(); } catch (error) { const message = `TTS Provider not ready. ${error}`; setTtsStatus(message, false); return; } setTtsStatus('TTS Provider Loaded', true); // Clear existing voiceMap state $('#tts_voicemap_block').empty(); voiceMapEntries = []; // Get characters in current chat const characters = getCharacters(unrestricted); // Get saved voicemap from provider settings, handling new and old representations let voiceMapFromSettings = {}; if ('voiceMap' in extension_settings.tts[ttsProviderName]) { // Handle previous representation if (typeof extension_settings.tts[ttsProviderName].voiceMap === 'string') { voiceMapFromSettings = parseVoiceMap(extension_settings.tts[ttsProviderName].voiceMap); // Handle new representation } else if (typeof extension_settings.tts[ttsProviderName].voiceMap === 'object') { voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap; } } // Get voiceIds from provider let voiceIdsFromProvider; try { voiceIdsFromProvider = await ttsProvider.fetchTtsVoiceObjects(); } catch { toastr.error('TTS Provider failed to return voice ids.'); } // Build UI using VoiceMapEntry objects for (const character of characters) { if (character === 'SillyTavern System') { continue; } // Check provider settings for voiceIds let voiceId; if (character in voiceMapFromSettings) { voiceId = voiceMapFromSettings[character]; } else if (character === DEFAULT_VOICE_MARKER) { voiceId = DISABLED_VOICE_MARKER; } else { voiceId = DEFAULT_VOICE_MARKER; } const voiceMapEntry = new VoiceMapEntry(character, voiceId); voiceMapEntry.addUI(voiceIdsFromProvider); voiceMapEntries.push(voiceMapEntry); } updateVoiceMap(); } jQuery(async function () { async function addExtensionControls() { const settingsHtml = $(await renderExtensionTemplateAsync('tts', 'settings')); $('#tts_container').append(settingsHtml); $('#tts_refresh').on('click', onRefreshClick); $('#tts_enabled').on('click', onEnableClick); 
$('#tts_narrate_dialogues').on('click', onNarrateDialoguesClick); $('#tts_narrate_quoted').on('click', onNarrateQuotedClick); $('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick); $('#tts_skip_codeblocks').on('click', onSkipCodeblocksClick); $('#tts_skip_tags').on('click', onSkipTagsClick); $('#tts_pass_asterisks').on('click', onPassAsterisksClick); $('#tts_auto_generation').on('click', onAutoGenerationClick); $('#tts_periodic_auto_generation').on('click', onPeriodicAutoGenerationClick); $('#tts_narrate_user').on('click', onNarrateUserClick); $('#playback_rate').on('input', function () { const value = $(this).val(); const formattedValue = Number(value).toFixed(2); extension_settings.tts.playback_rate = value; $('#playback_rate_counter').val(formattedValue); saveSettingsDebounced(); }); $('#tts_voices').on('click', onTtsVoicesClick); for (const provider in ttsProviders) { $('#tts_provider').append($('