Merge branch 'staging' into X-T-E-R/release

This commit is contained in:
Cohee
2024-05-18 19:52:33 +03:00
402 changed files with 55418 additions and 23307 deletions

View File

@@ -1,4 +1,4 @@
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js';
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced, substituteParams } from '../../../script.js';
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js';
import { delay, escapeRegex, getBase64Async, getStringHash, onlyUnique } from '../../utils.js';
import { EdgeTtsProvider } from './edge.js';
@@ -8,20 +8,23 @@ import { CoquiTtsProvider } from './coqui.js';
import { SystemTtsProvider } from './system.js';
import { NovelTtsProvider } from './novel.js';
import { power_user } from '../../power-user.js';
import { registerSlashCommand } from '../../slash-commands.js';
import { OpenAITtsProvider } from './openai.js';
import { XTTSTtsProvider } from './xtts.js';
import { GSVITtsProvider } from './gsvi.js';
import { AllTalkTtsProvider } from './alltalk.js';
import { SpeechT5TtsProvider } from './speecht5.js';
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
export { talkingAnimation };
const UPDATE_INTERVAL = 1000;
let voiceMapEntries = [];
let voiceMap = {}; // {charName:voiceid, charName2:voiceid2}
let storedvalue = false;
let talkingHeadState = false;
let lastChatId = null;
let lastMessage = null;
let lastMessageHash = null;
const DEFAULT_VOICE_MARKER = '[Default Voice]';
@@ -68,7 +71,7 @@ export function getPreviewString(lang) {
return previewStrings[lang] ?? fallbackPreview;
}
let ttsProviders = {
const ttsProviders = {
ElevenLabs: ElevenLabsTtsProvider,
Silero: SileroTtsProvider,
XTTSv2: XTTSTtsProvider,
@@ -84,7 +87,6 @@ let ttsProviders = {
let ttsProvider;
let ttsProviderName;
let ttsLastMessage = null;
async function onNarrateOneMessage() {
audioElement.src = '/sounds/silence.mp3';
@@ -132,103 +134,13 @@ async function onNarrateText(args, text) {
}
async function moduleWorker() {
// Primarily determining when to add new chat to the TTS queue
const enabled = $('#tts_enabled').is(':checked');
$('body').toggleClass('tts', enabled);
if (!enabled) {
if (!extension_settings.tts.enabled) {
return;
}
const context = getContext();
const chat = context.chat;
processTtsQueue();
processAudioJobQueue();
updateUiAudioPlayState();
// Auto generation is disabled
if (extension_settings.tts.auto_generation == false) {
return;
}
// no characters or group selected
if (!context.groupId && context.characterId === undefined) {
return;
}
// Chat changed
if (
context.chatId !== lastChatId
) {
currentMessageNumber = context.chat.length ? context.chat.length : 0;
saveLastValues();
// Force to speak on the first message in the new chat
if (context.chat.length === 1) {
lastMessageHash = -1;
}
return;
}
// take the count of messages
let lastMessageNumber = context.chat.length ? context.chat.length : 0;
// There's no new messages
let diff = lastMessageNumber - currentMessageNumber;
let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
// if messages got deleted, diff will be < 0
if (diff < 0) {
// necessary actions will be taken by the onChatDeleted() handler
return;
}
// if no new messages, or same message, or same message hash, do nothing
if (diff == 0 && hashNew === lastMessageHash) {
return;
}
// If streaming, wait for streaming to finish before processing new messages
if (context.streamingProcessor && !context.streamingProcessor.isFinished) {
return;
}
// clone message object, as things go haywire if message object is altered below (it's passed by reference)
const message = structuredClone(chat[chat.length - 1]);
// if last message within current message, message got extended. only send diff to TTS.
if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) {
let tmp = message.mes;
message.mes = message.mes.replace(ttsLastMessage, '');
ttsLastMessage = tmp;
} else {
ttsLastMessage = message.mes;
}
// We're currently swiping. Don't generate voice
if (!message || message.mes === '...' || message.mes === '') {
return;
}
// Don't generate if message doesn't have a display text
if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
return;
}
// Don't generate if message is a user message and user message narration is disabled
if (message.is_user && !extension_settings.tts.narrate_user) {
return;
}
// New messages, add new chat to history
lastMessageHash = hashNew;
currentMessageNumber = lastMessageNumber;
console.debug(
`Adding message from ${message.name} for TTS processing: "${message.mes}"`,
);
ttsJobQueue.push(message);
}
function talkingAnimation(switchValue) {
@@ -240,11 +152,11 @@ function talkingAnimation(switchValue) {
const apiUrl = getApiUrl();
const animationType = switchValue ? 'start' : 'stop';
if (switchValue !== storedvalue) {
if (switchValue !== talkingHeadState) {
try {
console.log(animationType + ' Talking Animation');
doExtrasFetch(`${apiUrl}/api/talkinghead/${animationType}_talking`);
storedvalue = switchValue; // Update the storedvalue to the current switchValue
talkingHeadState = switchValue;
} catch (error) {
// Handle the error here or simply ignore it to prevent logging
}
@@ -291,7 +203,6 @@ function debugTtsPlayback() {
{
'ttsProviderName': ttsProviderName,
'voiceMap': voiceMap,
'currentMessageNumber': currentMessageNumber,
'audioPaused': audioPaused,
'audioJobQueue': audioJobQueue,
'currentAudioJob': currentAudioJob,
@@ -352,6 +263,7 @@ async function playAudioData(audioJob) {
audioElement.addEventListener('ended', completeCurrentAudioJob);
audioElement.addEventListener('canplay', () => {
console.debug('Starting TTS playback');
audioElement.playbackRate = extension_settings.tts.playback_rate;
audioElement.play();
});
}
@@ -467,6 +379,7 @@ async function processAudioJobQueue() {
playAudioData(currentAudioJob);
talkingAnimation(true);
} catch (error) {
toastr.error(error.toString());
console.error(error);
audioQueueProcessorReady = true;
}
@@ -478,21 +391,12 @@ async function processAudioJobQueue() {
let ttsJobQueue = [];
let currentTtsJob; // Null if nothing is currently being processed
let currentMessageNumber = 0;
function completeTtsJob() {
console.info(`Current TTS job for ${currentTtsJob?.name} completed.`);
currentTtsJob = null;
}
function saveLastValues() {
const context = getContext();
lastChatId = context.chatId;
lastMessageHash = getStringHash(
(context.chat.length && context.chat[context.chat.length - 1].mes) ?? '',
);
}
async function tts(text, voiceId, char) {
async function processResponse(response) {
// RVC injection
@@ -526,11 +430,18 @@ async function processTtsQueue() {
currentTtsJob = ttsJobQueue.shift();
let text = extension_settings.tts.narrate_translated_only ? (currentTtsJob?.extra?.display_text || currentTtsJob.mes) : currentTtsJob.mes;
// Substitute macros
text = substituteParams(text);
if (extension_settings.tts.skip_codeblocks) {
text = text.replace(/^\s{4}.*$/gm, '').trim();
text = text.replace(/```.*?```/gs, '').trim();
}
if (extension_settings.tts.skip_tags) {
text = text.replace(/<.*?>.*?<\/.*?>/g, '').trim();
}
if (!extension_settings.tts.pass_asterisks) {
text = extension_settings.tts.narrate_dialogues_only
? text.replace(/\*[^*]*?(\*|$)/g, '').trim() // remove asterisks content
@@ -579,8 +490,9 @@ async function processTtsQueue() {
toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`);
throw `Unable to attain voiceId for ${char}`;
}
tts(text, voiceId, char);
await tts(text, voiceId, char);
} catch (error) {
toastr.error(error.toString());
console.error(error);
currentTtsJob = null;
}
@@ -618,6 +530,12 @@ function loadSettings() {
$('#tts_narrate_translated_only').prop('checked', extension_settings.tts.narrate_translated_only);
$('#tts_narrate_user').prop('checked', extension_settings.tts.narrate_user);
$('#tts_pass_asterisks').prop('checked', extension_settings.tts.pass_asterisks);
$('#tts_skip_codeblocks').prop('checked', extension_settings.tts.skip_codeblocks);
$('#tts_skip_tags').prop('checked', extension_settings.tts.skip_tags);
$('#playback_rate').val(extension_settings.tts.playback_rate);
$('#playback_rate_counter').val(Number(extension_settings.tts.playback_rate).toFixed(2));
$('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');
$('body').toggleClass('tts', extension_settings.tts.enabled);
}
@@ -627,6 +545,7 @@ const defaultSettings = {
currentProvider: 'ElevenLabs',
auto_generation: true,
narrate_user: false,
playback_rate: 1,
};
function setTtsStatus(status, success) {
@@ -650,6 +569,7 @@ function onRefreshClick() {
initVoiceMap();
updateVoiceMap();
}).catch(error => {
toastr.error(error.toString());
console.error(error);
setTtsStatus(error, false);
});
@@ -696,6 +616,11 @@ function onSkipCodeblocksClick() {
saveSettingsDebounced();
}
function onSkipTagsClick() {
extension_settings.tts.skip_tags = !!$('#tts_skip_tags').prop('checked');
saveSettingsDebounced();
}
function onPassAsterisksClick() {
extension_settings.tts.pass_asterisks = !!$('#tts_pass_asterisks').prop('checked');
saveSettingsDebounced();
@@ -732,6 +657,7 @@ async function loadTtsProvider(provider) {
function onTtsProviderChange() {
const ttsProviderSelection = $('#tts_provider').val();
extension_settings.tts.currentProvider = ttsProviderSelection;
$('#playback_rate_block').toggle(extension_settings.tts.currentProvider !== 'System');
loadTtsProvider(ttsProviderSelection);
}
@@ -752,26 +678,103 @@ async function onChatChanged() {
await resetTtsPlayback();
const voiceMapInit = initVoiceMap();
await Promise.race([voiceMapInit, delay(1000)]);
ttsLastMessage = null;
lastMessage = null;
}
async function onChatDeleted() {
async function onMessageEvent(messageId) {
// If TTS is disabled, do nothing
if (!extension_settings.tts.enabled) {
return;
}
// Auto generation is disabled
if (!extension_settings.tts.auto_generation) {
return;
}
const context = getContext();
// no characters or group selected
if (!context.groupId && context.characterId === undefined) {
return;
}
// Chat changed
if (context.chatId !== lastChatId) {
lastChatId = context.chatId;
lastMessageHash = getStringHash(context.chat[messageId]?.mes ?? '');
// Force to speak on the first message in the new chat
if (context.chat.length === 1) {
lastMessageHash = -1;
}
}
// clone message object, as things go haywire if message object is altered below (it's passed by reference)
const message = structuredClone(context.chat[messageId]);
const hashNew = getStringHash(message?.mes ?? '');
// if no new messages, or same message, or same message hash, do nothing
if (hashNew === lastMessageHash) {
return;
}
const isLastMessageInCurrent = () =>
lastMessage &&
typeof lastMessage === 'object' &&
message.swipe_id === lastMessage.swipe_id &&
message.name === lastMessage.name &&
message.is_user === lastMessage.is_user &&
message.mes.indexOf(lastMessage.mes) !== -1;
// if last message within current message, message got extended. only send diff to TTS.
if (isLastMessageInCurrent()) {
const tmp = structuredClone(message);
message.mes = message.mes.replace(lastMessage.mes, '');
lastMessage = tmp;
} else {
lastMessage = structuredClone(message);
}
// We're currently swiping. Don't generate voice
if (!message || message.mes === '...' || message.mes === '') {
return;
}
// Don't generate if message doesn't have a display text
if (extension_settings.tts.narrate_translated_only && !(message?.extra?.display_text)) {
return;
}
// Don't generate if message is a user message and user message narration is disabled
if (message.is_user && !extension_settings.tts.narrate_user) {
return;
}
// New messages, add new chat to history
lastMessageHash = hashNew;
lastChatId = context.chatId;
console.debug(`Adding message from ${message.name} for TTS processing: "${message.mes}"`);
ttsJobQueue.push(message);
}
async function onMessageDeleted() {
const context = getContext();
// update internal references to new last message
lastChatId = context.chatId;
currentMessageNumber = context.chat.length ? context.chat.length : 0;
// compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue
let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
const messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '');
if (messageHash === lastMessageHash) {
return;
}
lastMessageHash = messageHash;
ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '';
lastMessage = context.chat.length ? structuredClone(context.chat[context.chat.length - 1]) : null;
// stop any tts playback since message might not exist anymore
await resetTtsPlayback();
resetTtsPlayback();
}
/**
@@ -1019,11 +1022,29 @@ $(document).ready(function () {
<input type="checkbox" id="tts_skip_codeblocks">
<small>Skip codeblocks</small>
</label>
<label class="checkbox_label" for="tts_skip_tags">
<input type="checkbox" id="tts_skip_tags">
<small>Skip &lt;tagged&gt; blocks</small>
</label>
<label class="checkbox_label" for="tts_pass_asterisks">
<input type="checkbox" id="tts_pass_asterisks">
<small>Pass Asterisks to TTS Engine</small>
</label>
</div>
<div id="playback_rate_block" class="range-block">
<hr>
<div class="range-block-title justifyLeft" data-i18n="Audio Playback Speed">
<small>Audio Playback Speed</small>
</div>
<div class="range-block-range-and-counter">
<div class="range-block-range">
<input type="range" id="playback_rate" name="volume" min="0" max="3" step="0.05">
</div>
<div class="range-block-counter">
<input type="number" min="0" max="3" step="0.05" data-for="playback_rate" id="playback_rate_counter">
</div>
</div>
</div>
<div id="tts_voicemap_block">
</div>
<hr>
@@ -1044,9 +1065,19 @@ $(document).ready(function () {
$('#tts_narrate_quoted').on('click', onNarrateQuotedClick);
$('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick);
$('#tts_skip_codeblocks').on('click', onSkipCodeblocksClick);
$('#tts_skip_tags').on('click', onSkipTagsClick);
$('#tts_pass_asterisks').on('click', onPassAsterisksClick);
$('#tts_auto_generation').on('click', onAutoGenerationClick);
$('#tts_narrate_user').on('click', onNarrateUserClick);
$('#playback_rate').on('input', function () {
const value = $(this).val();
const formattedValue = Number(value).toFixed(2);
extension_settings.tts.playback_rate = value;
$('#playback_rate_counter').val(formattedValue);
saveSettingsDebounced();
});
$('#tts_voices').on('click', onTtsVoicesClick);
for (const provider in ttsProviders) {
$('#tts_provider').append($('<option />').val(provider).text(provider));
@@ -1062,8 +1093,40 @@ $(document).ready(function () {
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
eventSource.on(event_types.CHAT_CHANGED, onChatChanged);
eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted);
eventSource.on(event_types.MESSAGE_DELETED, onMessageDeleted);
eventSource.on(event_types.GROUP_UPDATED, onChatChanged);
registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], '<span class="monospace">(text)</span> narrate any text using currently selected character\'s voice. Use voice="Character Name" argument to set other voice from the voice map, example: <tt>/speak voice="Donald Duck" Quack!</tt>', true, true);
eventSource.makeLast(event_types.CHARACTER_MESSAGE_RENDERED, onMessageEvent);
eventSource.makeLast(event_types.USER_MESSAGE_RENDERED, onMessageEvent);
SlashCommandParser.addCommandObject(SlashCommand.fromProps({ name: 'speak',
callback: onNarrateText,
aliases: ['narrate', 'tts'],
namedArgumentList: [
new SlashCommandNamedArgument(
'voice', 'character voice name', [ARGUMENT_TYPE.STRING], false,
),
],
unnamedArgumentList: [
new SlashCommandArgument(
'text', [ARGUMENT_TYPE.STRING], true,
),
],
helpString: `
<div>
Narrate any text using currently selected character's voice.
</div>
<div>
Use <code>voice="Character Name"</code> argument to set other voice from the voice map.
</div>
<div>
<strong>Example:</strong>
<ul>
<li>
<pre><code>/speak voice="Donald Duck" Quack!</code></pre>
</li>
</ul>
</div>
`,
}));
document.body.appendChild(audioElement);
});