From ffc4f220124b1fc68d1007cc348a3eeea93123e7 Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Mon, 27 Nov 2023 13:25:49 +0200 Subject: [PATCH] Add provider-specific TTS processing: XTTS - replace ellipsis, Novel - remove tildes. --- public/scripts/extensions/tts/index.js | 4 ++++ public/scripts/extensions/tts/novel.js | 11 +++++++++++ public/scripts/extensions/tts/readme.md | 17 +++++++++++------ public/scripts/extensions/tts/xtts.js | 13 +++++++++++++ 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 3b85868b2..110dc0838 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -503,6 +503,10 @@ async function processTtsQueue() { text = matches ? matches.join(partJoiner) : text; } + if (typeof ttsProvider?.processText === 'function') { + text = await ttsProvider.processText(text); + } + // Collapse newlines and spaces into single space text = text.replace(/\s+/g, ' '); diff --git a/public/scripts/extensions/tts/novel.js b/public/scripts/extensions/tts/novel.js index 02ccd9ad5..27eb46be4 100644 --- a/public/scripts/extensions/tts/novel.js +++ b/public/scripts/extensions/tts/novel.js @@ -19,6 +19,17 @@ class NovelTtsProvider { customVoices: [] } + /** + * Provider-specific text clean-up applied before the text is passed to the TTS engine. + * @param {string} text Input text + * @returns {string} Text with every tilde replaced by a full stop + */ + processText(text) { + // Novel reads a tilde as a word, so every tilde is replaced with a full stop + text = text.replace(/~/g, '.'); + return text; + } + get settingsHtml() { let html = `
diff --git a/public/scripts/extensions/tts/readme.md b/public/scripts/extensions/tts/readme.md index fb48e116b..26ad50f77 100644 --- a/public/scripts/extensions/tts/readme.md +++ b/public/scripts/extensions/tts/readme.md @@ -1,8 +1,8 @@ -# Provider Requirements. +# Provider Requirements. Because I don't know how, or if you can, and/or maybe I am just too lazy to implement interfaces in JS, here's the requirements of a provider that the extension needs to operate. ### class YourTtsProvider -#### Required +#### Required Exported for use in extension index.js, and added to providers list in index.js 1. generateTts(text, voiceId) 2. fetchTtsVoiceObjects() @@ -13,8 +13,9 @@ Exported for use in extension index.js, and added to providers list in index.js 7. settingsHtml field #### Optional -1. previewTtsVoice() +1. previewTtsVoice() 2. separator field +3. processText(text) # Requirement Descriptions ### generateTts(text, voiceId) @@ -49,14 +50,14 @@ Return without error to let TTS extension know that the provider is ready. Return an error to block the main TTS extension for initializing the provider and UI. The error will be put in the TTS extension UI directly. ### loadSettings(settingsObject) -Required. +Required. Handle the input settings from the TTS extension on provider load. Put code in here to load your provider settings. ### settings field Required, used for storing any provider state that needs to be saved. Anything stored in this field is automatically persisted under extension_settings[providerName] by the main extension in `saveTtsProviderSettings()`, as well as loaded when the provider is selected in `loadTtsProvider(provider)`. -TTS extension doesn't expect any specific contents. +TTS extension doesn't expect any specific contents. ### settingsHtml field Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly. 
@@ -68,4 +69,8 @@ Function to handle playing previews of voice samples if no direct preview_url is ### separator field Optional. Used when narrate quoted text is enabled. -Defines the string of characters used to introduce separation between between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...` \ No newline at end of file +Defines the string of characters used to introduce separation between between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...` + +### processText(text) +Optional. +A function applied to the input text before passing it to the TTS generator. Can be async. diff --git a/public/scripts/extensions/tts/xtts.js b/public/scripts/extensions/tts/xtts.js index 624e926cc..3add9ed7e 100644 --- a/public/scripts/extensions/tts/xtts.js +++ b/public/scripts/extensions/tts/xtts.js @@ -13,6 +13,19 @@ class XTTSTtsProvider { voices = [] separator = '. ' + /** + * Provider-specific text clean-up applied before the text is passed to the TTS engine. + * @param {string} text Input text + * @returns {string} Text with every ellipsis and run of periods reduced to a single period + */ + processText(text) { + // Expand the single-character ellipsis "…" into three periods "..." + text = text.replace(/…/g, '...') + // Collapse every run of "." (including the "..." produced above) into a single "." + text = text.replace(/\.+/g, '.') + return text + } + languageLabels = { "Arabic": "ar", "Brazilian Portuguese": "pt",