diff --git a/public/scripts/extensions/tts/kokoro.js b/public/scripts/extensions/tts/kokoro.js index 115e5db8f..052a5aa21 100644 --- a/public/scripts/extensions/tts/kokoro.js +++ b/public/scripts/extensions/tts/kokoro.js @@ -1,5 +1,5 @@ import { debounce_timeout } from '../../constants.js'; -import { debounceAsync } from '../../utils.js'; +import { debounceAsync, splitRecursive } from '../../utils.js'; import { getPreviewString, saveTtsProviderSettings } from './index.js'; export class KokoroTtsProvider { @@ -52,6 +52,17 @@ export class KokoroTtsProvider { this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed); } + /** + * Perform any text processing before passing to TTS engine. + * @param {string} text Input text + * @returns {string} Processed text + */ + processText(text) { + // TILDE! + text = text.replace(/~/g, '.'); + return text; + } + async loadSettings(settings) { if (settings.modelId !== undefined) this.settings.modelId = settings.modelId; if (settings.dtype !== undefined) this.settings.dtype = settings.dtype; @@ -258,13 +269,17 @@ export class KokoroTtsProvider { const voice = this.getVoice(voiceId); const previewText = getPreviewString(voice.lang); - const response = await this.generateTts(previewText, voiceId); - const audio = await response.blob(); - const url = URL.createObjectURL(audio); - const audioElement = new Audio(); - audioElement.src = url; - audioElement.play(); - audioElement.onended = () => URL.revokeObjectURL(url); + for await (const response of this.generateTts(previewText, voiceId)) { + const audio = await response.blob(); + const url = URL.createObjectURL(audio); + await new Promise(resolve => { + const audioElement = new Audio(); + audioElement.src = url; + audioElement.play(); + audioElement.onended = () => resolve(); + }); + URL.revokeObjectURL(url); + } } getVoiceDisplayName(voiceId) { @@ -282,7 +297,13 @@ export class KokoroTtsProvider { }; } - async generateTts(text, voiceId) { + /** + * Generate TTS audio for the given text using the specified voice. + * @param {string} text Text to generate + * @param {string} voiceId Voice ID + * @returns {AsyncGenerator} Audio response generator + */ + async* generateTts(text, voiceId) { if (!this.ready || !this.worker) { console.log('TTS not ready, initializing...'); await this.initializeWorker(); @@ -299,21 +320,26 @@ export class KokoroTtsProvider { const voice = this.getVoice(voiceId); const requestId = this.nextRequestId++; - return new Promise((resolve, reject) => { - // Store the promise callbacks - this.pendingRequests.set(requestId, { resolve, reject }); + const chunkSize = 400; + const chunks = splitRecursive(text, chunkSize, ['\n\n', '\n', '.', '?', '!', ',', ' ', '']); - // Send the request to the worker - this.worker.postMessage({ - action: 'generateTts', - data: { - text, - voice: voice.voice_id, - speakingRate: this.settings.speakingRate || 1.0, - requestId, - }, + for (const chunk of chunks) { + yield await new Promise((resolve, reject) => { + // Store the promise callbacks + this.pendingRequests.set(requestId, { resolve, reject }); + + // Send the request to the worker + this.worker.postMessage({ + action: 'generateTts', + data: { + text: chunk, + voice: voice.voice_id, + speakingRate: this.settings.speakingRate || 1.0, + requestId, + }, + }); }); - }); + } } dispose() { diff --git a/public/scripts/utils.js b/public/scripts/utils.js index f0ae9bf59..0b3c8776a 100644 --- a/public/scripts/utils.js +++ b/public/scripts/utils.js @@ -1015,6 +1015,10 @@ export function splitRecursive(input, length, delimiters = ['\n\n', '\n', ' ', ' return result; } +export function splitSentences(input, length) { + var pattRegex = new RegExp(`^[\\s\\S]{${Math.floor(length / 2)},${length}}[.!?,]{1}|^[\\s\\S]{1,${length}}$|^[\\s\\S]{1,${length}}`); +} + /** * Checks if a string is a valid data URL. * @param {string} str The string to check.