mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-03-13 02:20:14 +01:00
* feature: 'kokoro-js' supports TTS #3412 * Linting, add credits for kokoro library * Fix voice preview * Fix display languages on previews * Fix settings restoration. Debounce model init on settings change * Fix engine sorting * Move TTS processing to a web worker. Remove unused gain setting * Speaking rate fix * Update status when recreating a worker * Pass voices list from TTS engine * Call dispose function on provider change * Extend worker init timeout to 10 minutes --------- Co-authored-by: ryan <1014670860@qq.com> Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
327 lines
12 KiB
JavaScript
327 lines
12 KiB
JavaScript
import { debounce_timeout } from '../../constants.js';
|
|
import { debounceAsync } from '../../utils.js';
|
|
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
|
|
|
/**
 * Text-to-speech provider that runs the Kokoro model inside a dedicated web worker.
 *
 * The provider owns the worker's lifecycle. Every message sent to the worker that
 * expects an answer is tracked in `pendingRequests`, so the matching promise can be
 * settled when the worker replies, or rejected when the worker is replaced or disposed.
 */
export class KokoroTtsProvider {
    constructor() {
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        this.ready = false;
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        this.worker = null;
        // Not read inside this class; presumably consumed by the TTS extension when joining text chunks.
        this.separator = ' ... ... ... ';
        // Keyed by numeric request id, plus the fixed key 'initialization' for the init handshake.
        this.pendingRequests = new Map();
        this.nextRequestId = 1;

        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }

    /**
     * Copies the known keys of the saved settings over the defaults and wires up the settings controls.
     * @param {object} settings Previously saved provider settings; keys that are `undefined` keep their defaults.
     * @returns {Promise<void>}
     */
    async loadSettings(settings) {
        for (const key of ['modelId', 'dtype', 'device', 'voiceMap', 'defaultVoice', 'speakingRate']) {
            if (settings[key] !== undefined) {
                this.settings[key] = settings[key];
            }
        }

        const onChange = this.onSettingsChange.bind(this);
        $('#kokoro_model_id').val(this.settings.modelId).on('input', onChange);
        $('#kokoro_dtype').val(this.settings.dtype).on('change', onChange);
        $('#kokoro_device').val(this.settings.device).on('change', onChange);
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', onChange);
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }

    /**
     * Terminates the current worker (if any) and rejects every promise that was waiting on it.
     * Without this, callers awaiting a reply from a terminated worker would never settle, and a
     * superseded initialization would keep its timeout alive.
     * @param {string} reason Message for the error the pending promises are rejected with.
     */
    teardownWorker(reason) {
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }

        // Snapshot and clear first so reject callbacks observe an already-empty map.
        const abandoned = [...this.pendingRequests.values()];
        this.pendingRequests.clear();
        for (const request of abandoned) {
            request.reject(new Error(reason));
        }
    }

    /**
     * (Re)creates the worker and asks it to load the model described by the current settings.
     * Any previous worker is terminated and its outstanding requests are rejected.
     * @returns {Promise<boolean>} Resolves with the success flag once the worker reports 'initialized'.
     * @throws {Error} Rejects if the worker cannot be created, reports a failure, raises an error
     * event while initializing, is replaced/disposed meanwhile, or does not answer within 10 minutes.
     */
    async initializeWorker() {
        const initTimeoutMs = 600000; // 600 second timeout

        // Terminate the existing worker if it exists
        if (this.worker) {
            this.teardownWorker('Kokoro worker was restarted');
            $('#kokoro_status_text').text('Initializing...').removeAttr('style');
        }

        let worker = null;
        try {
            // Create a new worker
            worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
            this.worker = worker;

            // Set up message handling
            worker.onmessage = this.handleWorkerMessage.bind(this);

            // Surface load/runtime errors immediately instead of waiting for the init timeout.
            worker.onerror = (event) => {
                console.error('Kokoro worker error:', event);
                const initRequest = this.pendingRequests.get('initialization');
                if (this.worker === worker && initRequest) {
                    this.pendingRequests.delete('initialization');
                    initRequest.reject(new Error(event?.message || 'Worker failed to start'));
                }
            };

            // Initialize the worker with the current settings
            worker.postMessage({
                action: 'initialize',
                data: {
                    modelId: this.settings.modelId,
                    dtype: this.settings.dtype,
                    device: this.settings.device,
                },
            });

            // Settled by handleWorkerMessage ('initialized'), the error handler, teardown, or the timeout.
            const initPromise = new Promise((initResolve, initReject) => {
                const timeoutId = setTimeout(() => {
                    this.pendingRequests.delete('initialization');
                    initReject(new Error('Worker initialization timed out'));
                }, initTimeoutMs);

                this.pendingRequests.set('initialization', {
                    resolve: (result) => {
                        clearTimeout(timeoutId);
                        initResolve(result);
                    },
                    reject: (error) => {
                        clearTimeout(timeoutId);
                        initReject(error);
                    },
                });
            });

            const success = await initPromise;
            // Only the worker that is still current may touch the shared status.
            if (this.worker === worker) {
                this.ready = success;
                this.updateStatusDisplay();
            }
            return success;
        } catch (error) {
            console.error('Worker initialization failed:', error);
            // A superseded or disposed initialization must not mark the replacement as failed.
            if (this.worker === worker) {
                this.ready = false;
                this.updateStatusDisplay();
            }
            throw error;
        }
    }

    /**
     * Dispatches a message received from the worker to the promise that is waiting for it.
     * @param {MessageEvent} event Worker message; `event.data.action` selects the handling branch.
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;

        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (!initRequest) {
                    break;
                }
                this.pendingRequests.delete('initialization');
                if (success) {
                    initRequest.resolve(true);
                } else {
                    initRequest.reject(new Error(error || 'Initialization failed'));
                }
                break;
            }
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (!request) {
                    break;
                }
                this.pendingRequests.delete(requestId);
                if (!success) {
                    request.reject(new Error(error || 'TTS generation failed'));
                    break;
                }
                // The worker hands the audio over as a blob URL; read it back into a Response.
                fetch(blobUrl)
                    .then(response => response.blob())
                    .then(audioBlob => {
                        request.resolve(new Response(audioBlob, {
                            headers: {
                                'Content-Type': 'audio/wav',
                            },
                        }));
                    })
                    .catch(fetchError => {
                        request.reject(new Error('Failed to fetch TTS audio blob: ' + fetchError));
                    })
                    // Clean up the blob URL whether or not the fetch succeeded
                    .finally(() => URL.revokeObjectURL(blobUrl));
                break;
            }
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }

    /**
     * Reflects `this.ready` in the status label ('Ready' in green, 'Failed' in red).
     */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }

    /**
     * Starts the worker if there is none; otherwise asks the worker for its status.
     * @returns {Promise<boolean>} The initialization result, or the last known `ready` flag
     * (the reply to 'checkReady' arrives later through handleWorkerMessage).
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }

        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }

    /**
     * Restarts the worker on demand.
     * @returns {Promise<boolean>} Result of the re-initialization.
     */
    async onRefreshClick() {
        return await this.initializeWorker();
    }

    /**
     * Markup of the provider's settings panel, pre-filled from the current settings.
     * @returns {string} HTML string.
     */
    get settingsHtml() {
        return `
            <div class="kokoro_tts_settings">
                <label for="kokoro_model_id">Model ID:</label>
                <input id="kokoro_model_id" type="text" class="text_pole" value="${this.settings.modelId}" />

                <label for="kokoro_dtype">Data Type:</label>
                <select id="kokoro_dtype" class="text_pole">
                    <option value="q8" ${this.settings.dtype === 'q8' ? 'selected' : ''}>q8 (Recommended)</option>
                    <option value="fp32" ${this.settings.dtype === 'fp32' ? 'selected' : ''}>fp32 (High Precision)</option>
                    <option value="fp16" ${this.settings.dtype === 'fp16' ? 'selected' : ''}>fp16</option>
                    <option value="q4" ${this.settings.dtype === 'q4' ? 'selected' : ''}>q4 (Low Memory)</option>
                    <option value="q4f16" ${this.settings.dtype === 'q4f16' ? 'selected' : ''}>q4f16</option>
                </select>

                <label for="kokoro_device">Device:</label>
                <select id="kokoro_device" class="text_pole">
                    <option value="wasm" ${this.settings.device === 'wasm' ? 'selected' : ''}>WebAssembly (CPU)</option>
                    <option value="webgpu" ${this.settings.device === 'webgpu' ? 'selected' : ''}>WebGPU (GPU Acceleration)</option>
                </select>

                <label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${this.settings.speakingRate}x</span></label>
                <input id="kokoro_speaking_rate" type="range" value="${this.settings.speakingRate}" min="0.5" max="2.0" step="0.1" />

                <hr>
                <div>
                    Status: <span id="kokoro_status_text">Initializing...</span>
                </div>
            </div>
        `;
    }

    /**
     * Reads the settings controls back into `this.settings`, refreshes the rate label and saves.
     * The worker is restarted (debounced) only when a setting that is sent with the 'initialize'
     * message changed, or when no worker exists yet; the speaking rate travels with each
     * generation request, so moving the slider alone does not need a model reload.
     * @returns {Promise<void>}
     */
    async onSettingsChange() {
        const { modelId: previousModelId, dtype: previousDtype, device: previousDevice } = this.settings;

        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());

        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');

        const engineChanged = previousModelId !== this.settings.modelId
            || previousDtype !== this.settings.dtype
            || previousDevice !== this.settings.device;

        // Reinitialize TTS engine with debounce
        if (engineChanged || !this.worker) {
            // initializeWorker already logs failures and updates the status label,
            // so the rejection is only swallowed here to avoid an unhandled rejection.
            Promise.resolve(this.initTtsDebounced()).catch(() => { });
        }
        saveTtsProviderSettings();
    }

    /**
     * Lists every built-in voice as a voice descriptor.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }
        return this.voices.map(voice => this.getVoice(voice));
    }

    /**
     * Generates the preview sentence with the given voice and starts playing it.
     * @param {string} voiceId Voice to preview; unknown ids fall back to the default voice.
     * @returns {Promise<void>} Resolves once playback has been requested.
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }

        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        const response = await this.generateTts(previewText, voiceId);
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        const releaseUrl = () => URL.revokeObjectURL(url);

        const audioElement = new Audio();
        audioElement.onended = releaseUrl;
        audioElement.src = url;
        // play() returns a promise; release the object URL if playback cannot start.
        audioElement.play().catch(playError => {
            releaseUrl();
            console.error('Failed to play TTS preview:', playError);
        });
    }

    /**
     * @param {string} voiceId Voice identifier.
     * @returns {string} The identifier itself; voices have no separate display name.
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }

    /**
     * Builds the descriptor for a voice, falling back to the default voice for unknown names.
     * @param {string} voiceName Requested voice identifier.
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}} Voice descriptor;
     * `lang` is 'en-GB' for identifiers starting with 'b' and 'en-US' otherwise.
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }

    /**
     * Asks the worker to synthesize speech for the text.
     * @param {string} text Text to speak; must contain non-whitespace characters.
     * @param {string} voiceId Voice identifier; unknown ids fall back to the default voice.
     * @returns {Promise<Response>} Response wrapping the generated audio ('audio/wav').
     * @throws {Error} If the engine cannot be initialized, the text is empty, the worker reports
     * a failure, or the worker is replaced/disposed before it answers.
     */
    async generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            await this.initializeWorker();
        }

        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }

        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }

        const voice = this.getVoice(voiceId);
        const requestId = this.nextRequestId++;

        return new Promise((resolve, reject) => {
            // Store the promise callbacks
            this.pendingRequests.set(requestId, { resolve, reject });

            // Send the request to the worker
            try {
                this.worker.postMessage({
                    action: 'generateTts',
                    data: {
                        text,
                        voice: voice.voice_id,
                        speakingRate: this.settings.speakingRate || 1.0,
                        requestId,
                    },
                });
            } catch (postError) {
                // Nothing will ever answer this request, so do not keep it registered.
                this.pendingRequests.delete(requestId);
                reject(postError);
            }
        });
    }

    /**
     * Releases the worker when the provider is no longer used and rejects every request
     * that was still waiting for it.
     */
    dispose() {
        // Clean up the worker when the provider is disposed
        this.teardownWorker('Kokoro TTS provider was disposed');
        this.ready = false;
    }
}
|