mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
* feature: 'kokoro-js' supports TTS #3412
* Linting, add credits for kokoro library
* Fix voice preview
* Fix display languages on previews
* Fix settings restoration. Debounce model init on settings change
* Fix engine sorting
* Move TTS processing to a web worker. Remove unused gain setting
* Speaking rate fix
* Update status when recreating a worker
* Pass voices list from TTS engine
* Call dispose function on provider change
* Extend worker init timeout to 10 minutes

---------

Co-authored-by: ryan <1014670860@qq.com>
Co-authored-by: Cohee <18619528+Cohee1207@users.noreply.github.com>
This commit is contained in:
326
public/scripts/extensions/tts/kokoro.js
Normal file
326
public/scripts/extensions/tts/kokoro.js
Normal file
@@ -0,0 +1,326 @@
|
||||
import { debounce_timeout } from '../../constants.js';
|
||||
import { debounceAsync } from '../../utils.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
|
||||
/**
 * TTS provider that delegates speech synthesis to a dedicated module worker
 * (`./kokoro-worker.js`).
 *
 * The provider talks to the worker with `{ action, data }` messages and keeps
 * the promise callbacks of every in-flight request in `pendingRequests`, keyed
 * by a numeric request id (or the fixed key `'initialization'` for the model
 * load). Replies are routed back to those callbacks by `handleWorkerMessage`.
 */
export class KokoroTtsProvider {
    constructor() {
        /** User-configurable settings; the values below are the defaults. */
        this.settings = {
            modelId: 'onnx-community/Kokoro-82M-v1.0-ONNX',
            dtype: 'q8',
            device: 'wasm',
            voiceMap: {},
            defaultVoice: 'af_heart',
            speakingRate: 1.0,
        };
        /** True once the current worker has reported a successful initialization. */
        this.ready = false;
        /** Known voice ids. Ids starting with 'b' are reported as en-GB, all others as en-US (see getVoice). */
        this.voices = [
            'af_heart',
            'af_alloy',
            'af_aoede',
            'af_bella',
            'af_jessica',
            'af_kore',
            'af_nicole',
            'af_nova',
            'af_river',
            'af_sarah',
            'af_sky',
            'am_adam',
            'am_echo',
            'am_eric',
            'am_fenrir',
            'am_liam',
            'am_michael',
            'am_onyx',
            'am_puck',
            'am_santa',
            'bf_emma',
            'bf_isabella',
            'bm_george',
            'bm_lewis',
            'bf_alice',
            'bf_lily',
            'bm_daniel',
            'bm_fable',
        ];
        /** The active Worker instance, or null when none has been created / after dispose(). */
        this.worker = null;
        // Not read anywhere inside this class; presumably consumed by the TTS extension — verify against the caller.
        this.separator = ' ... ... ... ';
        /** In-flight requests: key -> { resolve, reject }. */
        this.pendingRequests = new Map();
        this.nextRequestId = 1;
        /** Promise of the initialization currently in flight, or null when none is running. */
        this.initPromise = null;

        // Update display values immediately but only reinitialize TTS after a delay
        this.initTtsDebounced = debounceAsync(this.initializeWorker.bind(this), debounce_timeout.relaxed);
    }

    /**
     * Copies every known key that is present in `settings` over the defaults,
     * then pushes the effective values into the settings controls and wires
     * their change handlers.
     * @param {object} settings Previously saved provider settings (may be partial).
     */
    async loadSettings(settings) {
        const knownKeys = ['modelId', 'dtype', 'device', 'voiceMap', 'defaultVoice', 'speakingRate'];
        for (const key of knownKeys) {
            if (settings?.[key] !== undefined) {
                this.settings[key] = settings[key];
            }
        }

        const onChange = this.onSettingsChange.bind(this);
        $('#kokoro_model_id').val(this.settings.modelId).on('input', onChange);
        $('#kokoro_dtype').val(this.settings.dtype).on('change', onChange);
        $('#kokoro_device').val(this.settings.device).on('change', onChange);
        $('#kokoro_speaking_rate').val(this.settings.speakingRate).on('input', onChange);
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');
    }

    /**
     * Rejects every pending request (initialization and TTS) with the given
     * reason and empties the request map. Used whenever the worker that would
     * have answered them is terminated, so that no caller is left waiting on a
     * promise that can never settle.
     * @param {string} reason Message of the Error passed to each pending request.
     */
    rejectPendingRequests(reason) {
        const abandoned = [...this.pendingRequests.values()];
        // Clear first so the reject callbacks observe an already-consistent map.
        this.pendingRequests.clear();
        for (const request of abandoned) {
            request.reject(new Error(reason));
        }
    }

    /**
     * (Re)creates the worker and asks it to load the model described by the
     * current settings. An existing worker is terminated first and everything
     * that was still waiting on it is rejected.
     * @returns {Promise<boolean>} Resolves with the success flag reported by the
     * worker; rejects when the worker cannot be created, reports a failure,
     * raises an error event, is superseded by a newer call, or does not answer
     * within 10 minutes.
     */
    initializeWorker() {
        const attempt = new Promise((resolve, reject) => {
            let worker = null;

            try {
                // Terminate the existing worker if it exists
                if (this.worker) {
                    this.worker.terminate();
                    this.worker = null;
                    // The replacement is not loaded yet, so requests must not be sent to it.
                    this.ready = false;
                    this.rejectPendingRequests('TTS worker was restarted');
                    $('#kokoro_status_text').text('Initializing...').removeAttr('style');
                }

                // Create a new worker
                worker = new Worker(new URL('./kokoro-worker.js', import.meta.url), { type: 'module' });
                this.worker = worker;

                // Only the attempt that still owns the active worker may touch `ready`
                // and the status label; a superseded attempt must not overwrite the
                // state that belongs to its successor.
                const isCurrent = () => this.worker === worker;

                let timeoutId = null;
                let settled = false;
                const settleOnce = () => {
                    if (settled) {
                        return false;
                    }
                    settled = true;
                    clearTimeout(timeoutId);
                    if (this.pendingRequests.get('initialization') === initRequest) {
                        this.pendingRequests.delete('initialization');
                    }
                    return true;
                };
                const initRequest = {
                    resolve: (success) => {
                        if (!settleOnce()) {
                            return;
                        }
                        if (isCurrent()) {
                            this.ready = success;
                            this.updateStatusDisplay();
                        }
                        resolve(success);
                    },
                    reject: (error) => {
                        if (!settleOnce()) {
                            return;
                        }
                        if (isCurrent()) {
                            console.error('Worker initialization failed:', error);
                            this.ready = false;
                            this.updateStatusDisplay();
                        }
                        reject(error);
                    },
                };

                // Set up message handling
                worker.onmessage = this.handleWorkerMessage.bind(this);
                // Fail the initialization right away when the worker reports an error
                // (e.g. its script cannot be loaded) instead of waiting for the timeout.
                // After initialization has settled this only logs.
                worker.onerror = (event) => {
                    console.error('Kokoro worker error:', event);
                    initRequest.reject(new Error(event?.message || 'Worker failed to start'));
                };

                // Initialize the worker with the current settings
                worker.postMessage({
                    action: 'initialize',
                    data: {
                        modelId: this.settings.modelId,
                        dtype: this.settings.dtype,
                        device: this.settings.device,
                    },
                });

                // Wait for the 'initialized' reply, but not forever.
                const initTimeoutMs = 600000; // 10 minutes
                timeoutId = setTimeout(() => {
                    initRequest.reject(new Error('Worker initialization timed out'));
                }, initTimeoutMs);
                this.pendingRequests.set('initialization', initRequest);
            } catch (error) {
                console.error('Failed to create worker:', error);
                if (worker) {
                    // Do not keep a half-configured worker around.
                    worker.terminate();
                    if (this.worker === worker) {
                        this.worker = null;
                    }
                }
                this.ready = false;
                this.updateStatusDisplay();
                reject(error);
            }
        });

        // Remember the attempt while it is in flight so that generateTts() can wait
        // for it instead of starting a competing worker.
        this.initPromise = attempt;
        const forget = () => {
            if (this.initPromise === attempt) {
                this.initPromise = null;
            }
        };
        attempt.then(forget, forget);

        return attempt;
    }

    /**
     * Routes a worker reply to the pending request it belongs to.
     * Handles 'initialized', 'generatedTts' and 'readyStatus'; other actions are ignored.
     * @param {MessageEvent} event Message received from the worker.
     */
    handleWorkerMessage(event) {
        const { action, success, ready, error, requestId, blobUrl } = event.data;

        switch (action) {
            case 'initialized': {
                const initRequest = this.pendingRequests.get('initialization');
                if (!initRequest) {
                    break;
                }
                this.pendingRequests.delete('initialization');
                if (success) {
                    initRequest.resolve(true);
                } else {
                    initRequest.reject(new Error(error || 'Initialization failed'));
                }
                break;
            }
            case 'generatedTts': {
                const request = this.pendingRequests.get(requestId);
                if (!request) {
                    break;
                }
                this.pendingRequests.delete(requestId);
                if (!success) {
                    request.reject(new Error(error || 'TTS generation failed'));
                    break;
                }
                // The worker hands the audio over as a blob URL; read it back into a
                // Response and release the URL whether or not the read succeeded.
                fetch(blobUrl)
                    .then(response => response.blob())
                    .then(audioBlob => {
                        request.resolve(new Response(audioBlob, {
                            headers: {
                                'Content-Type': 'audio/wav',
                            },
                        }));
                    })
                    .catch(fetchError => {
                        request.reject(new Error('Failed to fetch TTS audio blob: ' + fetchError));
                    })
                    .finally(() => URL.revokeObjectURL(blobUrl));
                break;
            }
            case 'readyStatus':
                this.ready = ready;
                this.updateStatusDisplay();
                break;
        }
    }

    /** Shows 'Ready' (green) or 'Failed' (red) in the status label according to `ready`. */
    updateStatusDisplay() {
        const statusText = this.ready ? 'Ready' : 'Failed';
        const statusColor = this.ready ? 'green' : 'red';
        $('#kokoro_status_text').text(statusText).css('color', statusColor);
    }

    /**
     * Starts the worker when there is none; otherwise asks the existing worker to
     * report its status (the answer arrives later as a 'readyStatus' message).
     * @returns {Promise<boolean>} Result of the initialization, or the currently known ready flag.
     */
    async checkReady() {
        if (!this.worker) {
            return await this.initializeWorker();
        }

        this.worker.postMessage({ action: 'checkReady' });
        return this.ready;
    }

    /**
     * Restarts the worker with the current settings.
     * @returns {Promise<boolean>} See initializeWorker().
     */
    async onRefreshClick() {
        return await this.initializeWorker();
    }

    /**
     * Markup of the provider's settings panel, pre-filled with the current settings.
     * Interpolated values are escaped because they end up inside HTML attributes.
     * @returns {string} HTML string.
     */
    get settingsHtml() {
        const escapeHtml = (value) => String(value)
            .replace(/&/g, '&amp;')
            .replace(/"/g, '&quot;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;');
        const selectedIf = (isSelected) => (isSelected ? 'selected' : '');
        const { dtype, device } = this.settings;
        const modelId = escapeHtml(this.settings.modelId);
        const speakingRate = escapeHtml(this.settings.speakingRate);

        return `
            <div class="kokoro_tts_settings">
                <label for="kokoro_model_id">Model ID:</label>
                <input id="kokoro_model_id" type="text" class="text_pole" value="${modelId}" />

                <label for="kokoro_dtype">Data Type:</label>
                <select id="kokoro_dtype" class="text_pole">
                    <option value="q8" ${selectedIf(dtype === 'q8')}>q8 (Recommended)</option>
                    <option value="fp32" ${selectedIf(dtype === 'fp32')}>fp32 (High Precision)</option>
                    <option value="fp16" ${selectedIf(dtype === 'fp16')}>fp16</option>
                    <option value="q4" ${selectedIf(dtype === 'q4')}>q4 (Low Memory)</option>
                    <option value="q4f16" ${selectedIf(dtype === 'q4f16')}>q4f16</option>
                </select>

                <label for="kokoro_device">Device:</label>
                <select id="kokoro_device" class="text_pole">
                    <option value="wasm" ${selectedIf(device === 'wasm')}>WebAssembly (CPU)</option>
                    <option value="webgpu" ${selectedIf(device === 'webgpu')}>WebGPU (GPU Acceleration)</option>
                </select>

                <label for="kokoro_speaking_rate">Speaking Rate: <span id="kokoro_speaking_rate_output">${speakingRate}x</span></label>
                <input id="kokoro_speaking_rate" type="range" value="${speakingRate}" min="0.5" max="2.0" step="0.1" />

                <hr>
                <div>
                    Status: <span id="kokoro_status_text">Initializing...</span>
                </div>
            </div>
        `;
    }

    /**
     * Reads the settings controls back into `settings`, refreshes the speaking
     * rate label and saves the settings. The worker is only re-initialized
     * (debounced) when the model id, data type or device changed: the speaking
     * rate is sent with every generateTts request and is not part of the
     * worker's 'initialize' message, so changing it needs no model reload.
     */
    async onSettingsChange() {
        const { modelId: previousModelId, dtype: previousDtype, device: previousDevice } = this.settings;

        this.settings.modelId = $('#kokoro_model_id').val().toString();
        this.settings.dtype = $('#kokoro_dtype').val().toString();
        this.settings.device = $('#kokoro_device').val().toString();
        this.settings.speakingRate = parseFloat($('#kokoro_speaking_rate').val().toString());

        // Update UI display
        $('#kokoro_speaking_rate_output').text(this.settings.speakingRate + 'x');

        const engineChanged = this.settings.modelId !== previousModelId
            || this.settings.dtype !== previousDtype
            || this.settings.device !== previousDevice;

        if (engineChanged) {
            // Reinitialize TTS engine with debounce. The result is not awaited here, so
            // attach a handler to keep a failed or superseded attempt from surfacing
            // as an unhandled rejection; the status label already reflects failures.
            Promise.resolve(this.initTtsDebounced()).catch(error => {
                console.warn('Kokoro TTS re-initialization did not complete:', error);
            });
        }

        saveTtsProviderSettings();
    }

    /**
     * Lists all known voices, making sure the engine has been started first.
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: null, lang: string}>>}
     */
    async fetchTtsVoiceObjects() {
        if (!this.ready) {
            await this.checkReady();
        }

        // Every entry of `voices` is a known voice, so getVoice() returns its own descriptor.
        return this.voices.map(voice => this.getVoice(voice));
    }

    /**
     * Synthesizes the preview sentence for the voice's language and plays it.
     * @param {string} voiceId Voice to preview; unknown ids fall back to the default voice.
     */
    async previewTtsVoice(voiceId) {
        if (!this.ready) {
            await this.checkReady();
        }

        const voice = this.getVoice(voiceId);
        const previewText = getPreviewString(voice.lang);
        const response = await this.generateTts(previewText, voiceId);
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        const releaseUrl = () => URL.revokeObjectURL(url);

        const audioElement = new Audio();
        // Release the object URL when playback finishes or cannot proceed.
        audioElement.onended = releaseUrl;
        audioElement.onerror = releaseUrl;
        audioElement.src = url;
        Promise.resolve(audioElement.play()).catch(error => {
            releaseUrl();
            console.error('Kokoro voice preview failed to play:', error);
        });
    }

    /**
     * @param {string} voiceId Voice id.
     * @returns {string} The id itself; voices have no separate display name.
     */
    getVoiceDisplayName(voiceId) {
        return voiceId;
    }

    /**
     * Builds the voice descriptor for a voice name.
     * @param {string} voiceName Requested voice id.
     * @returns {{name: string, voice_id: string, preview_url: null, lang: string}}
     * Descriptor of the requested voice, or of the default voice
     * (`settings.defaultVoice`, else 'af_heart') when the name is not in `voices`.
     */
    getVoice(voiceName) {
        const defaultVoice = this.settings.defaultVoice || 'af_heart';
        const actualVoiceName = this.voices.includes(voiceName) ? voiceName : defaultVoice;
        return {
            name: actualVoiceName,
            voice_id: actualVoiceName,
            preview_url: null,
            lang: actualVoiceName.startsWith('b') ? 'en-GB' : 'en-US',
        };
    }

    /**
     * Asks the worker to synthesize `text` with the given voice.
     * @param {string} text Text to speak; must contain non-whitespace characters.
     * @param {string} voiceId Voice id; unknown ids fall back to the default voice.
     * @returns {Promise<Response>} Response whose body is the generated audio ('audio/wav').
     * @throws {Error} When the engine cannot be initialized, the text is empty,
     * the worker reports a failure, or the worker is restarted/disposed before answering.
     */
    async generateTts(text, voiceId) {
        if (!this.ready || !this.worker) {
            console.log('TTS not ready, initializing...');
            // Join an initialization that is already running rather than starting a
            // new worker, which would terminate the one that is currently loading.
            await (this.initPromise ?? this.initializeWorker());
        }

        if (!this.ready || !this.worker) {
            throw new Error('Failed to initialize TTS engine');
        }

        if (text.trim().length === 0) {
            throw new Error('Empty text');
        }

        const voice = this.getVoice(voiceId);
        const requestId = this.nextRequestId++;
        const worker = this.worker;

        return new Promise((resolve, reject) => {
            // Store the promise callbacks
            this.pendingRequests.set(requestId, { resolve, reject });

            // Send the request to the worker
            try {
                worker.postMessage({
                    action: 'generateTts',
                    data: {
                        text,
                        voice: voice.voice_id,
                        speakingRate: this.settings.speakingRate || 1.0,
                        requestId,
                    },
                });
            } catch (error) {
                // The request never reached the worker, so no reply will ever clear it.
                this.pendingRequests.delete(requestId);
                reject(error);
            }
        });
    }

    /**
     * Releases the worker when the provider is disposed. Requests that were still
     * waiting for the worker are rejected and the provider is marked as not ready.
     */
    dispose() {
        if (this.worker) {
            this.worker.terminate();
            this.worker = null;
        }
        this.ready = false;
        this.rejectPendingRequests('TTS provider was disposed');
    }
}
|
Reference in New Issue
Block a user