SillyTavern/public/scripts/extensions/tts/edge.js

271 lines
8.5 KiB
JavaScript
Raw Normal View History

2023-12-02 20:11:06 +01:00
import { getRequestHeaders } from '../../../script.js';
import { getApiUrl } from '../../extensions.js';
import { doExtrasFetch, modules } from '../../extensions.js';
import { getPreviewString } from './index.js';
import { saveTtsProviderSettings } from './index.js';
2023-07-20 19:32:15 +02:00
2023-12-02 20:11:06 +01:00
export { EdgeTtsProvider };
2023-07-20 19:32:15 +02:00
/**
 * Identifiers of the backends that can serve Edge TTS requests.
 * The values are persisted in the provider settings and used as the
 * `<option>` values of the provider selector, so they must stay stable.
 * Frozen so that a stray assignment cannot silently break the
 * provider comparisons made throughout this file.
 */
const EDGE_TTS_PROVIDER = Object.freeze({
    extras: 'extras',
    plugin: 'plugin',
});
2023-07-20 19:32:15 +02:00
/**
 * TTS provider backed by Microsoft Edge TTS.
 *
 * Requests are routed either through the Extras API (`doExtrasFetch`) or
 * through a SillyTavern server plugin (plain `fetch`), depending on
 * `settings.provider`.
 */
class EdgeTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings; populated by `loadSettings`. */
    settings;
    /** Cache of voice objects, filled lazily by `getVoice`. */
    voices = [];
    separator = ' . ';
    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        voiceMap: {},
        rate: 0,
        provider: EDGE_TTS_PROVIDER.extras,
    };

    /**
     * HTML for this provider's settings panel (provider selector and rate slider).
     * @returns {string} HTML markup
     */
    get settingsHtml() {
        return `Microsoft Edge TTS<br>
        <label for="edge_tts_provider">Provider</label>
        <select id="edge_tts_provider">
            <option value="${EDGE_TTS_PROVIDER.extras}">Extras</option>
            <option value="${EDGE_TTS_PROVIDER.plugin}">Plugin</option>
        </select>
        <label for="edge_tts_rate">Rate: <span id="edge_tts_rate_output"></span></label>
        <input id="edge_tts_rate" type="range" value="${this.defaultSettings.rate}" min="-100" max="100" step="1" />
        `;
    }

    /**
     * Read the current values of the settings controls into `this.settings`,
     * refresh the rate label and persist the settings.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#edge_tts_rate').val());
        $('#edge_tts_rate_output').text(this.settings.rate);
        this.settings.provider = String($('#edge_tts_provider').val());
        saveTtsProviderSettings();
    }

    /**
     * Apply saved settings on top of the defaults, sync the settings UI,
     * attach the UI change handlers and run the readiness check.
     * @param {Object} settings Saved provider settings; may be empty
     * @throws {string} If `settings` contains a key that is not a known setting
     */
    async loadSettings(settings) {
        const incoming = settings ?? {};

        // Populate Provider UI given input settings
        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that loading or editing settings never
        // overwrites the defaults themselves.
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const key in incoming) {
            if (key in this.settings) {
                this.settings[key] = incoming[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#edge_tts_rate').val(this.settings.rate || 0);
        $('#edge_tts_rate_output').text(this.settings.rate || 0);
        $('#edge_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#edge_tts_provider').val(this.settings.provider || EDGE_TTS_PROVIDER.extras);
        $('#edge_tts_provider').on('change', () => { this.onSettingsChange(); });

        await this.checkReady();

        console.debug('EdgeTTS: Settings loaded');
    }

    /**
     * Perform a simple readiness check by trying to fetch voiceIds.
     * `fetchTtsVoiceObjects` verifies that the selected backend is
     * available before issuing the request, so no separate check is needed.
     */
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Refresh handler; this provider has nothing to refresh. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Get a voice from the TTS provider.
     * @param {string} voiceName Voice name to get
     * @returns {Promise<Object>} Voice object
     * @throws {string} If no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName);
        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }
        return match;
    }

    /**
     * Generate TTS for a given text.
     * @param {string} text Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetch the voice list from the selected backend.
     * @returns {Promise<Object[]>} Voice objects (`name`, `voice_id`, `preview_url`, `lang`),
     * sorted by locale and then by short name
     * @throws {Error} If the backend is unavailable or responds with a non-OK status
     */
    async fetchTtsVoiceObjects() {
        await this.throwIfModuleMissing();

        const response = await this.doFetch(this.getVoicesUrl());
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     */
    async previewTtsVoice(id) {
        // Stop whatever preview is currently playing.
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        const response = await this.fetchTtsGeneration(text, id);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        // Register the cleanup before starting playback so the blob URL is
        // released once the preview has finished.
        this.audioElement.onended = () => URL.revokeObjectURL(url);
        this.audioElement.src = url;
        this.audioElement.play();
    }

    /**
     * Fetch TTS generation from the API.
     * @param {string} inputText Text to generate TTS for
     * @param {string} voiceId Voice ID to use
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} If the backend is unavailable or responds with a non-OK status
     */
    async fetchTtsGeneration(inputText, voiceId) {
        await this.throwIfModuleMissing();

        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await this.doFetch(this.getGenerateUrl(), {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                'text': inputText,
                'voice': voiceId,
                'rate': Number(this.settings.rate),
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Perform a fetch request using the configured provider.
     * @param {string} url URL string
     * @param {any} options Request options
     * @returns {Promise<Response>} Fetch response
     * @throws {Error} If the configured provider is unknown
     */
    doFetch(url, options) {
        switch (this.settings.provider) {
            case EDGE_TTS_PROVIDER.extras:
                return doExtrasFetch(url, options);
            case EDGE_TTS_PROVIDER.plugin:
                return fetch(url, options);
            default:
                throw new Error('Invalid TTS Provider');
        }
    }

    /**
     * Build the URL of an Edge TTS endpoint for the configured provider.
     * @param {string} endpoint Endpoint name, e.g. `generate` or `list`
     * @returns {string} Absolute URL (Extras) or server-relative path (plugin)
     * @throws {Error} If the configured provider is unknown
     */
    getEndpointUrl(endpoint) {
        switch (this.settings.provider) {
            case EDGE_TTS_PROVIDER.extras: {
                const url = new URL(getApiUrl());
                url.pathname = `/api/edge-tts/${endpoint}`;
                return url.toString();
            }
            case EDGE_TTS_PROVIDER.plugin:
                return `/api/plugins/edge-tts/${endpoint}`;
            default:
                throw new Error('Invalid TTS Provider');
        }
    }

    /**
     * Get the URL for the TTS generation endpoint.
     * @returns {string} URL string
     */
    getGenerateUrl() {
        return this.getEndpointUrl('generate');
    }

    /**
     * Get the URL for the TTS voices endpoint.
     * @returns {string} URL string
     */
    getVoicesUrl() {
        return this.getEndpointUrl('list');
    }

    /**
     * Verify that the backend required by the configured provider is available.
     * @throws {Error} If the Extras module or the server plugin is missing
     */
    async throwIfModuleMissing() {
        const { provider } = this.settings;

        if (provider === EDGE_TTS_PROVIDER.extras && !modules.includes('edge-tts')) {
            const message = 'Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.';
            throw new Error(message);
        }

        // isPluginAvailable() is async: its result must be awaited, otherwise
        // the (always truthy) Promise would make this check a no-op.
        if (provider === EDGE_TTS_PROVIDER.plugin && !(await this.isPluginAvailable())) {
            const message = 'Edge TTS Server plugin not loaded. Install it from https://github.com/SillyTavern/SillyTavern-EdgeTTS-Plugin and restart the SillyTavern server.';
            throw new Error(message);
        }
    }

    /**
     * Probe the server for the Edge TTS plugin.
     * @returns {Promise<boolean>} True if the probe endpoint answered with an OK status
     */
    async isPluginAvailable() {
        try {
            const result = await fetch('/api/plugins/edge-tts/probe', {
                method: 'POST',
                headers: getRequestHeaders(),
            });
            return result.ok;
        } catch (e) {
            // A failed request is treated the same as a missing plugin.
            return false;
        }
    }
}