Merge pull request #1021 from ouoertheo/ouoertheo/tts-ui-voicemap

TTS Voice map UI and Coqui UI improvements
This commit is contained in:
Cohee 2023-08-28 23:57:22 +03:00 committed by GitHub
commit f1b91620b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 638 additions and 271 deletions

View File

@ -1,4 +1,3 @@
body.tts .mes[is_user="true"] .mes_narrate,
body.tts .mes[is_system="true"] .mes_narrate { body.tts .mes[is_system="true"] .mes_narrate {
display: none; display: none;
} }
@ -364,4 +363,4 @@ body.movingUI #groupMemberListPopout {
body.noShadows * { body.noShadows * {
text-shadow: none !important; text-shadow: none !important;
} }

View File

@ -5,13 +5,16 @@ TODO:
*/ */
import { doExtrasFetch, extension_settings, getApiUrl, getContext, modules, ModuleWorkerWrapper } from "../../extensions.js" import { doExtrasFetch, extension_settings, getApiUrl, getContext, modules, ModuleWorkerWrapper } from "../../extensions.js"
import { callPopup } from "../../../script.js"
import { initVoiceMap } from "./index.js"
export { CoquiTtsProvider } export { CoquiTtsProvider }
const DEBUG_PREFIX = "<Coqui TTS module> "; const DEBUG_PREFIX = "<Coqui TTS module> ";
const UPDATE_INTERVAL = 1000; const UPDATE_INTERVAL = 1000;
let charactersList = []; // Updated with module worker let inApiCall = false;
let voiceIdList = []; // Updated with module worker
let coquiApiModels = {}; // Initialized only once let coquiApiModels = {}; // Initialized only once
let coquiApiModelsFull = {}; // Initialized only once let coquiApiModelsFull = {}; // Initialized only once
let coquiLocalModels = []; // Initialized only once let coquiLocalModels = []; // Initialized only once
@ -39,16 +42,11 @@ const languageLabels = {
"ja": "Japanese" "ja": "Japanese"
} }
const defaultSettings = {
voiceMap: "",
voiceMapDict: {}
}
function throwIfModuleMissing() { function throwIfModuleMissing() {
if (!modules.includes('coqui-tts')) { if (!modules.includes('coqui-tts')) {
toastr.error(`Add coqui-tts to enable-modules and restart the Extras API.`, "Coqui TTS module not loaded.", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); const message = `Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.`
throw new Error(DEBUG_PREFIX, `Coqui TTS module not loaded.`); // toastr.error(message, { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
throw new Error(DEBUG_PREFIX, message);
} }
} }
@ -57,46 +55,18 @@ function resetModelSettings() {
$("#coqui_api_model_settings_speaker").val("none"); $("#coqui_api_model_settings_speaker").val("none");
} }
function updateCharactersList() {
let currentcharacters = new Set();
const context = getContext();
for (const i of context.characters) {
currentcharacters.add(i.name);
}
currentcharacters = Array.from(currentcharacters);
currentcharacters.unshift(context.name1);
if (JSON.stringify(charactersList) !== JSON.stringify(currentcharacters)) {
charactersList = currentcharacters
$('#coqui_character_select')
.find('option')
.remove()
.end()
.append('<option value="none">Select Character</option>')
.val('none')
for (const charName of charactersList) {
$("#coqui_character_select").append(new Option(charName, charName));
}
console.debug(DEBUG_PREFIX, "Updated character list to:", charactersList);
}
}
class CoquiTtsProvider { class CoquiTtsProvider {
//#############################// //#############################//
// Extension UI and Settings // // Extension UI and Settings //
//#############################// //#############################//
static instance; settings
settings = {};
// Singleton to allow acces to instance in event functions defaultSettings = {
constructor() { voiceMap: {},
if (CoquiTtsProvider.instance === undefined) customVoices: {},
CoquiTtsProvider.instance = this; voiceIds: [],
voiceMapDict: {}
} }
get settingsHtml() { get settingsHtml() {
@ -104,13 +74,15 @@ class CoquiTtsProvider {
<div class="flex wide100p flexGap10 alignitemscenter"> <div class="flex wide100p flexGap10 alignitemscenter">
<div> <div>
<div style="flex: 50%;"> <div style="flex: 50%;">
<label for="coqui_character_select">Character:</label> <small>To use CoquiTTS, select the origin, language, and model, then click Add Voice. The voice will then be available to add to a character. Voices are saved globally. </small><br>
<select id="coqui_character_select"> <label for="coqui_voicename_select">Select Saved Voice:</label>
<select id="coqui_voicename_select">
<!-- Populated by JS --> <!-- Populated by JS -->
</select> </select>
<div class="tts_block">
<input id="coqui_remove_char_mapping" class="menu_button" type="button" value="Remove from Voice Map" /> <input id="coqui_remove_voiceId_mapping" class="menu_button" type="button" value="Remove Voice" />
<input id="coqui_add_voiceId_mapping" class="menu_button" type="button" value="Add Voice" />
</div>
<label for="coqui_model_origin">Models:</label> <label for="coqui_model_origin">Models:</label>
<select id="coqui_model_origin">gpu_mode <select id="coqui_model_origin">gpu_mode
<option value="none">Select Origin</option> <option value="none">Select Origin</option>
@ -139,7 +111,7 @@ class CoquiTtsProvider {
<span id="coqui_api_model_install_status">Model installed on extras server</span> <span id="coqui_api_model_install_status">Model installed on extras server</span>
<input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" /> <input id="coqui_api_model_install_button" class="menu_button" type="button" value="Install" />
</div> </div>
<div id="coqui_local_model_div"> <div id="coqui_local_model_div">
<select id="coqui_local_model_name"> <select id="coqui_local_model_name">
<!-- Populated by JS and request --> <!-- Populated by JS and request -->
@ -153,13 +125,9 @@ class CoquiTtsProvider {
return html return html
} }
loadSettings(settings) { async loadSettings(settings) {
if (Object.keys(this.settings).length === 0) {
Object.assign(this.settings, defaultSettings)
}
// Only accept keys defined in defaultSettings // Only accept keys defined in defaultSettings
this.settings = defaultSettings; this.settings = this.defaultSettings
for (const key in settings) { for (const key in settings) {
if (key in this.settings) { if (key in this.settings) {
@ -169,7 +137,8 @@ class CoquiTtsProvider {
} }
} }
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification await initLocalModels();
this.updateCustomVoices(); // Overide any manual modification
$("#coqui_api_model_div").hide(); $("#coqui_api_model_div").hide();
$("#coqui_local_model_div").hide(); $("#coqui_local_model_div").hide();
@ -180,70 +149,123 @@ class CoquiTtsProvider {
$("#coqui_api_model_install_status").hide(); $("#coqui_api_model_install_status").hide();
$("#coqui_api_model_install_button").hide(); $("#coqui_api_model_install_button").hide();
$("#coqui_model_origin").on("change", CoquiTtsProvider.onModelOriginChange); let that = this
$("#coqui_api_language").on("change", CoquiTtsProvider.onModelLanguageChange); $("#coqui_model_origin").on("change", function () { that.onModelOriginChange() });
$("#coqui_api_model_name").on("change", CoquiTtsProvider.onModelNameChange); $("#coqui_api_language").on("change", function () { that.onModelLanguageChange() });
$("#coqui_remove_char_mapping").on("click", CoquiTtsProvider.onRemoveClick); $("#coqui_api_model_name").on("change", function () { that.onModelNameChange() });
updateCharactersList(); $("#coqui_remove_voiceId_mapping").on("click", function () { that.onRemoveClick() });
$("#coqui_add_voiceId_mapping").on("click", function () { that.onAddClick() });
// Load coqui-api settings from json file // Load coqui-api settings from json file
fetch("/scripts/extensions/tts/coqui_api_models_settings.json") await fetch("/scripts/extensions/tts/coqui_api_models_settings.json")
.then(response => response.json()) .then(response => response.json())
.then(json => { .then(json => {
coquiApiModels = json; coquiApiModels = json;
console.debug(DEBUG_PREFIX,"initialized coqui-api model list to", coquiApiModels); console.debug(DEBUG_PREFIX,"initialized coqui-api model list to", coquiApiModels);
/*
$('#coqui_api_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModels) {
$("#coqui_api_language").append(new Option(languageLabels[language],language));
console.log(DEBUG_PREFIX,"added language",language);
}*/
}); });
// Load coqui-api FULL settings from json file // Load coqui-api FULL settings from json file
fetch("/scripts/extensions/tts/coqui_api_models_settings_full.json") await fetch("/scripts/extensions/tts/coqui_api_models_settings_full.json")
.then(response => response.json()) .then(response => response.json())
.then(json => { .then(json => {
coquiApiModelsFull = json; coquiApiModelsFull = json;
console.debug(DEBUG_PREFIX,"initialized coqui-api full model list to", coquiApiModelsFull); console.debug(DEBUG_PREFIX,"initialized coqui-api full model list to", coquiApiModelsFull);
/*
$('#coqui_api_full_language')
.find('option')
.remove()
.end()
.append('<option value="none">Select model language</option>')
.val('none');
for(let language in coquiApiModelsFull) {
$("#coqui_api_full_language").append(new Option(languageLabels[language],language));
console.log(DEBUG_PREFIX,"added language",language);
}*/
}); });
} }
static updateVoiceMap() { // Perform a simple readiness check by trying to fetch voiceIds
CoquiTtsProvider.instance.settings.voiceMap = ""; async checkReady(){
for (let i in CoquiTtsProvider.instance.settings.voiceMapDict) { throwIfModuleMissing()
const voice_settings = CoquiTtsProvider.instance.settings.voiceMapDict[i]; await this.fetchTtsVoiceObjects()
CoquiTtsProvider.instance.settings.voiceMap += i + ":" + voice_settings["model_id"]; }
if (voice_settings["model_language"] != null) updateCustomVoices() {
CoquiTtsProvider.instance.settings.voiceMap += "[" + voice_settings["model_language"] + "]"; // Takes voiceMapDict and converts it to a string to save to voiceMap
this.settings.customVoices = {};
for (let voiceName in this.settings.voiceMapDict) {
const voiceId = this.settings.voiceMapDict[voiceName];
this.settings.customVoices[voiceName] = voiceId["model_id"];
if (voice_settings["model_speaker"] != null) if (voiceId["model_language"] != null)
CoquiTtsProvider.instance.settings.voiceMap += "[" + voice_settings["model_speaker"] + "]"; this.settings.customVoices[voiceName] += "[" + voiceId["model_language"] + "]";
CoquiTtsProvider.instance.settings.voiceMap += ","; if (voiceId["model_speaker"] != null)
this.settings.customVoices[voiceName] += "[" + voiceId["model_speaker"] + "]";
} }
$("#tts_voice_map").val(CoquiTtsProvider.instance.settings.voiceMap);
//extension_settings.tts.Coqui = extension_settings.tts.Coqui; // Update UI select list with voices
$("#coqui_voicename_select").empty()
$('#coqui_voicename_select')
.find('option')
.remove()
.end()
.append('<option value="none">Select Voice</option>')
.val('none')
for (const voiceName in this.settings.voiceMapDict) {
$("#coqui_voicename_select").append(new Option(voiceName, voiceName));
}
this.onSettingsChange()
} }
onSettingsChange() { onSettingsChange() {
//console.debug(DEBUG_PREFIX, "Settings changes", CoquiTtsProvider.instance.settings); console.debug(DEBUG_PREFIX, "Settings changes", this.settings);
CoquiTtsProvider.updateVoiceMap(); extension_settings.tts.Coqui = this.settings;
} }
async onApplyClick() { async onRefreshClick() {
const character = $("#coqui_character_select").val(); this.checkReady()
}
async onAddClick() {
if (inApiCall) {
return; //TODO: block dropdown
}
// Ask user for voiceId name to save voice
const voiceName = await callPopup('<h3>Name of Coqui voice to add to voice select dropdown:</h3>', 'input')
const model_origin = $("#coqui_model_origin").val(); const model_origin = $("#coqui_model_origin").val();
const model_language = $("#coqui_api_language").val(); const model_language = $("#coqui_api_language").val();
const model_name = $("#coqui_api_model_name").val(); const model_name = $("#coqui_api_model_name").val();
let model_setting_language = $("#coqui_api_model_settings_language").val(); let model_setting_language = $("#coqui_api_model_settings_language").val();
let model_setting_speaker = $("#coqui_api_model_settings_speaker").val(); let model_setting_speaker = $("#coqui_api_model_settings_speaker").val();
if (character === "none") {
toastr.error(`Character not selected, please select one.`, DEBUG_PREFIX + " voice mapping character", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); if (!voiceName) {
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification toastr.error(`Voice name empty, please enter one.`, DEBUG_PREFIX + " voice mapping voice name", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
if (model_origin == "none") { if (model_origin == "none") {
toastr.error(`Origin not selected, please select one.`, DEBUG_PREFIX + " voice mapping origin", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(`Origin not selected, please select one.`, DEBUG_PREFIX + " voice mapping origin", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
@ -252,25 +274,25 @@ class CoquiTtsProvider {
if (model_name == "none") { if (model_name == "none") {
toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
CoquiTtsProvider.instance.settings.voiceMapDict[character] = { model_type: "local", model_id: "local/" + model_id }; this.settings.voiceMapDict[voiceName] = { model_type: "local", model_id: "local/" + model_id };
console.debug(DEBUG_PREFIX, "Registered new voice map: ", character, ":", CoquiTtsProvider.instance.settings.voiceMapDict[character]); console.debug(DEBUG_PREFIX, "Registered new voice map: ", voiceName, ":", this.settings.voiceMapDict[voiceName]);
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
if (model_language == "none") { if (model_language == "none") {
toastr.error(`Language not selected, please select one.`, DEBUG_PREFIX + " voice mapping language", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(`Language not selected, please select one.`, DEBUG_PREFIX + " voice mapping language", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
if (model_name == "none") { if (model_name == "none") {
toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(`Model not selected, please select one.`, DEBUG_PREFIX + " voice mapping model", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
CoquiTtsProvider.updateVoiceMap(); // Overide any manual modification this.updateCustomVoices(); // Overide any manual modification
return; return;
} }
@ -299,45 +321,51 @@ class CoquiTtsProvider {
return; return;
} }
console.debug(DEBUG_PREFIX, "Current voice map: ", CoquiTtsProvider.instance.settings.voiceMap); console.debug(DEBUG_PREFIX, "Current custom voices: ", this.settings.customVoices);
CoquiTtsProvider.instance.settings.voiceMapDict[character] = { model_type: "coqui-api", model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker }; this.settings.voiceMapDict[voiceName] = { model_type: "coqui-api", model_id: model_id, model_language: model_setting_language, model_speaker: model_setting_speaker };
console.debug(DEBUG_PREFIX, "Registered new voice map: ", character, ":", CoquiTtsProvider.instance.settings.voiceMapDict[character]); console.debug(DEBUG_PREFIX, "Registered new voice map: ", voiceName, ":", this.settings.voiceMapDict[voiceName]);
CoquiTtsProvider.updateVoiceMap(); this.updateCustomVoices();
initVoiceMap() // Update TTS extension voiceMap
let successMsg = character + ":" + model_id; let successMsg = voiceName + ":" + model_id;
if (model_setting_language != null) if (model_setting_language != null)
successMsg += "[" + model_setting_language + "]"; successMsg += "[" + model_setting_language + "]";
if (model_setting_speaker != null) if (model_setting_speaker != null)
successMsg += "[" + model_setting_speaker + "]"; successMsg += "[" + model_setting_speaker + "]";
toastr.info(successMsg, DEBUG_PREFIX + " voice map updated", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.info(successMsg, DEBUG_PREFIX + " voice map updated", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return return
} }
// DBG: assume voiceName is correct
// TODO: check voice is correct
async getVoice(voiceName) { async getVoice(voiceName) {
console.log(DEBUG_PREFIX, "getVoice", voiceName); let match = await this.fetchTtsVoiceObjects()
const output = { voice_id: voiceName }; match = match.filter(
return output; voice => voice.name == voiceName
)[0]
if (!match) {
throw `TTS Voice name ${voiceName} not found in CoquiTTS Provider voice list`
}
return match;
} }
static async onRemoveClick() { async onRemoveClick() {
const character = $("#coqui_character_select").val(); const voiceName = $("#coqui_voicename_select").val();
if (character === "none") { if (voiceName === "none") {
toastr.error(`Character not selected, please select one.`, DEBUG_PREFIX + " voice mapping character", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(`Voice not selected, please select one.`, DEBUG_PREFIX + " voice mapping voiceId", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
return; return;
} }
// Todo erase from voicemap // Todo erase from voicemap
delete (CoquiTtsProvider.instance.settings.voiceMapDict[character]); delete (this.settings.voiceMapDict[voiceName]);
CoquiTtsProvider.updateVoiceMap(); // TODO this.updateCustomVoices();
initVoiceMap() // Update TTS extension voiceMap
} }
static async onModelOriginChange() { async onModelOriginChange() {
throwIfModuleMissing() throwIfModuleMissing()
resetModelSettings(); resetModelSettings();
const model_origin = $('#coqui_model_origin').val(); const model_origin = $('#coqui_model_origin').val();
@ -346,13 +374,10 @@ class CoquiTtsProvider {
$("#coqui_local_model_div").hide(); $("#coqui_local_model_div").hide();
$("#coqui_api_model_div").hide(); $("#coqui_api_model_div").hide();
} }
// show coqui model selected list (SAFE) // show coqui model selected list (SAFE)
if (model_origin == "coqui-api") { if (model_origin == "coqui-api") {
$("#coqui_local_model_div").hide(); $("#coqui_local_model_div").hide();
$("#coqui_api_model_div").hide();
$("#coqui_api_model_name").hide();
$("#coqui_api_model_settings").hide();
$('#coqui_api_language') $('#coqui_api_language')
.find('option') .find('option')
@ -375,9 +400,6 @@ class CoquiTtsProvider {
// show coqui model full list (UNSAFE) // show coqui model full list (UNSAFE)
if (model_origin == "coqui-api-full") { if (model_origin == "coqui-api-full") {
$("#coqui_local_model_div").hide(); $("#coqui_local_model_div").hide();
$("#coqui_api_model_div").hide();
$("#coqui_api_model_name").hide();
$("#coqui_api_model_settings").hide();
$('#coqui_api_language') $('#coqui_api_language')
.find('option') .find('option')
@ -405,7 +427,7 @@ class CoquiTtsProvider {
} }
} }
static async onModelLanguageChange() { async onModelLanguageChange() {
throwIfModuleMissing(); throwIfModuleMissing();
resetModelSettings(); resetModelSettings();
$("#coqui_api_model_settings").hide(); $("#coqui_api_model_settings").hide();
@ -438,7 +460,7 @@ class CoquiTtsProvider {
} }
} }
static async onModelNameChange() { async onModelNameChange() {
throwIfModuleMissing(); throwIfModuleMissing();
resetModelSettings(); resetModelSettings();
$("#coqui_api_model_settings").hide(); $("#coqui_api_model_settings").hide();
@ -529,6 +551,8 @@ class CoquiTtsProvider {
$("#coqui_api_model_install_status").text("Model not found on extras server"); $("#coqui_api_model_install_status").text("Model not found on extras server");
} }
const onModelNameChange_pointer = this.onModelNameChange;
$("#coqui_api_model_install_button").off("click").on("click", async function () { $("#coqui_api_model_install_button").off("click").on("click", async function () {
try { try {
$("#coqui_api_model_install_status").text("Downloading model..."); $("#coqui_api_model_install_status").text("Downloading model...");
@ -542,7 +566,7 @@ class CoquiTtsProvider {
if (apiResult["status"] == "done") { if (apiResult["status"] == "done") {
$("#coqui_api_model_install_status").text("Model installed and ready to use!"); $("#coqui_api_model_install_status").text("Model installed and ready to use!");
$("#coqui_api_model_install_button").hide(); $("#coqui_api_model_install_button").hide();
CoquiTtsProvider.onModelNameChange(); onModelNameChange_pointer();
} }
if (apiResult["status"] == "downloading") { if (apiResult["status"] == "downloading") {
@ -553,7 +577,7 @@ class CoquiTtsProvider {
} catch (error) { } catch (error) {
console.error(error) console.error(error)
toastr.error(error, DEBUG_PREFIX + " error with model download", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); toastr.error(error, DEBUG_PREFIX + " error with model download", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
CoquiTtsProvider.onModelNameChange(); onModelNameChange_pointer();
} }
// will refresh model status // will refresh model status
}); });
@ -656,6 +680,8 @@ class CoquiTtsProvider {
// ts_models/ja/kokoro/tacotron2-DDC // ts_models/ja/kokoro/tacotron2-DDC
async generateTts(text, voiceId) { async generateTts(text, voiceId) {
throwIfModuleMissing() throwIfModuleMissing()
voiceId = this.settings.customVoices[voiceId]
const url = new URL(getApiUrl()); const url = new URL(getApiUrl());
url.pathname = '/api/text-to-speech/coqui/generate-tts'; url.pathname = '/api/text-to-speech/coqui/generate-tts';
@ -703,8 +729,11 @@ class CoquiTtsProvider {
} }
// Dirty hack to say not implemented // Dirty hack to say not implemented
async fetchTtsVoiceIds() { async fetchTtsVoiceObjects() {
return [{ name: "Voice samples not implemented for coqui TTS yet, search for the model samples online", voice_id: "", lang: "", }] const voiceIds = Object
.keys(this.settings.voiceMapDict)
.map(voice => ({ name: voice, voice_id: voice, preview_url: false }));
return voiceIds
} }
// Do nothing // Do nothing
@ -717,13 +746,7 @@ class CoquiTtsProvider {
} }
} }
//#############################// async function initLocalModels() {
// Module Worker //
//#############################//
async function moduleWorker() {
updateCharactersList();
if (!modules.includes('coqui-tts')) if (!modules.includes('coqui-tts'))
return return
@ -748,9 +771,3 @@ async function moduleWorker() {
coquiLocalModelsReceived = true; coquiLocalModelsReceived = true;
} }
} }
$(document).ready(function () {
const wrapper = new ModuleWorkerWrapper(moduleWorker);
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL);
moduleWorker();
})

View File

@ -2,6 +2,7 @@ import { getRequestHeaders } from "../../../script.js"
import { getApiUrl } from "../../extensions.js" import { getApiUrl } from "../../extensions.js"
import { doExtrasFetch, modules } from "../../extensions.js" import { doExtrasFetch, modules } from "../../extensions.js"
import { getPreviewString } from "./index.js" import { getPreviewString } from "./index.js"
import { saveTtsProviderSettings } from "./index.js"
export { EdgeTtsProvider } export { EdgeTtsProvider }
@ -30,9 +31,10 @@ class EdgeTtsProvider {
onSettingsChange() { onSettingsChange() {
this.settings.rate = Number($('#edge_tts_rate').val()); this.settings.rate = Number($('#edge_tts_rate').val());
$('#edge_tts_rate_output').text(this.settings.rate); $('#edge_tts_rate_output').text(this.settings.rate);
saveTtsProviderSettings()
} }
loadSettings(settings) { async loadSettings(settings) {
// Pupulate Provider UI given input settings // Pupulate Provider UI given input settings
if (Object.keys(settings).length == 0) { if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings") console.info("Using default TTS Provider settings")
@ -51,12 +53,20 @@ class EdgeTtsProvider {
$('#edge_tts_rate').val(this.settings.rate || 0); $('#edge_tts_rate').val(this.settings.rate || 0);
$('#edge_tts_rate_output').text(this.settings.rate || 0); $('#edge_tts_rate_output').text(this.settings.rate || 0);
$('#edge_tts_rate').on("input", () => {this.onSettingsChange()})
await this.checkReady()
console.info("Settings loaded") console.info("Settings loaded")
} }
async onApplyClick() { // Perform a simple readiness check by trying to fetch voiceIds
async checkReady(){
throwIfModuleMissing()
await this.fetchTtsVoiceObjects()
}
async onRefreshClick() {
return return
} }
@ -66,7 +76,7 @@ class EdgeTtsProvider {
async getVoice(voiceName) { async getVoice(voiceName) {
if (this.voices.length == 0) { if (this.voices.length == 0) {
this.voices = await this.fetchTtsVoiceIds() this.voices = await this.fetchTtsVoiceObjects()
} }
const match = this.voices.filter( const match = this.voices.filter(
voice => voice.name == voiceName voice => voice.name == voiceName
@ -85,7 +95,7 @@ class EdgeTtsProvider {
//###########// //###########//
// API CALLS // // API CALLS //
//###########// //###########//
async fetchTtsVoiceIds() { async fetchTtsVoiceObjects() {
throwIfModuleMissing() throwIfModuleMissing()
const url = new URL(getApiUrl()); const url = new URL(getApiUrl());
@ -144,8 +154,9 @@ class EdgeTtsProvider {
} }
function throwIfModuleMissing() { function throwIfModuleMissing() {
if (!modules.includes('edge-tts')) { if (!modules.includes('edge-tts')) {
toastr.error(`Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.`) const message = `Edge TTS module not loaded. Add edge-tts to enable-modules and restart the Extras API.`
throw new Error(`Edge TTS module not loaded.`) // toastr.error(message)
throw new Error(message)
} }
} }

View File

@ -1,5 +1,4 @@
import { deepClone } from "../../utils.js"; import { saveTtsProviderSettings } from "./index.js"
export { ElevenLabsTtsProvider } export { ElevenLabsTtsProvider }
class ElevenLabsTtsProvider { class ElevenLabsTtsProvider {
@ -25,16 +24,19 @@ class ElevenLabsTtsProvider {
get settingsHtml() { get settingsHtml() {
let html = ` let html = `
<label for="elevenlabs_tts_api_key">API Key</label> <div class="elevenlabs_tts_settings">
<input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/> <label for="elevenlabs_tts_api_key">API Key</label>
<label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label> <input id="elevenlabs_tts_api_key" type="text" class="text_pole" placeholder="<API Key>"/>
<input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.05" /> <input id="eleven_labs_connect" class="menu_button" type="button" value="Connect" />
<label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label> <label for="elevenlabs_tts_stability">Stability: <span id="elevenlabs_tts_stability_output"></span></label>
<input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.05" /> <input id="elevenlabs_tts_stability" type="range" value="${this.defaultSettings.stability}" min="0" max="1" step="0.05" />
<label class="checkbox_label" for="elevenlabs_tts_multilingual"> <label for="elevenlabs_tts_similarity_boost">Similarity Boost: <span id="elevenlabs_tts_similarity_boost_output"></span></label>
<input id="elevenlabs_tts_multilingual" type="checkbox" value="${this.defaultSettings.multilingual}" /> <input id="elevenlabs_tts_similarity_boost" type="range" value="${this.defaultSettings.similarity_boost}" min="0" max="1" step="0.05" />
Enable Multilingual <label class="checkbox_label" for="elevenlabs_tts_multilingual">
</label> <input id="elevenlabs_tts_multilingual" type="checkbox" value="${this.defaultSettings.multilingual}" />
Enable Multilingual
</label>
</div>
` `
return html return html
} }
@ -44,39 +46,49 @@ class ElevenLabsTtsProvider {
this.settings.stability = $('#elevenlabs_tts_stability').val() this.settings.stability = $('#elevenlabs_tts_stability').val()
this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val() this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val()
this.settings.multilingual = $('#elevenlabs_tts_multilingual').prop('checked') this.settings.multilingual = $('#elevenlabs_tts_multilingual').prop('checked')
saveTtsProviderSettings()
} }
loadSettings(settings) { async loadSettings(settings) {
// Pupulate Provider UI given input settings // Pupulate Provider UI given input settings
if (!settings || Object.keys(settings).length == 0) { if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings") console.info("Using default TTS Provider settings")
} }
// Only accept keys defined in defaultSettings // Only accept keys defined in defaultSettings
this.settings = deepClone(this.defaultSettings); this.settings = this.defaultSettings
if (settings) { for (const key in settings){
for (const key in settings) { if (key in this.settings){
if (key in this.settings) { this.settings[key] = settings[key]
this.settings[key] = settings[key] } else {
} else { throw `Invalid setting passed to TTS Provider: ${key}`
throw `Invalid setting passed to TTS Provider: ${key}`
}
} }
} }
$('#elevenlabs_tts_stability').val(this.settings.stability) $('#elevenlabs_tts_stability').val(this.settings.stability)
$('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost) $('#elevenlabs_tts_similarity_boost').val(this.settings.similarity_boost)
$('#elevenlabs_tts_api_key').val(this.settings.apiKey) $('#elevenlabs_tts_api_key').val(this.settings.apiKey)
$('#tts_auto_generation').prop('checked', this.settings.multilingual) $('#tts_auto_generation').prop('checked', this.settings.multilingual)
$('#eleven_labs_connect').on('click', () => {this.onConnectClick()})
$('#elevenlabs_tts_settings').on('input',this.onSettingsChange)
await this.checkReady()
console.info("Settings loaded") console.info("Settings loaded")
} }
async onApplyClick() { // Perform a simple readiness check by trying to fetch voiceIds
async checkReady(){
await this.fetchTtsVoiceObjects()
}
async onRefreshClick() {
}
async onConnectClick() {
// Update on Apply click // Update on Apply click
return await this.updateApiKey().catch( (error) => { return await this.updateApiKey().catch( (error) => {
throw error toastr.error(`ElevenLabs: ${error}`)
}) })
} }
@ -85,11 +97,12 @@ class ElevenLabsTtsProvider {
// Using this call to validate API key // Using this call to validate API key
this.settings.apiKey = $('#elevenlabs_tts_api_key').val() this.settings.apiKey = $('#elevenlabs_tts_api_key').val()
await this.fetchTtsVoiceIds().catch(error => { await this.fetchTtsVoiceObjects().catch(error => {
throw `TTS API key validation failed` throw `TTS API key validation failed`
}) })
this.settings.apiKey = this.settings.apiKey this.settings.apiKey = this.settings.apiKey
console.debug(`Saved new API_KEY: ${this.settings.apiKey}`) console.debug(`Saved new API_KEY: ${this.settings.apiKey}`)
this.onSettingsChange()
} }
//#################// //#################//
@ -98,7 +111,7 @@ class ElevenLabsTtsProvider {
async getVoice(voiceName) { async getVoice(voiceName) {
if (this.voices.length == 0) { if (this.voices.length == 0) {
this.voices = await this.fetchTtsVoiceIds() this.voices = await this.fetchTtsVoiceObjects()
} }
const match = this.voices.filter( const match = this.voices.filter(
elevenVoice => elevenVoice.name == voiceName elevenVoice => elevenVoice.name == voiceName
@ -145,7 +158,7 @@ class ElevenLabsTtsProvider {
//###########// //###########//
// API CALLS // // API CALLS //
//###########// //###########//
async fetchTtsVoiceIds() { async fetchTtsVoiceObjects() {
const headers = { const headers = {
'xi-api-key': this.settings.apiKey 'xi-api-key': this.settings.apiKey
} }

View File

@ -13,6 +13,7 @@ export { talkingAnimation };
const UPDATE_INTERVAL = 1000 const UPDATE_INTERVAL = 1000
let voiceMapEntries = []
let voiceMap = {} // {charName:voiceid, charName2:voiceid2} let voiceMap = {} // {charName:voiceid, charName2:voiceid2}
let audioControl let audioControl
let storedvalue = false; let storedvalue = false;
@ -224,8 +225,8 @@ function debugTtsPlayback() {
console.log(JSON.stringify( console.log(JSON.stringify(
{ {
"ttsProviderName": ttsProviderName, "ttsProviderName": ttsProviderName,
"voiceMap": voiceMap,
"currentMessageNumber": currentMessageNumber, "currentMessageNumber": currentMessageNumber,
"isWorkerBusy": isWorkerBusy,
"audioPaused": audioPaused, "audioPaused": audioPaused,
"audioJobQueue": audioJobQueue, "audioJobQueue": audioJobQueue,
"currentAudioJob": currentAudioJob, "currentAudioJob": currentAudioJob,
@ -285,7 +286,7 @@ async function onTtsVoicesClick() {
let popupText = '' let popupText = ''
try { try {
const voiceIds = await ttsProvider.fetchTtsVoiceIds() const voiceIds = await ttsProvider.fetchTtsVoiceObjects()
for (const voice of voiceIds) { for (const voice of voiceIds) {
popupText += ` popupText += `
@ -486,6 +487,12 @@ function loadSettings() {
if (Object.keys(extension_settings.tts).length === 0) { if (Object.keys(extension_settings.tts).length === 0) {
Object.assign(extension_settings.tts, defaultSettings) Object.assign(extension_settings.tts, defaultSettings)
} }
for (const key in defaultSettings) {
if (!(key in extension_settings.tts)) {
extension_settings.tts[key] = defaultSettings[key]
}
}
$('#tts_provider').val(extension_settings.tts.currentProvider)
$('#tts_enabled').prop( $('#tts_enabled').prop(
'checked', 'checked',
extension_settings.tts.enabled extension_settings.tts.enabled
@ -513,59 +520,17 @@ function setTtsStatus(status, success) {
} }
} }
function parseVoiceMap(voiceMapString) { function onRefreshClick() {
let parsedVoiceMap = {}
for (const [charName, voiceId] of voiceMapString
.split(',')
.map(s => s.split(':'))) {
if (charName && voiceId) {
parsedVoiceMap[charName.trim()] = voiceId.trim()
}
}
return parsedVoiceMap
}
async function voicemapIsValid(parsedVoiceMap) {
let valid = true
for (const characterName in parsedVoiceMap) {
const parsedVoiceName = parsedVoiceMap[characterName]
try {
await ttsProvider.getVoice(parsedVoiceName)
} catch (error) {
console.error(error)
valid = false
}
}
return valid
}
async function updateVoiceMap() {
let isValidResult = false
const value = $('#tts_voice_map').val()
const parsedVoiceMap = parseVoiceMap(value)
isValidResult = await voicemapIsValid(parsedVoiceMap)
if (isValidResult) {
ttsProvider.settings.voiceMap = String(value)
// console.debug(`ttsProvider.voiceMap: ${ttsProvider.settings.voiceMap}`)
voiceMap = parsedVoiceMap
console.debug(`Saved new voiceMap: ${value}`)
saveSettingsDebounced()
} else {
throw 'Voice map is invalid, check console for errors'
}
}
function onApplyClick() {
Promise.all([ Promise.all([
ttsProvider.onApplyClick(), ttsProvider.onRefreshClick(),
updateVoiceMap() // updateVoiceMap()
]).then(() => { ]).then(() => {
extension_settings.tts[ttsProviderName] = ttsProvider.settings extension_settings.tts[ttsProviderName] = ttsProvider.settings
saveSettingsDebounced() saveSettingsDebounced()
setTtsStatus('Successfully applied settings', true) setTtsStatus('Successfully applied settings', true)
console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`) console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`)
initVoiceMap()
updateVoiceMap()
}).catch(error => { }).catch(error => {
console.error(error) console.error(error)
setTtsStatus(error, false) setTtsStatus(error, false)
@ -608,13 +573,14 @@ function onNarrateTranslatedOnlyClick() {
// TTS Provider // // TTS Provider //
//##############// //##############//
function loadTtsProvider(provider) { async function loadTtsProvider(provider) {
//Clear the current config and add new config //Clear the current config and add new config
$("#tts_provider_settings").html("") $("#tts_provider_settings").html("")
if (!provider) { if (!provider) {
provider return
} }
// Init provider references // Init provider references
extension_settings.tts.currentProvider = provider extension_settings.tts.currentProvider = provider
ttsProviderName = provider ttsProviderName = provider
@ -626,38 +592,210 @@ function loadTtsProvider(provider) {
console.warn(`Provider ${ttsProviderName} not in Extension Settings, initiatilizing provider in settings`) console.warn(`Provider ${ttsProviderName} not in Extension Settings, initiatilizing provider in settings`)
extension_settings.tts[ttsProviderName] = {} extension_settings.tts[ttsProviderName] = {}
} }
await ttsProvider.loadSettings(extension_settings.tts[ttsProviderName])
// Load voicemap settings await initVoiceMap()
let voiceMapFromSettings
if ("voiceMap" in extension_settings.tts[ttsProviderName]) {
voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap
voiceMap = parseVoiceMap(voiceMapFromSettings)
} else {
voiceMapFromSettings = ""
voiceMap = {}
}
$('#tts_voice_map').val(voiceMapFromSettings)
$('#tts_provider').val(ttsProviderName)
ttsProvider.loadSettings(extension_settings.tts[ttsProviderName])
} }
function onTtsProviderChange() { function onTtsProviderChange() {
const ttsProviderSelection = $('#tts_provider').val() const ttsProviderSelection = $('#tts_provider').val()
extension_settings.tts.currentProvider = ttsProviderSelection
loadTtsProvider(ttsProviderSelection) loadTtsProvider(ttsProviderSelection)
} }
function onTtsProviderSettingsInput() { // Ensure that TTS provider settings are saved to extension settings.
ttsProvider.onSettingsChange() export function saveTtsProviderSettings() {
updateVoiceMap()
// Persist changes to SillyTavern tts extension settings
extension_settings.tts[ttsProviderName] = ttsProvider.settings extension_settings.tts[ttsProviderName] = ttsProvider.settings
saveSettingsDebounced() saveSettingsDebounced()
console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`) console.info(`Saved settings ${ttsProviderName} ${JSON.stringify(ttsProvider.settings)}`)
} }
//###################//
// voiceMap Handling //
//###################//
/**
 * Event handler registered for CHAT_CHANGED and GROUP_UPDATED.
 * Resets TTS playback first, then rebuilds the voice map for the
 * characters of the chat that is now active.
 */
async function onChatChanged() {
await resetTtsPlayback()
await initVoiceMap()
}
/**
 * Collect the names of everyone who can speak in the current chat.
 *
 * The user's name (context.name1) always comes first. In a single character
 * chat it is followed by context.name2; in a group chat it is followed by the
 * group's members with any trailing '.png' suffix removed.
 *
 * @returns {string[]} Names in the order described above.
 */
function getCharacters(){
    const context = getContext()
    const characters = [context.name1]

    if (context.groupId === null){
        // Single char chat
        characters.push(context.name2)
        return characters
    }

    // Group chat. Loose equality is kept on purpose: the original compared ids with ==.
    const group = context.groups.find(group => context.groupId == group.id)

    // The group may not be found (e.g. it was just deleted); fall back to no
    // members instead of throwing on `undefined.members`.
    for (const member of group?.members ?? []) {
        // Remove suffix
        characters.push(member.endsWith('.png') ? member.slice(0, -4) : member)
    }
    return characters
}
/**
 * Turn an arbitrary string into something usable inside an element id.
 *
 * Every character other than ASCII letters, digits, underscore and hyphen is
 * dropped. If the result does not begin with a letter it is prefixed with
 * 'element_'.
 *
 * @param {string} input Raw text, e.g. a character name.
 * @returns {string} The sanitized identifier.
 */
function sanitizeId(input) {
    const stripped = input.replace(/[^a-zA-Z0-9-_]/g, '');
    const startsWithLetter = /^[a-zA-Z]/.test(stripped);
    return startsWithLetter ? stripped : 'element_' + stripped;
}
/**
 * Parse the legacy textual voice map ("charName:voiceId,charName2:voiceId2")
 * into an object keyed by character name.
 *
 * Pairs are separated by commas and split on ':'; only the first two parts of
 * a pair are used. Pairs with an empty name or voice id are skipped, and both
 * sides are trimmed.
 *
 * @param {string} voiceMapString Comma separated charName:voiceId pairs.
 * @returns {Object<string, string>} Map of character name to voice id.
 */
function parseVoiceMap(voiceMapString) {
    const result = {}
    voiceMapString.split(',').forEach(pair => {
        const [rawName, rawVoice] = pair.split(':')
        if (!rawName || !rawVoice) {
            return
        }
        result[rawName.trim()] = rawVoice.trim()
    })
    return result
}
/**
 * Apply voiceMap based on current voiceMapEntries.
 *
 * Builds a name -> voiceId map from every entry whose voiceId is not null.
 * The module-level voiceMap is replaced only when that map is non-empty.
 * The resulting voiceMap is then merged into the current provider's saved
 * settings and a debounced settings save is requested.
 */
function updateVoiceMap() {
    const tempVoiceMap = {}
    for (const voice of voiceMapEntries){
        if (voice.voiceId === null){
            continue
        }
        tempVoiceMap[voice.name] = voice.voiceId
    }
    if (Object.keys(tempVoiceMap).length !== 0){
        voiceMap = tempVoiceMap
        console.log(`Voicemap updated to ${JSON.stringify(voiceMap)}`)
    }

    // The saved voiceMap may be missing (freshly initialised provider settings)
    // or still be the legacy comma separated string. Object.assign throws on an
    // undefined target and silently drops writes on a string target, so
    // normalise it to a plain object first.
    const providerSettings = extension_settings.tts[ttsProviderName]
    const savedVoiceMap = providerSettings.voiceMap
    if (typeof savedVoiceMap === 'string') {
        providerSettings.voiceMap = parseVoiceMap(savedVoiceMap)
    } else if (typeof savedVoiceMap !== 'object' || savedVoiceMap === null) {
        providerSettings.voiceMap = {}
    }

    Object.assign(providerSettings.voiceMap, voiceMap)
    saveSettingsDebounced()
}
/**
 * One row of the voice map UI: a character name paired with the voice chosen
 * for it, plus the <select> element used to change that choice.
 */
class VoiceMapEntry {
    name
    voiceId
    selectElement

    /**
     * @param {string} name Character name shown in the UI and used as voiceMap key.
     * @param {string} voiceId Initially selected voice; defaults to 'disabled'.
     */
    constructor (name, voiceId='disabled') {
        this.name = name
        this.voiceId = voiceId
        this.selectElement = null
    }

    /**
     * Append this entry's row to #tts_voicemap_block and wire up its select.
     *
     * Elements are built with jQuery setters rather than an HTML template so
     * the character name and voice names are inserted as text, never parsed as
     * markup. The created select is kept by reference instead of being looked
     * up again by id, so two names that sanitize to the same id cannot end up
     * bound to each other's control.
     *
     * @param {Array<{name: string}>} voiceIds Voice objects offered as options.
     *        Defaults to an empty list so a failed voice fetch still renders
     *        the row with only the 'disabled' option.
     */
    addUI(voiceIds = []){
        const sanitizedName = sanitizeId(this.name)

        const label = $('<span></span>')
            .attr('id', `tts_voicemap_char_${sanitizedName}`)
            .text(this.name)

        const select = $('<select></select>')
            .attr('id', `tts_voicemap_char_${sanitizedName}_voice`)
        select.append($('<option></option>').text('disabled'))

        // Populate voice ID select list
        for (const voice of voiceIds){
            select.append($('<option></option>').val(voice.name).text(voice.name))
        }

        const row = $("<div class='tts_voicemap_block_char flex-container flexGap5'></div>")
        row.append(label, select)
        $('#tts_voicemap_block').append(row)

        this.selectElement = select
        this.selectElement.on('change', args => this.onSelectChange(args))
        this.selectElement.val(this.voiceId)
    }

    /**
     * Change handler for the select: store the newly selected voice and
     * re-apply the voice map.
     */
    onSelectChange(args) {
        this.voiceId = this.selectElement.find(':selected').val()
        updateVoiceMap()
    }
}
/**
 * Init voiceMapEntries for character select list.
 *
 * Clears the voice map UI and entries, then, if TTS is enabled and the
 * provider passes checkReady(), builds one VoiceMapEntry per character in the
 * current chat (skipping "SillyTavern System"), pre-selecting the voice saved
 * in the provider settings or 'disabled' when none is saved. Finishes by
 * applying the resulting voice map.
 *
 * A provider that is not ready is reported through setTtsStatus and ends
 * initialization early. A failure to fetch the voice list is reported via
 * toastr and initialization continues with an empty list of voices.
 *
 * @returns {Promise<void>}
 */
export async function initVoiceMap(){
    // Clear existing voiceMap state
    $('#tts_voicemap_block').empty()
    voiceMapEntries = []

    // Gate initialization if not enabled or TTS Provider not ready. Prevents error popups.
    const enabled = $('#tts_enabled').is(':checked')
    if (!enabled){
        return
    }

    // Keep errors inside extension UI rather than toastr. Toastr errors for TTS are annoying.
    try {
        await ttsProvider.checkReady()
    } catch (error) {
        const message = `TTS Provider not ready. ${error}`
        setTtsStatus(message, false)
        return
    }

    setTtsStatus("TTS Provider Loaded", true)

    // Get characters in current chat
    const characters = getCharacters()

    // Get saved voicemap from provider settings, handling new and old representations
    let voiceMapFromSettings = {}
    const savedVoiceMap = extension_settings.tts[ttsProviderName].voiceMap
    if (typeof savedVoiceMap === "string"){
        // Handle previous representation
        voiceMapFromSettings = parseVoiceMap(savedVoiceMap)
    } else if (typeof savedVoiceMap === "object" && savedVoiceMap !== null){
        // Handle new representation (typeof null is "object", so exclude it)
        voiceMapFromSettings = savedVoiceMap
    }

    // Get voiceIds from provider. On failure keep going with an empty list so
    // the rows still render instead of crashing while iterating `undefined`.
    let voiceIdsFromProvider = []
    try {
        voiceIdsFromProvider = (await ttsProvider.fetchTtsVoiceObjects()) ?? []
    } catch (error) {
        console.error(error)
        toastr.error("TTS Provider failed to return voice ids.")
    }

    // Build UI using VoiceMapEntry objects
    for (const character of characters){
        if (character === "SillyTavern System"){
            continue
        }
        // Check provider settings for voiceIds. Own properties only, so a
        // character named e.g. "constructor" does not pick up a prototype member.
        const hasSavedVoice = Object.prototype.hasOwnProperty.call(voiceMapFromSettings, character)
        const voiceId = hasSavedVoice ? voiceMapFromSettings[character] : 'disabled'

        const voiceMapEntry = new VoiceMapEntry(character, voiceId)
        voiceMapEntry.addUI(voiceIdsFromProvider)
        voiceMapEntries.push(voiceMapEntry)
    }
    updateVoiceMap()
}
$(document).ready(function () { $(document).ready(function () {
function addExtensionControls() { function addExtensionControls() {
@ -669,10 +807,13 @@ $(document).ready(function () {
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div> <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
</div> </div>
<div class="inline-drawer-content"> <div class="inline-drawer-content">
<div> <div id="tts_status">
<span>Select TTS Provider</span> </br> </div>
<select id="tts_provider"> <span>Select TTS Provider</span> </br>
<div class="tts_block">
<select id="tts_provider" class="flex1">
</select> </select>
<input id="tts_refresh" class="menu_button" type="submit" value="Reload" />
</div> </div>
<div> <div>
<label class="checkbox_label" for="tts_enabled"> <label class="checkbox_label" for="tts_enabled">
@ -696,16 +837,12 @@ $(document).ready(function () {
<small>Narrate only the translated text</small> <small>Narrate only the translated text</small>
</label> </label>
</div> </div>
<label>Voice Map</label> <div id="tts_voicemap_block">
<textarea id="tts_voice_map" type="text" class="text_pole textarea_compact" rows="4"
placeholder="Enter comma separated map of charName:ttsName. Example: \nAqua:Bella,\nYou:Josh,"></textarea>
<div id="tts_status">
</div> </div>
<hr>
<form id="tts_provider_settings" class="inline-drawer-content"> <form id="tts_provider_settings" class="inline-drawer-content">
</form> </form>
<div class="tts_buttons"> <div class="tts_buttons">
<input id="tts_apply" class="menu_button" type="submit" value="Apply" />
<input id="tts_voices" class="menu_button" type="submit" value="Available voices" /> <input id="tts_voices" class="menu_button" type="submit" value="Available voices" />
</div> </div>
</div> </div>
@ -714,14 +851,13 @@ $(document).ready(function () {
</div> </div>
` `
$('#extensions_settings').append(settingsHtml) $('#extensions_settings').append(settingsHtml)
$('#tts_apply').on('click', onApplyClick) $('#tts_refresh').on('click', onRefreshClick)
$('#tts_enabled').on('click', onEnableClick) $('#tts_enabled').on('click', onEnableClick)
$('#tts_narrate_dialogues').on('click', onNarrateDialoguesClick); $('#tts_narrate_dialogues').on('click', onNarrateDialoguesClick);
$('#tts_narrate_quoted').on('click', onNarrateQuotedClick); $('#tts_narrate_quoted').on('click', onNarrateQuotedClick);
$('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick); $('#tts_narrate_translated_only').on('click', onNarrateTranslatedOnlyClick);
$('#tts_auto_generation').on('click', onAutoGenerationClick); $('#tts_auto_generation').on('click', onAutoGenerationClick);
$('#tts_voices').on('click', onTtsVoicesClick) $('#tts_voices').on('click', onTtsVoicesClick)
$('#tts_provider_settings').on('input', onTtsProviderSettingsInput)
for (const provider in ttsProviders) { for (const provider in ttsProviders) {
$('#tts_provider').append($("<option />").val(provider).text(provider)) $('#tts_provider').append($("<option />").val(provider).text(provider))
} }
@ -735,4 +871,6 @@ $(document).ready(function () {
const wrapper = new ModuleWorkerWrapper(moduleWorker); const wrapper = new ModuleWorkerWrapper(moduleWorker);
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL) // Init depends on all the things setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL) // Init depends on all the things
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback); eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
eventSource.on(event_types.CHAT_CHANGED, onChatChanged)
eventSource.on(event_types.GROUP_UPDATED, onChatChanged)
}) })

View File

@ -1,5 +1,6 @@
import { getRequestHeaders } from "../../../script.js" import { getRequestHeaders, callPopup } from "../../../script.js"
import { getPreviewString } from "./index.js" import { getPreviewString, saveTtsProviderSettings } from "./index.js"
import { initVoiceMap } from "./index.js"
export { NovelTtsProvider } export { NovelTtsProvider }
@ -14,24 +15,69 @@ class NovelTtsProvider {
audioElement = document.createElement('audio') audioElement = document.createElement('audio')
defaultSettings = { defaultSettings = {
voiceMap: {} voiceMap: {},
customVoices: []
} }
get settingsHtml() { get settingsHtml() {
let html = `Use NovelAI's TTS engine.<br> let html = `
The Voice IDs in the preview list are only examples, as it can be any string of text. Feel free to try different options!<br> <div class="novel_tts_hints">
<small><i>Hint: Save an API key in the NovelAI API settings to use it here.</i></small>`; <div>Use NovelAI's TTS engine.</div>
<div>
The default Voice IDs are only examples. Add custom voices and Novel will create a new random voice for it.
Feel free to try different options!
</div>
<i>Hint: Save an API key in the NovelAI API settings to use it here.</i>
</div>
<label for="tts-novel-custom-voices-add">Custom Voices</label>
<div class="tts_custom_voices">
<select id="tts-novel-custom-voices-select"><select>
<i id="tts-novel-custom-voices-add" class="tts-button fa-solid fa-plus fa-xl success" title="Add"></i>
<i id="tts-novel-custom-voices-delete" class="tts-button fa-solid fa-xmark fa-xl failure" title="Delete"></i>
</div>
`;
return html; return html;
} }
onSettingsChange() {
// Add a new Novel custom voice to provider
async addCustomVoice(){
const voiceName = await callPopup('<h3>Custom Voice name:</h3>', 'input')
this.settings.customVoices.push(voiceName)
this.populateCustomVoices()
initVoiceMap() // Update TTS extension voiceMap
saveTtsProviderSettings()
} }
loadSettings(settings) { // Delete selected custom voice from provider
deleteCustomVoice() {
const selected = $("#tts-novel-custom-voices-select").find(':selected').val();
const voiceIndex = this.settings.customVoices.indexOf(selected);
if (voiceIndex !== -1) {
this.settings.customVoices.splice(voiceIndex, 1);
}
this.populateCustomVoices()
initVoiceMap() // Update TTS extension voiceMap
saveTtsProviderSettings()
}
// Create the UI dropdown list of voices in provider
populateCustomVoices(){
let voiceSelect = $("#tts-novel-custom-voices-select")
voiceSelect.empty()
this.settings.customVoices.forEach(voice => {
voiceSelect.append(`<option>${voice}</option>`)
})
}
async loadSettings(settings) {
// Populate Provider UI given input settings // Populate Provider UI given input settings
if (Object.keys(settings).length == 0) { if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings") console.info("Using default TTS Provider settings")
} }
$("#tts-novel-custom-voices-add").on('click', () => (this.addCustomVoice()))
$("#tts-novel-custom-voices-delete").on('click',() => (this.deleteCustomVoice()))
// Only accept keys defined in defaultSettings // Only accept keys defined in defaultSettings
this.settings = this.defaultSettings this.settings = this.defaultSettings
@ -44,11 +90,18 @@ class NovelTtsProvider {
} }
} }
this.populateCustomVoices()
await this.checkReady()
console.info("Settings loaded") console.info("Settings loaded")
} }
// Perform a simple readiness check by trying to fetch the voice objects.
// Doesn't really do much for Novel: no good way to test readiness has been
// found yet, so this only confirms that fetchTtsVoiceObjects() completes.
async checkReady(){
await this.fetchTtsVoiceObjects()
}
async onApplyClick() { async onRefreshClick() {
return return
} }
@ -72,8 +125,8 @@ class NovelTtsProvider {
//###########// //###########//
// API CALLS // // API CALLS //
//###########// //###########//
async fetchTtsVoiceIds() { async fetchTtsVoiceObjects() {
const voices = [ let voices = [
{ name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false }, { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false },
{ name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false }, { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false },
{ name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false }, { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false },
@ -89,6 +142,12 @@ class NovelTtsProvider {
{ name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false }, { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false },
]; ];
// Add in custom voices to the map
let addVoices = this.settings.customVoices.map(voice =>
({ name: voice, voice_id: voice, lang: 'en-US', preview_url: false })
)
voices = voices.concat(addVoices)
return voices; return voices;
} }

View File

@ -0,0 +1,71 @@
# Provider Requirements.
JavaScript has no built-in way to declare an interface, so this document lists the requirements a provider must meet for the extension to operate.
### class YourTtsProvider
#### Required
Exported for use in extension index.js, and added to providers list in index.js
1. generateTts(text, voiceId)
2. fetchTtsVoiceObjects()
3. getVoice(voiceName)
4. onRefreshClick()
5. checkReady()
6. loadSettings(settingsObject)
7. settings field
8. settingsHtml field
#### Optional
1. previewTtsVoice()
2. separator field
# Requirement Descriptions
### generateTts(text, voiceId)
Must return audio data whose `type` is one of `['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']`.
Must take the text to be rendered and the voiceId that identifies the voice to be used.
### fetchTtsVoiceObjects()
Required.
Used by the TTS extension to get a list of voice objects from the provider.
Must return a list of voice objects representing the available voices.
1. name: a friendly, user-facing name to assign to characters. Shown in the dropdown list next to each character.
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
3. preview_url: a URL to a local audio file that will be used to sample voices
4. lang: OPTIONAL language string
### getVoice(voiceName)
Required.
Must return a single voice object matching the provided voiceName. The voice object must have at least the following:
1. name: a friendly, user-facing name to assign to characters. Shown in the dropdown list next to each character.
2. voice_id: the provider specific id of the voice used in fetchTtsGeneration() call
3. preview_url: a URL to a local audio file that will be used to sample voices
4. lang: OPTIONAL language indicator
### onRefreshClick()
Required.
Users click this button to reconnect/reinit the selected provider.
Responds to the user clicking the refresh button, which is intended to re-initialize the Provider into a working state, like retrying connections or checking if everything is loaded.
### checkReady()
Required.
Return without throwing to let the TTS extension know that the provider is ready.
Throw an error to block the main TTS extension from initializing the provider and its UI. The error message is shown directly in the TTS extension UI.
### loadSettings(settingsObject)
Required.
Handle the input settings from the TTS extension on provider load.
Put code in here to load your provider settings.
### settings field
Required, used for storing any provider state that needs to be saved.
Anything stored in this field is persisted under `extension_settings.tts[providerName]` by the main extension whenever `saveTtsProviderSettings()` is called, and is passed back to `loadSettings()` when the provider is selected in `loadTtsProvider(provider)`.
TTS extension doesn't expect any specific contents.
### settingsHtml field
Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
### previewTtsVoice()
Optional.
Function to handle playing previews of voice samples if no direct preview_url is available in fetchTtsVoiceObjects() response
### separator field
Optional.
Used when narrate quoted text is enabled.
Defines the string of characters used to separate the groups of extracted quoted text sent to the provider. The provider uses it to introduce pauses; by default this is `...`

View File

@ -1,4 +1,5 @@
import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js" import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
import { saveTtsProviderSettings } from "./index.js"
export { SileroTtsProvider } export { SileroTtsProvider }
@ -8,6 +9,7 @@ class SileroTtsProvider {
//########// //########//
settings settings
ready = false
voices = [] voices = []
separator = ' .. ' separator = ' .. '
@ -29,9 +31,10 @@ class SileroTtsProvider {
onSettingsChange() { onSettingsChange() {
// Used when provider settings are updated from UI // Used when provider settings are updated from UI
this.settings.provider_endpoint = $('#silero_tts_endpoint').val() this.settings.provider_endpoint = $('#silero_tts_endpoint').val()
saveTtsProviderSettings()
} }
loadSettings(settings) { async loadSettings(settings) {
// Pupulate Provider UI given input settings // Pupulate Provider UI given input settings
if (Object.keys(settings).length == 0) { if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings") console.info("Using default TTS Provider settings")
@ -60,11 +63,19 @@ class SileroTtsProvider {
}, 2000); }, 2000);
$('#silero_tts_endpoint').val(this.settings.provider_endpoint) $('#silero_tts_endpoint').val(this.settings.provider_endpoint)
$('#silero_tts_endpoint').on("input", () => {this.onSettingsChange()})
await this.checkReady()
console.info("Settings loaded") console.info("Settings loaded")
} }
// Perform a simple readiness check by trying to fetch the voice objects.
// fetchTtsVoiceObjects() requests `${provider_endpoint}/speakers` and throws on
// a non-OK response, so that error propagates to the caller of checkReady().
async checkReady(){
await this.fetchTtsVoiceObjects()
}
async onApplyClick() { async onRefreshClick() {
return return
} }
@ -74,7 +85,7 @@ class SileroTtsProvider {
async getVoice(voiceName) { async getVoice(voiceName) {
if (this.voices.length == 0) { if (this.voices.length == 0) {
this.voices = await this.fetchTtsVoiceIds() this.voices = await this.fetchTtsVoiceObjects()
} }
const match = this.voices.filter( const match = this.voices.filter(
sileroVoice => sileroVoice.name == voiceName sileroVoice => sileroVoice.name == voiceName
@ -93,7 +104,7 @@ class SileroTtsProvider {
//###########// //###########//
// API CALLS // // API CALLS //
//###########// //###########//
async fetchTtsVoiceIds() { async fetchTtsVoiceObjects() {
const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`) const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`)
if (!response.ok) { if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`) throw new Error(`HTTP ${response.status}: ${await response.json()}`)

View File

@ -50,4 +50,41 @@
.voice_preview .fa-play { .voice_preview .fa-play {
cursor: pointer; cursor: pointer;
} }
/* Icon-style clickable control (used for the NovelAI custom voice add/delete icons); dimmed until hovered */
.tts-button {
margin: 0;
outline: none;
border: none;
cursor: pointer;
transition: 0.3s;
opacity: 0.7;
align-items: center;
justify-content: center;
}
/* Full opacity on hover */
.tts-button:hover {
opacity: 1;
}
/* Row holding the provider select and its Reload button; wraps when narrow */
.tts_block {
display: flex;
align-items: baseline;
column-gap: 5px;
flex-wrap: wrap;
}
/* Row holding the custom voices select and its add/delete icons */
.tts_custom_voices {
display: flex;
align-items: baseline;
gap: 5px;
}
/* Stacked hint texts at the top of the NovelAI provider settings, slightly smaller than the main font */
.novel_tts_hints {
font-size: calc(0.9 * var(--mainFontSize));
display: flex;
flex-direction: column;
gap: 5px;
margin-bottom: 5px;
}

View File

@ -1,7 +1,7 @@
import { isMobile } from "../../RossAscends-mods.js"; import { isMobile } from "../../RossAscends-mods.js";
import { getPreviewString } from "./index.js"; import { getPreviewString } from "./index.js";
import { talkingAnimation } from './index.js'; import { talkingAnimation } from './index.js';
import { saveTtsProviderSettings } from "./index.js"
export { SystemTtsProvider } export { SystemTtsProvider }
/** /**
@ -80,6 +80,7 @@ class SystemTtsProvider {
//########// //########//
settings settings
ready = false
voices = [] voices = []
separator = ' ... ' separator = ' ... '
@ -106,10 +107,10 @@ class SystemTtsProvider {
this.settings.pitch = Number($('#system_tts_pitch').val()); this.settings.pitch = Number($('#system_tts_pitch').val());
$('#system_tts_pitch_output').text(this.settings.pitch); $('#system_tts_pitch_output').text(this.settings.pitch);
$('#system_tts_rate_output').text(this.settings.rate); $('#system_tts_rate_output').text(this.settings.rate);
console.log('Save changes'); saveTtsProviderSettings()
} }
loadSettings(settings) { async loadSettings(settings) {
// Populate Provider UI given input settings // Populate Provider UI given input settings
if (Object.keys(settings).length == 0) { if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings"); console.info("Using default TTS Provider settings");
@ -143,19 +144,29 @@ class SystemTtsProvider {
$('#system_tts_rate').val(this.settings.rate || this.defaultSettings.rate); $('#system_tts_rate').val(this.settings.rate || this.defaultSettings.rate);
$('#system_tts_pitch').val(this.settings.pitch || this.defaultSettings.pitch); $('#system_tts_pitch').val(this.settings.pitch || this.defaultSettings.pitch);
// Trigger updates
$('#system_tts_rate').on("input", () =>{this.onSettingsChange()})
$('#system_tts_rate').on("input", () => {this.onSettingsChange()})
$('#system_tts_pitch_output').text(this.settings.pitch); $('#system_tts_pitch_output').text(this.settings.pitch);
$('#system_tts_rate_output').text(this.settings.rate); $('#system_tts_rate_output').text(this.settings.rate);
console.info("Settings loaded"); console.info("Settings loaded");
} }
async onApplyClick() { // Perform a simple readiness check by trying to fetch voiceIds
// Readiness check that calls fetchTtsVoiceObjects().
// NOTE: fetchTtsVoiceObjects() returns an empty list instead of throwing when
// 'speechSynthesis' is not available in window, so this check still passes then.
async checkReady(){
await this.fetchTtsVoiceObjects()
}
async onRefreshClick() {
return return
} }
//#################// //#################//
// TTS Interfaces // // TTS Interfaces //
//#################// //#################//
fetchTtsVoiceIds() { fetchTtsVoiceObjects() {
if (!('speechSynthesis' in window)) { if (!('speechSynthesis' in window)) {
return []; return [];
} }

View File

@ -4894,7 +4894,7 @@ async function readAllChunks(readableStream) {
}); });
readableStream.on('end', () => { readableStream.on('end', () => {
console.log('Finished reading the stream.'); //console.log('Finished reading the stream.');
resolve(chunks); resolve(chunks);
}); });