SillyTavern/public/scripts/extensions/tts/coquitts.js

400 lines
15 KiB
JavaScript
Raw Normal View History

2023-07-26 21:57:05 +02:00
import { eventSource, event_types } from "../../../script.js"
2023-07-25 02:59:08 +02:00
import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
export { CoquiTtsProvider }
2023-07-26 21:57:05 +02:00
/**
 * Guard used before talking to the Coqui endpoints of the Extras API.
 *
 * Does nothing when `modules` lists 'coqui-tts'. Otherwise it shows an error
 * toast explaining how to enable the module and then throws.
 *
 * @throws {Error} When 'coqui-tts' is not present in `modules`.
 */
function throwIfModuleMissing() {
    const isCoquiLoaded = modules.includes('coqui-tts');
    if (isCoquiLoaded) {
        return;
    }

    toastr.error(`Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.`);
    throw new Error(`Coqui TTS module not loaded.`);
}
2023-07-25 02:59:08 +02:00
/**
 * Replaces every <option> of a <select> with the given entries.
 *
 * @param {HTMLSelectElement} select - Element whose options are rebuilt.
 * @param {Array<[string, string]>} entries - `[value, label]` pairs, in display order.
 */
function coquiSetSelectOptions(select, entries) {
    select.innerHTML = ''; // Drop whatever was rendered before
    for (const [value, label] of entries) {
        const option = document.createElement('option');
        option.value = value;
        option.textContent = label;
        select.appendChild(option);
    }
}

/**
 * Fills a <select> from an Extras endpoint that returns a JSON object.
 * Each entry is rendered as "key: value". When the request fails or the object
 * is empty, the select ends up with a single "None" option instead.
 *
 * @param {string} selectId - DOM id of the <select> to fill.
 * @param {string} endpointPath - Path appended to `getApiUrl()`.
 * @returns {Promise<void>}
 */
async function coquiPopulateKeyedSelect(selectId, endpointPath) {
    let entries = [];
    try {
        const response = await doExtrasFetch(`${getApiUrl()}${endpointPath}`);
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }
        const data = await response.json();
        entries = Object.entries(data).map(([key, label]) => [key, `${key}: ${label}`]);
    } catch (error) {
        // Best effort: the list simply falls back to "None". Logged at debug level
        // so the failure is still discoverable without alarming the user.
        console.debug(`Coqui TTS: could not load options for #${selectId}`, error);
    }

    if (entries.length === 0) {
        entries = [['none', 'None']];
    }
    coquiSetSelectOptions(document.getElementById(selectId), entries);
}

/**
 * Fetches a voice list from an Extras endpoint and maps it to voice objects.
 *
 * @param {string} endpointPath - Path appended to `getApiUrl()`.
 * @returns {Promise<Array<{id: *, name: *, voice_id: *, lang: *}>>}
 * @throws {Error} When the response status is not OK; the message carries the raw response body.
 */
async function coquiFetchVoiceList(endpointPath) {
    const response = await doExtrasFetch(`${getApiUrl()}${endpointPath}`);
    if (!response.ok) {
        // Read the body as text: an error body is not guaranteed to be JSON, and
        // parsing it would replace the HTTP error with a parse error.
        throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }
    const voiceData = await response.json();
    // NOTE(review): `id` is filled from `voice.name` and `name` from `voice.id`.
    // The original code flagged this mapping ("this is the issue!!!") but kept it,
    // and getVoice() matches on `name`, so it is preserved here - confirm against the server schema.
    return voiceData.map((voice) => ({
        id: voice.name,
        name: voice.id,
        voice_id: voice.id,
        lang: voice.lang,
    }));
}

/**
 * TTS provider that talks to the `coqui-tts` endpoints of the Extras API.
 */
class CoquiTtsProvider {
    //########//
    // Config //
    //########//

    // Active settings; assigned by loadSettings()
    settings;
    // Voice list fetched by the most recent getVoice() call
    voices = [];
    // Not referenced inside this class - presumably read by the TTS extension; TODO confirm
    separator = ' .. ';
    defaultSettings = {
        voiceMap: {},
    };

    // Stable function reference: the DOM ignores a repeated addEventListener() call
    // with the same listener, so getModels() can run on every reconnect without
    // stacking change handlers.
    onModelSelected = () => {
        this.LoadModel(document.getElementById('coqui_model').value);
    };

    /**
     * Markup for the provider settings: model select with preview button,
     * plus speaker and language selects.
     *
     * @returns {string} HTML fragment.
     */
    get settingsHtml() {
        return `
        <div class="flex wide100p flexGap10 alignitemscenter">
            <div style="flex: 80%;">
                <label for="coqui_model">Model:</label>
                <select id="coqui_model">
                    <option value="none">Select Model</option>
                    <!-- Add more model options here -->
                </select>
            </div>
            <div class="flex justifyCenter" style="flex: 20%;">
                <button id="coqui_preview" class="menu_button menu_button_icon wide100p" type="button">
                </button>
            </div>
        </div>
        <div class="flex wide100p flexGap10">
            <div class="flex1">
                <label for="coqui_speaker">Speaker:</label>
                <select id="coqui_speaker">
                    <!-- Add more speaker options here -->
                </select>
            </div>
            <div class="flex1">
                <label for="coqui_language">Language:</label>
                <select id="coqui_language">
                    <!-- Add more language options here -->
                </select>
            </div>
        </div>
        `;
    }

    /** Intentionally empty: this provider has nothing to do on settings change. */
    onSettingsChange() {
    }

    /**
     * Applies stored settings and wires up the provider UI.
     *
     * @param {Object} settings - Stored settings; only keys present in `defaultSettings` are accepted.
     * @throws {string} `Invalid setting passed to TTS Provider: <key>` for an unknown key
     *     (a string, kept as-is because callers may depend on it).
     */
    loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info("Using default TTS Provider settings");
        }

        const modelSelect = document.getElementById('coqui_model');
        const previewButton = document.getElementById('coqui_preview');
        previewButton.addEventListener('click', () => {
            this.sampleTtsVoice(modelSelect.value);
        });
        // Preview stays disabled until a model has been loaded (see LoadModel)
        previewButton.disabled = true;
        previewButton.innerText = "Select Model";

        // Only accept keys defined in defaultSettings. Work on a copy so applying
        // stored settings never overwrites the defaults themselves.
        this.settings = { ...this.defaultSettings };
        for (const [key, value] of Object.entries(settings)) {
            if (!(key in this.settings)) {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
            this.settings[key] = value;
        }

        const voiceMapInput = document.getElementById('tts_voice_map');
        // Backslashes are doubled so they survive the string literal and show up in the example paths
        voiceMapInput.placeholder = 'Enter comma separated map of charName:ttsName[speakerID][langID]. Example: \nAqua:tts_models--en--ljspeech--glow-tts\\model_file.pth,\nDarkness:tts_models--multilingual--multi-dataset--your_tts\\model_file.pth[2][3]';

        // (Re)load the model list whenever the Extras API connects
        eventSource.on(event_types.EXTRAS_CONNECTED, () => {
            this.getModels();
        });
        this.onttsCoquiHideButtons();
        console.info("Settings loaded");
    }

    /**
     * Toggles `tts_voices` / `tts_preview` visibility whenever the `tts_provider`
     * select changes: 'Coqui' hides `tts_voices` and shows `tts_preview`, any other
     * value does the opposite.
     *
     * NOTE(review): nothing is toggled until the first change event fires - confirm
     * that the initial state is handled elsewhere.
     *
     * @returns {Promise<void>}
     */
    async onttsCoquiHideButtons() {
        const ttsProviderSelect = document.getElementById('tts_provider');
        const ttsVoicesInput = document.getElementById('tts_voices');
        const ttsPreviewInput = document.getElementById('tts_preview');

        ttsProviderSelect.addEventListener('change', () => {
            const isCoqui = ttsProviderSelect.value === 'Coqui';
            ttsVoicesInput.style.display = isCoqui ? 'none' : '';
            ttsPreviewInput.style.display = isCoqui ? '' : 'none';
        });
    }

    /** No-op: this provider has nothing to apply. */
    async onApplyClick() {
        return;
    }

    /**
     * Refreshes the language select from `/api/coqui-tts/multlang`.
     * Falls back to a single "None" option on failure or empty data.
     *
     * @returns {Promise<void>}
     */
    async getLang() {
        await coquiPopulateKeyedSelect('coqui_language', '/api/coqui-tts/multlang');
    }

    /**
     * Refreshes the speaker select from `/api/coqui-tts/multspeaker`.
     * Falls back to a single "None" option on failure or empty data.
     *
     * @returns {Promise<void>}
     */
    async getSpeakers() {
        await coquiPopulateKeyedSelect('coqui_speaker', '/api/coqui-tts/multspeaker');
    }

    /**
     * Rebuilds the model select from `/api/coqui-tts/list` and makes sure a model
     * change triggers LoadModel(). Safe to call repeatedly: options are replaced
     * rather than appended and the change listener is registered only once.
     * On failure the select is reduced to a single "None" option.
     *
     * @returns {Promise<void>}
     */
    async getModels() {
        const modelSelect = document.getElementById('coqui_model');
        try {
            throwIfModuleMissing();
            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/list`);
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const voiceIds = await response.json();

            const previousSelection = modelSelect.value;
            coquiSetSelectOptions(modelSelect, [
                ['none', 'Select Model'],
                ...voiceIds.map((voiceId) => [voiceId, voiceId]),
            ]);
            // Keep the current choice across a refresh when that model is still offered
            if (voiceIds.includes(previousSelection)) {
                modelSelect.value = previousSelection;
            }

            // Load the model on selection change
            modelSelect.addEventListener('change', this.onModelSelected);
        } catch (error) {
            console.error('Error fetching voice IDs:', error);
            coquiSetSelectOptions(modelSelect, [['none', 'None']]);
        }
    }

    /**
     * Asks the Extras API to load a model, then refreshes the speaker and language
     * lists and enables the preview button. The button shows "Loading" while the
     * request is in flight and returns to a disabled "Select Model" state when the
     * placeholder entry is chosen or loading fails.
     *
     * @param {string} selectedModel - Value of the chosen model option.
     * @returns {Promise<void>}
     */
    async LoadModel(selectedModel) {
        const previewButton = document.getElementById('coqui_preview');
        previewButton.disabled = true;

        // 'none' is the placeholder option, not a model
        if (!selectedModel || selectedModel === 'none') {
            previewButton.innerText = "Select Model";
            return;
        }

        previewButton.innerText = "Loading";
        try {
            throwIfModuleMissing();
            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/load?_model=${encodeURIComponent(selectedModel)}`);
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }

            // Populate both lists before enabling the preview, since sampleTtsVoice() reads them
            await Promise.all([this.getSpeakers(), this.getLang()]);

            previewButton.disabled = false;
            previewButton.innerText = "Play";
        } catch (error) {
            console.error('Error updating provider endpoint:', error);
            // Do not leave the button stuck on "Loading"
            previewButton.innerText = "Select Model";
        }
    }

    /**
     * Resolves a voice-map entry to a voice object.
     *
     * Trailing "[n]" groups are stripped for the lookup, e.g.
     * 'tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][1]'
     * is matched as '...your_tts\model_file.pth'. The returned object carries the
     * full, unstripped name in both `name` and `voice_id`.
     *
     * @param {string} voiceName - Voice name, optionally suffixed with "[n]" groups.
     * @returns {Promise<Object>} The matching voice object.
     * @throws {Error} When no fetched voice matches the stripped name.
     */
    async getVoice(voiceName) {
        const baseName = voiceName.replace(/(\[\d+\])+$/, '');

        // Always refetch so follow-up runs never reuse a stale list
        this.voices = [];
        this.voices = await this.fetchCheckMap();

        const match = this.voices.find((voice) => voice.name === baseName);
        if (!match) {
            throw new Error(`TTS Voice name ${baseName} not found`);
        }

        match.name = voiceName;
        match.voice_id = voiceName;
        return match;
    }

    /**
     * Fetches the voice list from `/api/coqui-tts/checkmap`.
     *
     * @returns {Promise<Array<Object>>} Voice objects with `id`, `name`, `voice_id` and `lang`.
     * @throws {Error} When the response status is not OK.
     */
    async fetchCheckMap() {
        return coquiFetchVoiceList('/api/coqui-tts/checkmap');
    }

    /**
     * Fetches the voice list from `/api/coqui-tts/speaker_id`.
     *
     * @returns {Promise<Array<Object>>} Voice objects with `id`, `name`, `voice_id` and `lang`.
     * @throws {Error} When the coqui-tts module is missing or the response status is not OK.
     */
    async fetchTtsVoiceIds() {
        throwIfModuleMissing();
        return coquiFetchVoiceList('/api/coqui-tts/speaker_id');
    }

    /**
     * Synthesizes a fixed sample sentence with the given model and the currently
     * selected speaker/language, then plays it. Failures are logged, not thrown.
     *
     * @param {string} voiceId - Model id sent as `speaker_id`.
     * @returns {Promise<void>}
     */
    async sampleTtsVoice(voiceId) {
        let audioUrl;
        try {
            const selectedSpeaker = document.getElementById('coqui_speaker').value;
            const selectedLanguage = document.getElementById('coqui_language').value;

            const query = [
                ['text', 'The Quick Brown Fox Jumps Over the Lazy Dog.'],
                ['speaker_id', voiceId],
                ['style_wav', ''],
                ['language_id', selectedLanguage],
                ['mspker', selectedSpeaker],
            ].map(([key, value]) => `${key}=${encodeURIComponent(value)}`).join('&');

            const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/tts?${query}`);
            if (!response.ok) {
                // Do not try to play an error body as audio
                throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }

            audioUrl = URL.createObjectURL(await response.blob());
            const audio = new Audio(audioUrl);
            // Release the blob once playback is over
            const releaseAudio = () => URL.revokeObjectURL(audioUrl);
            audio.addEventListener('ended', releaseAudio, { once: true });
            audio.addEventListener('error', releaseAudio, { once: true });
            await audio.play();
        } catch (error) {
            console.error('Error performing TTS request:', error);
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl);
            }
        }
    }

    /**
     * Requests `/api/coqui-tts/download` for the given model (button on the
     * available-voices list) and logs whether the server answered "true".
     *
     * @param {string} voiceId - Model id sent as `model`.
     * @throws {Error} When the coqui-tts module is missing.
     */
    previewTtsVoice(voiceId) {
        throwIfModuleMissing();
        const url = `${getApiUrl()}/api/coqui-tts/download?model=${encodeURIComponent(voiceId)}`;
        doExtrasFetch(url)
            .then((response) => {
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status}: ${response.statusText}`);
                }
                return response.text(); // Expecting a text response
            })
            .then((responseText) => {
                const isResponseTrue = responseText.trim().toLowerCase() === 'true';
                if (isResponseTrue) {
                    console.log("Downloading Model");
                } else {
                    // Any other answer is treated as "already installed", which is not an error
                    console.info('Already Installed');
                }
            })
            .catch((error) => {
                console.error('Error performing download:', error);
            });
    }

    /**
     * Generates speech for the given text.
     *
     * @param {string} text - Text to synthesize.
     * @param {string} voiceId - Voice id passed on to fetchTtsGeneration().
     * @returns {Promise<Response>} The response returned by the Extras API.
     */
    async generateTts(text, voiceId) {
        return this.fetchTtsGeneration(text, voiceId);
    }

    /**
     * Calls `/api/coqui-tts/tts` and returns the raw response.
     *
     * @param {string} inputText - Text to synthesize.
     * @param {string} voiceId - Voice id sent as `speaker_id`.
     * @returns {Promise<Response>} The successful response.
     * @throws {Error} When the coqui-tts module is missing or the response status is not OK.
     */
    async fetchTtsGeneration(inputText, voiceId) {
        throwIfModuleMissing();
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        // voiceId may contain '\', '[' and ']', so it is encoded like the text
        const query = `text=${encodeURIComponent(inputText)}&speaker_id=${encodeURIComponent(voiceId)}`;
        const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/tts?${query}`);
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response;
    }

    /**
     * Returns the given history item id unchanged.
     *
     * @param {*} history_item_id - History item id.
     * @returns {Promise<*>} The same id.
     */
    async fetchTtsFromHistory(history_item_id) {
        return history_item_id;
    }
}