Coqui TTS Addition

2025-06-05 21:59:27 +02:00 · 2023-07-25 09:59:08 +09:00
parent c4209b9448
commit ce3d76d662
2 changed files with 407 additions and 0 deletions
--- a/public/scripts/extensions/tts/coquitts.js
+++ b/public/scripts/extensions/tts/coquitts.js
@@ -0,0 +1,405 @@
 import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
 export { CoquiTtsProvider }
 class CoquiTtsProvider {
    //########//
    // Config //
    //########//
    settings
    voices = []
    separator = ' .. '
    defaultSettings = {
        provider_endpoint: "http://localhost:5100",
        voiceMap: {}
    }
    get settingsHtml() {
        let html = `
        <div style="display: flex; width: 100%;">
        <div style="flex: 80%;">
          <label for="model">Model:</label>
          <select id="model">
            <option value="none">Select Model</option>
            <!-- Add more model options here -->
          </select>
        </div>
        <div style="flex: 20%; display: flex; justify-content: center;">
            <button id="preview" class="menu_button" type="button" style="width: 100%;">Play</button>
        </div>
      </div>
        <div style="display: flex; width: 100%;">
        <div style="flex: 1; margin-right: 10px;">
            <label for="speaker">Speaker:</label>
            <select id="speaker">
                <!-- Add more speaker options here -->
            </select>
        </div>
        <div style="flex: 1;">
            <label for="language">Language:</label>
            <select id="language">
                <!-- Add more language options here -->
            </select>
        </div>
    </div>
        <label for="Coqui_tts_endpoint">Provider Endpoint:</label>
        <input id="Coqui_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
        `
        return html
    }
    onSettingsChange() {
        // Used when provider settings are updated from UI
        this.settings.provider_endpoint = $('#Coqui_tts_endpoint').val()
    }
    loadSettings(settings) {
        // Pupulate Provider UI given input settings
        if (Object.keys(settings).length == 0) {
            console.info("Using default TTS Provider settings")
        }
        const modelSelect = document.getElementById('model');
        const previewButton = document.getElementById('preview');
        previewButton.addEventListener('click', () => {
            const selectedModel = modelSelect.value;
            this.sampleTtsVoice(selectedModel);
        });//add event listener to button
        previewButton.disabled = true;
        previewButton.innerText = "Select Model";
        // Only accept keys defined in defaultSettings
        this.settings = this.defaultSettings
        for (const key in settings){
            if (key in this.settings){
                this.settings[key] = settings[key]
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`
            }
        }
        const apiCheckInterval = setInterval(() => {
            // Use Extras API if TTS support is enabled
            if (modules.includes('tts') || modules.includes('Coqui-tts')) {
                const baseUrl = new URL(getApiUrl());
                baseUrl.pathname = '/api/coqui-tts/coqui-tts';
                this.settings.provider_endpoint = baseUrl.toString();
                $('#Coqui_tts_endpoint').val(this.settings.provider_endpoint);
                clearInterval(apiCheckInterval);
            }
        }, 2000);
        $('#Coqui_tts_endpoint').val(this.settings.provider_endpoint)
        const textexample = document.getElementById('tts_voice_map');
        textexample.placeholder = 'Enter comma separated map of charName:ttsName[speakerID][langID]. Example: \nAqua:tts_models--en--ljspeech--glow-tts\model_file.pth,\nDarkness:tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][3]';
        //Load models function
        this.getModels();
        this.onttsCoquiHideButtons();
        console.info("Settings loaded")
    }
    async onttsCoquiHideButtons(){
        // Get references to the select element and the two input elements
        const ttsProviderSelect = document.getElementById('tts_provider');
        const ttsVoicesInput = document.getElementById('tts_voices');
        const ttsPreviewInput = document.getElementById('tts_preview');
        // Add an event listener to the 'change' event of the tts_provider select element
        ttsProviderSelect.addEventListener('change', () => {
        // Check if the selected value is 'Coqui'
        if (ttsProviderSelect.value === 'Coqui') {
            ttsVoicesInput.style.display = 'none'; // Hide the tts_voices input
            ttsPreviewInput.style.display = ''; // Show the tts_preview input
        } else {
            ttsVoicesInput.style.display = ''; // Show the tts_voices input
            ttsPreviewInput.style.display = 'none'; // Hide the tts_preview input
        }
        });
 }
    async onApplyClick() {
        return
    }
    async getLang() {
        try {
          const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multlang`);
          if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
          }
          const voiceData = await response.json();
          const modelSelect = document.getElementById('language');
          modelSelect.innerHTML = ''; // Clear existing options
          if (Object.keys(voiceData).length === 0) {
            const option = document.createElement('option');
            option.value = 'none';
            option.textContent = 'None';
            modelSelect.appendChild(option);
          } else {
            for (const [key, value] of Object.entries(voiceData)) {
              const option = document.createElement('option');
              option.value = key;
              option.textContent = key + ": " + value;
              modelSelect.appendChild(option);
            }
          }
        } catch (error) {
          //console.error('Error fetching voice data:', error);
          // Remove all options except "None"
          const modelSelect = document.getElementById('language');
          modelSelect.innerHTML = '';
          const option = document.createElement('option');
          option.value = 'none';
          option.textContent = 'None';
          modelSelect.appendChild(option);
        } 
      }
      async getSpeakers() {
          try {
            const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multspeaker`);
            if (!response.ok) {
              throw new Error(`HTTP ${response.status}: ${response.statusText}`);
            }
            const voiceData = await response.json();
            const modelSelect = document.getElementById('speaker');
            modelSelect.innerHTML = ''; // Clear existing options
            if (Object.keys(voiceData).length === 0) {
              const option = document.createElement('option');
              option.value = 'none';
              option.textContent = 'None';
              modelSelect.appendChild(option);
            } else {
              for (const [index, name] of Object.entries(voiceData)) {
                const option = document.createElement('option');
                option.value = index;
                option.textContent = index + ": " + name;
                modelSelect.appendChild(option);
              }
            }
          } catch (error) {
            //console.error('Error fetching voice data:', error);
            // Remove all options except "None"
            const modelSelect = document.getElementById('speaker');
            modelSelect.innerHTML = '';
            const option = document.createElement('option');
            option.value = 'none';
            option.textContent = 'None';
            modelSelect.appendChild(option);
          }           
      }
      async getModels() {
        try {
          const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/list`);
          if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
          }
          const voiceIds = await response.json();
          const modelSelect = document.getElementById('model');
          if (voiceIds.length === 0) {
            const option = document.createElement('option');
            option.value = 'none';
            option.textContent = 'Select Model';
            modelSelect.appendChild(option);
          } else {
            voiceIds.forEach(voiceId => {
              const option = document.createElement('option');
              option.value = voiceId;
              option.textContent = voiceId;
              modelSelect.appendChild(option);
            });
          }
          // Update provider endpoint on model selection change
          modelSelect.addEventListener('change', () => {
            const selectedModel = modelSelect.value;
            this.LoadModel(selectedModel);
          });
        } catch (error) {
          console.error('Error fetching voice IDs:', error);
          // Add "None" option when the request fails or the response is empty
          const modelSelect = document.getElementById('model');
          const option = document.createElement('option');
          option.value = 'none';
          option.textContent = 'None';
          modelSelect.appendChild(option);
        }
      }
      async LoadModel(selectedModel) {
        const previewButton = document.getElementById('preview');
        previewButton.disabled = true;
        previewButton.innerText = "Loading";
        try {
          const response = await fetch(`${this.defaultSettings.provider_endpoint}/api/coqui-tts/load?_model=${selectedModel}`);
          if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${response.statusText}`);
          }
          this.getSpeakers();
          this.getLang();
          const previewButton = document.getElementById('preview');
          previewButton.disabled = false;
          previewButton.innerText = "Play";
        } catch (error) {
          console.error('Error updating provider endpoint:', error);
        }
      }
      async getVoice(voiceName) {
        //tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][1]
        //tts_models--en--ljspeech--glow-tts\model_file.pth
        let _voiceNameOrg = voiceName; // Store the original voiceName in a variable _voiceNameOrg
        voiceName = voiceName.replace(/(\[\d+\])+$/, ''); // For example, converts 'model[2][1]' to 'model'
        this.voices = []; //reset for follow up runs
        if (this.voices.length === 0) { this.voices = await this.fetchCheckMap(); }
        // Search for a voice object in the 'this.voices' array where the 'name' property matches the provided 'voiceName'
        //const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);
        const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);
        // If no match is found, throw an error indicating that the TTS Voice name was not found
        if (!match) {
          throw new Error(`TTS Voice name ${voiceName} not found`);
        } else {
          match.name = _voiceNameOrg;
          match.voice_id = _voiceNameOrg;
        }
        // Return the matched voice object (with the 'name' property updated if a match was found)
        return match;
      }
    async fetchCheckMap() {
      const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/checkmap`;
      const response = await fetch(endpoint);
      if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${await response.json()}`);
      }
      const voiceData = await response.json();
      const voices = voiceData.map((voice) => ({
      id: voice.name,
      name: voice.id, // this is the issue!!!
      voice_id: voice.id, // this is the issue!!!
      //preview_url: false, 
      lang: voice.lang,
      }));
      return voices;
    }
 	async fetchTtsVoiceIds() {
 	  const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/speaker_id`;
 	  const response = await fetch(endpoint);
 	  if (!response.ok) {
 		throw new Error(`HTTP ${response.status}: ${await response.json()}`);
 	  }
 	  const voiceData = await response.json();
 	  const voices = voiceData.map((voice) => ({
 		id: voice.name,
 		name: voice.id, //add filename here
 		voice_id: voice.id, 
    //preview_url: false,
    //preview_url: `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voice.id}`, 
    //http://localhost:5100/api/coqui-tts/speaker_id/tts_models/en/ljspeech/speedy-speech
 		lang: voice.lang,
 	  }));
 	  return voices;
 	}
  sampleTtsVoice(voiceId) {
    // Get the selected values of speaker and language
    const speakerSelect = document.getElementById('speaker');
    const languageSelect = document.getElementById('language');
    const selectedSpeaker = speakerSelect.value;
    const selectedLanguage = languageSelect.value;
    // Construct the URL with the selected values
    const url = `${this.settings.provider_endpoint}/api/coqui-tts/tts?text=The%20Quick%20Brown%20Fox%20Jumps%20Over%20the%20Lazy%20Dog.&speaker_id=${voiceId}&style_wav=&language_id=${selectedLanguage}&mspker=${selectedSpeaker}`;
    fetch(url)
      .then(response => response.blob())
      .then(blob => {
        const audioUrl = URL.createObjectURL(blob);
        // Play the audio
        const audio = new Audio(audioUrl);
        audio.play();
      })
      .catch(error => {
        console.error('Error performing TTS request:', error);
      });
  }
  previewTtsVoice(voiceId) { //button on avail voices
    const url = `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voiceId}`;
    fetch(url)
      .then(response => response.text()) // Expecting a text response
      .then(responseText => {
        const isResponseTrue = responseText.trim().toLowerCase() === 'true';
        if (isResponseTrue) {
          console.log("Downloading Model") //if true
        } else {
          console.error('Already Installed'); //if false
        }
      })
      .catch(error => {
        console.error('Error performing download:', error);
      });
  }
 	async generateTts(text, voiceId){
        const response = await this.fetchTtsGeneration(text, voiceId)
        return response
    }
 	async fetchTtsGeneration(inputText, voiceId) {
    console.info(`Generating new TTS for voice_id ${voiceId}`);
    const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/tts?text=${encodeURIComponent(inputText)}&speaker_id=${voiceId}`);
    if (!response.ok) {
        toastr.error(response.statusText, 'TTS Generation Failed');
        throw new Error(`HTTP ${response.status}: ${await response.text()}`);
    }
        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }
        return response
    }
    async fetchTtsFromHistory(history_item_id) {
        return Promise.resolve(history_item_id);
    }
 }
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -4,6 +4,7 @@ import { escapeRegex, getStringHash } from '../../utils.js'
 import { EdgeTtsProvider } from './edge.js'
 import { ElevenLabsTtsProvider } from './elevenlabs.js'
 import { SileroTtsProvider } from './silerotts.js'
 import { CoquiTtsProvider } from './coquitts.js'
 import { SystemTtsProvider } from './system.js'
 import { NovelTtsProvider } from './novel.js'
 import { isMobile } from '../../RossAscends-mods.js'
@@ -64,6 +65,7 @@ let ttsProviders = {
    ElevenLabs: ElevenLabsTtsProvider,
    Silero: SileroTtsProvider,
    System: SystemTtsProvider,
    Coqui: CoquiTtsProvider,
    Edge: EdgeTtsProvider,
    Novel: NovelTtsProvider,
 }