mirror of
				https://github.com/SillyTavern/SillyTavern.git
				synced 2025-06-05 21:59:27 +02:00 
			
		
		
		
	Add system TTS provider to the extension
This commit is contained in:
		@@ -3,6 +3,7 @@ import { extension_settings, getContext } from '../../extensions.js'
 | 
			
		||||
import { getStringHash } from '../../utils.js'
 | 
			
		||||
import { ElevenLabsTtsProvider } from './elevenlabs.js'
 | 
			
		||||
import { SileroTtsProvider } from './silerotts.js'
 | 
			
		||||
import { SystemTtsProvider } from './system.js'
 | 
			
		||||
 | 
			
		||||
const UPDATE_INTERVAL = 1000
 | 
			
		||||
 | 
			
		||||
@@ -17,7 +18,8 @@ let lastMessageHash = null
 | 
			
		||||
 | 
			
		||||
// Registry of the available TTS provider classes, keyed by the provider
// name. Each provider is listed exactly once.
let ttsProviders = {
    ElevenLabs: ElevenLabsTtsProvider,
    Silero: SileroTtsProvider,
    System: SystemTtsProvider,
}
 | 
			
		||||
let ttsProvider
 | 
			
		||||
let ttsProviderName
 | 
			
		||||
@@ -112,7 +114,13 @@ async function playAudioData(audioBlob) {
 | 
			
		||||
 | 
			
		||||
/**
 * Plays a preview for the voice whose <audio> element has the given id.
 *
 * Exposed on `window` because it is invoked from an inline `onclick`
 * attribute in the voices popup markup.
 *
 * @param {string} id - Element id of the voice's <audio> tag (the voice id).
 */
window['tts_preview'] = function (id) {
    const audio = document.getElementById(id)

    // Decide on the presence of a preview URL rather than on `audio.hidden`:
    // the HTML `hidden` attribute hides the element for ANY value (including
    // hidden="false"), so `audio.hidden` cannot tell the two cases apart.
    if (audio && audio.getAttribute('src')) {
        // The provider supplied a pre-rendered sample: play it exactly once.
        audio.play()
    }
    else {
        // No sample URL available: let the active provider speak a preview.
        ttsProvider.previewTtsVoice(id)
    }
}
 | 
			
		||||
 | 
			
		||||
async function onTtsVoicesClick() {
 | 
			
		||||
@@ -123,7 +131,7 @@ async function onTtsVoicesClick() {
 | 
			
		||||
 | 
			
		||||
        for (const voice of voiceIds) {
 | 
			
		||||
            popupText += `<div class="voice_preview"><b>${voice.name}</b> <i onclick="tts_preview('${voice.voice_id}')" class="fa-solid fa-play"></i></div>`
 | 
			
		||||
            popupText += `<audio id="${voice.voice_id}" src="${voice.preview_url}"></audio>`
 | 
			
		||||
            popupText += `<audio id="${voice.voice_id}" src="${voice.preview_url}" hidden="${!!voice.preview_url}"></audio>`
 | 
			
		||||
        }
 | 
			
		||||
    } catch {
 | 
			
		||||
        popupText = 'Could not load voices list. Check your API key.'
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										140
									
								
								public/scripts/extensions/tts/system.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								public/scripts/extensions/tts/system.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,140 @@
 | 
			
		||||
export { SystemTtsProvider }
 | 
			
		||||
 | 
			
		||||
/**
 * TTS provider backed by the browser's Web Speech API (`speechSynthesis`).
 *
 * Errors are signalled by throwing / rejecting with plain strings, matching
 * the messages this class has always produced.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    // Sample sentences spoken by previewTtsVoice(), keyed by the voice's
    // `lang` tag.
    previewStrings = {
        'en-US': 'The quick brown fox jumps over the lazy dog',
        'en-GB': 'Sphinx of black quartz, judge my vow',
        'fr-FR': 'Portez ce vieux whisky au juge blond qui fume',
        'de-DE': 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich',
        'it-IT': "Pranzo d'acqua fa volti sghembi",
        'es-ES': 'Quiere la boca exhausta vid, kiwi, piña y fugaz jamón',
        'es-MX': 'Fabio me exige, sin tapujos, que añada cerveza al whisky',
        'ru-RU': 'В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!',
        'pt-BR': 'Vejo xá gritando que fez show sem playback.',
        // European Portuguese is tagged "pt-PT" (the previous "pt-PR" key
        // could never match a voice's language tag).
        'pt-PT': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'uk-UA': "Фабрикуймо гідність, лящім їжею, ґав хапаймо, з'єднавці чаш!",
    }
    // Spoken when the voice's language has no entry in previewStrings.
    fallbackPreview = 'Neque porro quisquam est qui dolorem ipsum quia dolor sit amet'
    settings
    voices = []

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    }

    /**
     * Markup for this provider's settings panel.
     *
     * @returns {string} Rate/pitch sliders, or a notice when the browser
     *     exposes no `window.speechSynthesis`.
     */
    get settingsHtml() {
        if (!window.speechSynthesis) {
            return "Your browser or operating system doesn't support speech synthesis";
        }

        return `<p>Uses the voices provided by your operating system</p>
        <label for="system_tts_rate">Rate: <span id="system_tts_rate_output"></span></label>
        <input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.5" max="2" step="0.1" />
        <label for="system_tts_pitch">Pitch: <span id="system_tts_pitch_output"></span></label>
        <input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.1" />`;
    }

    /**
     * Reads the slider values into `this.settings` and refreshes the
     * numeric labels next to the sliders.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.log('Save changes');
    }

    /**
     * Applies stored settings on top of the defaults and updates the UI.
     *
     * @param {object} [settings] - Stored settings; only keys present in
     *     `defaultSettings` are accepted.
     * @throws {string} When `settings` contains an unknown key.
     */
    loadSettings(settings = {}) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info("Using default TTS Provider settings");
        }

        // Work on a copy so that loading settings never overwrites
        // `defaultSettings` itself (the settings markup reads from it).
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // `??` instead of `||`: a pitch of 0 is a valid slider value and
        // must not be replaced by the default.
        $('#system_tts_rate').val(this.settings.rate ?? this.defaultSettings.rate);
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.info("Settings loaded");
    }

    /** Nothing to apply for this provider; resolves immediately. */
    async onApplyClick() {
        return
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Lists the voices reported by `speechSynthesis.getVoices()`.
     *
     * @returns {{name: string, voice_id: string, preview_url: string}[]}
     *     One entry per voice with an empty `preview_url`; an empty array
     *     when speech synthesis is unavailable.
     */
    fetchTtsVoiceIds() {
        if (!window.speechSynthesis) {
            return [];
        }

        return speechSynthesis.getVoices().map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: '' }));
    }

    /**
     * Speaks a short sample sentence with the given voice at rate 1 and
     * pitch 1, cancelling any speech already queued.
     *
     * @param {string} voiceId - `voiceURI` of the voice to preview.
     * @throws {string} When speech synthesis is unavailable or no voice has
     *     that `voiceURI`.
     */
    previewTtsVoice(voiceId) {
        if (!window.speechSynthesis) {
            throw 'Speech synthesis API is not supported';
        }

        const voice = speechSynthesis.getVoices().find(x => x.voiceURI === voiceId);

        if (!voice) {
            // Report the id that was actually looked up (the message used to
            // interpolate an undefined variable and raised a ReferenceError).
            throw `TTS Voice name ${voiceId} not found`
        }

        speechSynthesis.cancel();
        const text = this.previewStrings[voice.lang] ?? this.fallbackPreview;
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        utterance.rate = 1;
        utterance.pitch = 1;
        speechSynthesis.speak(utterance);
    }

    /**
     * Looks a voice up by its display name.
     *
     * @param {string} voiceName - The voice's `name`.
     * @returns {Promise<{voice_id: (string|null), name?: string}>}
     *     `{ voice_id: null }` when speech synthesis is unavailable.
     * @throws {string} When no voice has that name.
     */
    async getVoice(voiceName) {
        if (!window.speechSynthesis) {
            return { voice_id: null }
        }

        const voices = window.speechSynthesis.getVoices();
        const match = voices.find(x => x.name === voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks `text` with the given voice using the configured rate/pitch.
     *
     * @param {string} text - Text to speak.
     * @param {string} voiceId - `voiceURI` of the voice to use.
     * @returns {Promise<Response>} Resolves, once the utterance has ended,
     *     with the response fetched from '/sounds/silence.mp3'.
     * @throws {string} When speech synthesis is unavailable; the returned
     *     promise rejects with a string when the utterance reports an error.
     */
    async generateTts(text, voiceId) {
        if (!window.speechSynthesis) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voices = speechSynthesis.getVoices();
            const voice = voices.find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.voice = voice;
            utterance.rate = this.settings.rate || 1;
            // `??` keeps a configured pitch of 0 (the slider's minimum).
            utterance.pitch = this.settings.pitch ?? 1;
            utterance.onend = () => resolve(silence);
            // Pass a reason along instead of rejecting with `undefined`.
            utterance.onerror = (event) => reject(`Speech synthesis failed: ${event?.error ?? 'unknown error'}`);
            speechSynthesis.speak(utterance);
        });
    }
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user