mirror of
				https://github.com/SillyTavern/SillyTavern.git
				synced 2025-06-05 21:59:27 +02:00 
			
		
		
		
	Add system TTS provider to the extension
This commit is contained in:
		| @@ -3,6 +3,7 @@ import { extension_settings, getContext } from '../../extensions.js' | ||||
| import { getStringHash } from '../../utils.js' | ||||
| import { ElevenLabsTtsProvider } from './elevenlabs.js' | ||||
| import { SileroTtsProvider } from './silerotts.js' | ||||
| import { SystemTtsProvider } from './system.js' | ||||
|  | ||||
| const UPDATE_INTERVAL = 1000 | ||||
|  | ||||
| @@ -17,7 +18,8 @@ let lastMessageHash = null | ||||
|  | ||||
| let ttsProviders = { | ||||
|     ElevenLabs: ElevenLabsTtsProvider, | ||||
|     Silero: SileroTtsProvider | ||||
|     Silero: SileroTtsProvider, | ||||
|     System: SystemTtsProvider, | ||||
| } | ||||
| let ttsProvider | ||||
| let ttsProviderName | ||||
| @@ -112,7 +114,13 @@ async function playAudioData(audioBlob) { | ||||
|  | ||||
| window['tts_preview'] = function (id) { | ||||
|     const audio = document.getElementById(id) | ||||
|     audio.play() | ||||
|  | ||||
|     if (!audio.hidden) { | ||||
|         audio.play() | ||||
|     } | ||||
|     else { | ||||
|         ttsProvider.previewTtsVoice(id) | ||||
|     } | ||||
| } | ||||
|  | ||||
| async function onTtsVoicesClick() { | ||||
| @@ -123,7 +131,7 @@ async function onTtsVoicesClick() { | ||||
|  | ||||
|         for (const voice of voiceIds) { | ||||
|             popupText += `<div class="voice_preview"><b>${voice.name}</b> <i onclick="tts_preview('${voice.voice_id}')" class="fa-solid fa-play"></i></div>` | ||||
|             popupText += `<audio id="${voice.voice_id}" src="${voice.preview_url}"></audio>` | ||||
|             popupText += `<audio id="${voice.voice_id}" src="${voice.preview_url}" hidden="${!!voice.preview_url}"></audio>` | ||||
|         } | ||||
|     } catch { | ||||
|         popupText = 'Could not load voices list. Check your API key.' | ||||
|   | ||||
							
								
								
									
										140
									
								
								public/scripts/extensions/tts/system.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										140
									
								
								public/scripts/extensions/tts/system.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,140 @@ | ||||
| export { SystemTtsProvider } | ||||
|  | ||||
/**
 * TTS provider that speaks through the browser's Web Speech API
 * (`window.speechSynthesis`) instead of producing audio data itself.
 *
 * Failures are signalled by throwing plain strings, matching the values this
 * class has always thrown, so existing callers that display the thrown value
 * keep working.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    // Pangram-style preview sentences keyed by the voice's `lang` tag.
    previewStrings = {
        'en-US': 'The quick brown fox jumps over the lazy dog',
        'en-GB': 'Sphinx of black quartz, judge my vow',
        'fr-FR': 'Portez ce vieux whisky au juge blond qui fume',
        'de-DE': 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich',
        'it-IT': "Pranzo d'acqua fa volti sghembi",
        'es-ES': 'Quiere la boca exhausta vid, kiwi, piña y fugaz jamón',
        'es-MX': 'Fabio me exige, sin tapujos, que añada cerveza al whisky',
        'ru-RU': 'В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!',
        'pt-BR': 'Vejo xá gritando que fez show sem playback.',
        // Portugal's region subtag is PT; the former 'pt-PR' key could never match a voice.
        'pt-PT': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'uk-UA': "Фабрикуймо гідність, лящім їжею, ґав хапаймо, з'єднавці чаш!",
    }
    // Spoken when the voice's language has no entry in previewStrings.
    fallbackPreview = 'Neque porro quisquam est qui dolorem ipsum quia dolor sit amet'
    settings
    voices = []

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    }

    /**
     * Markup for the provider's settings panel: rate and pitch sliders, or a
     * plain notice when the browser exposes no speech synthesis API.
     * @returns {string} HTML fragment (or plain text notice).
     */
    get settingsHtml() {
        if (!window.speechSynthesis) {
            return "Your browser or operating system doesn't support speech synthesis";
        }

        return `<p>Uses the voices provided by your operating system</p>
        <label for="system_tts_rate">Rate: <span id="system_tts_rate_output"></span></label>
        <input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.5" max="2" step="0.1" />
        <label for="system_tts_pitch">Pitch: <span id="system_tts_pitch_output"></span></label>
        <input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.1" />`;
    }

    /**
     * Reads the slider values into `this.settings` and refreshes the numeric
     * labels next to the sliders.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.log('Save changes');
    }

    /**
     * Initializes `this.settings` from the defaults overlaid with `settings`,
     * then pushes rate and pitch into the sliders and their labels.
     * @param {object} [settings] Saved provider settings; only keys present in
     *     `defaultSettings` are accepted. Null/undefined is treated as empty.
     * @throws {string} When `settings` contains a key unknown to the provider.
     */
    loadSettings(settings) {
        const incoming = settings ?? {};

        // Populate Provider UI given input settings
        if (Object.keys(incoming).length === 0) {
            console.info("Using default TTS Provider settings");
        }

        // Work on a copy so loading saved values never overwrites the defaults.
        // (Shallow copy: the nested voiceMap object is still shared.)
        this.settings = { ...this.defaultSettings };

        // Only accept keys defined in defaultSettings
        for (const [key, value] of Object.entries(incoming)) {
            if (key in this.settings) {
                this.settings[key] = value;
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // `??` rather than `||`: a pitch of 0 is a valid slider position.
        $('#system_tts_rate').val(this.settings.rate ?? this.defaultSettings.rate);
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.info("Settings loaded");
    }

    /** No-op: this provider has nothing to apply. */
    async onApplyClick() {
        return
    }

    //#################//
    //  TTS Interfaces // 
    //#################//

    /**
     * Lists the voices reported by the speech synthesis API.
     * @returns {{name: string, voice_id: string, preview_url: string}[]}
     *     One entry per voice, with `voice_id` set to the voice URI and an
     *     empty `preview_url`; an empty array when synthesis is unavailable.
     */
    fetchTtsVoiceIds() {
        if (!window.speechSynthesis) {
            return [];
        }

        return speechSynthesis.getVoices().map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: '' }));
    }

    /**
     * Speaks a sample sentence with the given voice at neutral rate and pitch,
     * cancelling any speech already in progress.
     * @param {string} voiceId Voice URI as returned in `voice_id`.
     * @throws {string} When no voice with that URI exists.
     */
    previewTtsVoice(voiceId) {
        const voice = speechSynthesis.getVoices().find(x => x.voiceURI === voiceId);

        if (!voice) {
            // Report the id that was actually looked up.
            throw `TTS Voice name ${voiceId} not found`
        }

        speechSynthesis.cancel();
        const text = this.previewStrings[voice.lang] ?? this.fallbackPreview;
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        utterance.rate = 1;
        utterance.pitch = 1;
        speechSynthesis.speak(utterance);
    }

    /**
     * Resolves a voice display name to its identifier.
     * @param {string} voiceName Voice name to look up.
     * @returns {Promise<{voice_id: (string|null), name?: string}>}
     *     `{ voice_id: null }` when synthesis is unavailable.
     * @throws {string} When no voice with that name exists.
     */
    async getVoice(voiceName) {
        if (!window.speechSynthesis) {
            return { voice_id: null }
        }

        const voices = window.speechSynthesis.getVoices();
        const match = voices.find(x => x.name === voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks `text` with the selected voice and the configured rate/pitch.
     * @param {string} text Text to speak.
     * @param {string} voiceId Voice URI; if no voice matches, `voice` is left
     *     unset on the utterance.
     * @returns {Promise<*>} Resolves, once the utterance has ended, with the
     *     fetch response for `/sounds/silence.mp3` (presumably so the caller's
     *     audio pipeline has something to play — confirm against the caller).
     *     Rejects with the synthesis error code (or the error event) on failure.
     * @throws {string} When the speech synthesis API is unavailable.
     */
    async generateTts(text, voiceId) {
        if (!window.speechSynthesis) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voices = speechSynthesis.getVoices();
            const voice = voices.find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.voice = voice;
            // `??` keeps an explicit 0 (the pitch slider's minimum) instead of resetting it to 1.
            utterance.rate = this.settings.rate ?? 1;
            utterance.pitch = this.settings.pitch ?? 1;
            utterance.onend = () => resolve(silence);
            // Surface the failure reason instead of rejecting with undefined.
            utterance.onerror = (event) => reject(event?.error ?? event);
            speechSynthesis.speak(utterance);
        });
    }
}
							
								
								
									
										
											BIN
										
									
								
								public/sounds/silence.mp3
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								public/sounds/silence.mp3
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user