mirror of
				https://github.com/SillyTavern/SillyTavern.git
				synced 2025-06-05 21:59:27 +02:00 
			
		
		
		
	
		
			
				
	
	
		
			248 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			248 lines
		
	
	
		
			8.3 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
| import { isMobile } from '../../RossAscends-mods.js';
 | |
| import { getPreviewString } from './index.js';
 | |
| import { talkingAnimation } from './index.js';
 | |
| import { saveTtsProviderSettings } from './index.js';
 | |
| export { SystemTtsProvider };
 | |
| 
 | |
| /**
 | |
|  * Chunkify
 | |
|  * Google Chrome Speech Synthesis Chunking Pattern
 | |
|  * Fixes inconsistencies with speaking long texts in speechUtterance objects
 | |
|  * Licensed under the MIT License
 | |
|  *
 | |
|  * Peter Woolley and Brett Zamir
 | |
|  * Modified by Haaris for bug fixes
 | |
|  */
 | |
| 
 | |
/**
 * Speaks an utterance through `speechSynthesis`, splitting long text into
 * chunks that are spoken one after another.
 *
 * Utterances whose voice reports `voiceURI === 'native'` are spoken as-is.
 * Otherwise the next chunk is cut from the remaining text with a regex that
 * prefers ending on `.`, `!`, `?` or `,`, then the end of the text, then a
 * space. When no chunk longer than two characters can be cut, speaking is
 * considered finished and `callback` is invoked.
 *
 * Setting `speechUtteranceChunker.cancel = true` stops the chain after the
 * chunk that is currently being spoken; the flag is reset when that chunk ends.
 *
 * @param {SpeechSynthesisUtterance} utt Source utterance (text, voice, lang, rate, pitch, volume).
 * @param {object} [settings] Chunking options. `offset` is written back to this object between chunks.
 * @param {number} [settings.chunkLength=160] Maximum chunk length in characters.
 * @param {number} [settings.offset] Index in `utt.text` at which to resume.
 * @param {function(SpeechSynthesisUtterance): void} [settings.modifier] Hook invoked with each utterance before it is spoken.
 * @param {function(): void} [callback] Invoked once all text has been spoken.
 * @returns {void}
 */
function speechUtteranceChunker(utt, settings, callback) {
    const options = settings || {};
    const remainingText = options.offset !== undefined ? utt.text.substring(options.offset) : utt.text;
    let newUtt;

    if (utt.voice && utt.voice.voiceURI === 'native') { // Not part of the spec
        newUtt = utt;
        newUtt.text = remainingText;
        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
            }
            if (callback !== undefined) {
                callback();
            }
        });
    } else {
        const chunkLength = options.chunkLength || 160;
        const minLength = Math.floor(chunkLength / 2);
        const pattRegex = new RegExp(
            '^[\\s\\S]{' + minLength + ',' + chunkLength + '}[.!?,]{1}' +
            '|^[\\s\\S]{1,' + chunkLength + '}$' +
            '|^[\\s\\S]{1,' + chunkLength + '} ',
        );
        const chunkArr = remainingText.match(pattRegex);

        if (chunkArr == null || chunkArr[0] === undefined || chunkArr[0].length <= 2) {
            // Nothing (meaningful) left to speak: report completion.
            if (callback !== undefined) {
                callback();
            }
            return;
        }

        const chunk = chunkArr[0];
        newUtt = new SpeechSynthesisUtterance(chunk);

        // Carry over any custom own properties the caller attached.
        for (const key in utt) {
            if (Object.hasOwn(utt, key) && key !== 'text') {
                newUtt[key] = utt[key];
            }
        }

        // The standard utterance attributes are accessors on the prototype, not
        // own properties, so the loop above skips them. Copy them explicitly,
        // otherwise every chunk is spoken with default rate/pitch/volume.
        newUtt.lang = utt.lang;
        newUtt.voice = utt.voice;
        newUtt.rate = utt.rate;
        newUtt.pitch = utt.pitch;
        newUtt.volume = utt.volume;

        newUtt.addEventListener('end', function () {
            if (speechUtteranceChunker.cancel) {
                speechUtteranceChunker.cancel = false;
                return;
            }
            options.offset = (options.offset || 0) + chunk.length;
            speechUtteranceChunker(utt, options, callback);
        });
    }

    if (options.modifier) {
        options.modifier(newUtt);
    }

    console.log(newUtt); //IMPORTANT!! Do not remove: Logging the object out fixes some onend firing issues.
    //placing the speak invocation inside a callback fixes ordering and onend issues.
    setTimeout(function () {
        speechSynthesis.speak(newUtt);
        talkingAnimation(true);
    }, 0);
}
 | |
| 
 | |
/**
 * TTS provider that speaks through the browser's Web Speech API
 * (`window.speechSynthesis`) using the voices it reports.
 */
class SystemTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings; populated by loadSettings(). */
    settings;
    ready = false;
    voices = [];
    separator = ' ... ';

    defaultSettings = {
        voiceMap: {},
        rate: 1,
        pitch: 1,
    };

    /**
     * HTML for the provider's settings panel (rate and pitch sliders), or a
     * plain message when the speech synthesis API is unavailable.
     * @returns {string}
     */
    get settingsHtml() {
        if (!('speechSynthesis' in window)) {
            return 'Your browser or operating system doesn\'t support speech synthesis';
        }

        return `<p>Uses the voices provided by your operating system</p>
        <label for="system_tts_rate">Rate: <span id="system_tts_rate_output"></span></label>
        <input id="system_tts_rate" type="range" value="${this.defaultSettings.rate}" min="0.1" max="2" step="0.01" />
        <label for="system_tts_pitch">Pitch: <span id="system_tts_pitch_output"></span></label>
        <input id="system_tts_pitch" type="range" value="${this.defaultSettings.pitch}" min="0" max="2" step="0.01" />`;
    }

    /**
     * Reads the slider values into `this.settings`, refreshes the numeric
     * labels and persists the settings.
     */
    onSettingsChange() {
        this.settings.rate = Number($('#system_tts_rate').val());
        this.settings.pitch = Number($('#system_tts_pitch').val());
        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings on top of the defaults and syncs the settings UI.
     * @param {object} settings Saved provider settings; only keys present in `defaultSettings` are accepted.
     * @throws {string} When `settings` contains a key that is not in `defaultSettings`.
     */
    async loadSettings(settings) {
        const incoming = settings ?? {};

        // Populate Provider UI given input settings
        if (Object.keys(incoming).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // iOS only allows speech synthesis triggered by user interaction, so
        // speak a silent utterance on the first click. `once` detaches the
        // listener after it has run.
        if (isMobile()) {
            document.addEventListener('click', () => {
                const utterance = new SpeechSynthesisUtterance('hi');
                utterance.volume = 0;
                speechSynthesis.speak(utterance);
            }, { once: true });
        }

        // Start from a copy of the defaults so that loading (or later editing)
        // settings never overwrites `defaultSettings` itself.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in incoming) {
            if (key in this.settings) {
                this.settings[key] = incoming[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        // `??` rather than `||`: a pitch of 0 is a valid slider value.
        $('#system_tts_rate').val(this.settings.rate ?? this.defaultSettings.rate);
        $('#system_tts_pitch').val(this.settings.pitch ?? this.defaultSettings.pitch);

        // Trigger updates
        $('#system_tts_rate').on('input', () => { this.onSettingsChange(); });
        $('#system_tts_pitch').on('input', () => { this.onSettingsChange(); });

        $('#system_tts_pitch_output').text(this.settings.pitch);
        $('#system_tts_rate_output').text(this.settings.rate);
        console.debug('SystemTTS: Settings loaded');
    }

    // Perform a simple readiness check by trying to fetch voiceIds
    async checkReady() {
        await this.fetchTtsVoiceObjects();
    }

    /** Nothing to refresh for this provider. */
    async onRefreshClick() {
        return;
    }

    //#################//
    //  TTS Interfaces //
    //#################//

    /**
     * Lists the available voices, sorted by language and then by name.
     * @returns {Array|Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     * An empty array when speech synthesis is unsupported, otherwise a promise of the voice list.
     */
    fetchTtsVoiceObjects() {
        if (!('speechSynthesis' in window)) {
            return [];
        }

        return new Promise((resolve) => {
            // The voice list is read from a timer callback rather than synchronously.
            setTimeout(() => {
                const voices = speechSynthesis
                    .getVoices()
                    .sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
                    .map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));

                resolve(voices);
            }, 1);
        });
    }

    /**
     * Speaks a preview sentence with the given voice, cancelling any speech
     * that is queued or in progress.
     * @param {string} voiceId `voiceURI` of the voice to preview.
     * @throws {string} When speech synthesis is unsupported or the voice is unknown.
     */
    previewTtsVoice(voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const voice = speechSynthesis.getVoices().find(x => x.voiceURI === voiceId);

        if (!voice) {
            throw `TTS Voice id ${voiceId} not found`;
        }

        speechSynthesis.cancel();
        const text = getPreviewString(voice.lang);
        const utterance = new SpeechSynthesisUtterance(text);
        utterance.voice = voice;
        utterance.rate = this.settings.rate ?? 1;
        utterance.pitch = this.settings.pitch ?? 1;
        speechSynthesis.speak(utterance);
    }

    /**
     * Looks up a voice by its display name.
     * @param {string} voiceName Voice name as reported by `speechSynthesis.getVoices()`.
     * @returns {Promise<{voice_id: string|null, name?: string}>} `{ voice_id: null }` when speech synthesis is unsupported.
     * @throws {string} When no voice has that name.
     */
    async getVoice(voiceName) {
        if (!('speechSynthesis' in window)) {
            return { voice_id: null };
        }

        const voices = speechSynthesis.getVoices();
        const match = voices.find(x => x.name === voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }

        return { voice_id: match.voiceURI, name: match.name };
    }

    /**
     * Speaks `text` with the given voice via speechUtteranceChunker.
     * @param {string} text Text to speak.
     * @param {string} voiceId `voiceURI` of the voice to use.
     * @returns {Promise<Response>} Resolves with the fetched `/sounds/silence.mp3`
     * response once speaking has finished.
     * @throws {string} When speech synthesis is unsupported.
     */
    async generateTts(text, voiceId) {
        if (!('speechSynthesis' in window)) {
            throw 'Speech synthesis API is not supported';
        }

        const silence = await fetch('/sounds/silence.mp3');

        return new Promise((resolve, reject) => {
            const voices = speechSynthesis.getVoices();
            const voice = voices.find(x => x.voiceURI === voiceId);
            const utterance = new SpeechSynthesisUtterance(text);
            utterance.voice = voice;
            utterance.rate = this.settings.rate ?? 1;
            utterance.pitch = this.settings.pitch ?? 1;
            utterance.onend = () => resolve(silence);
            utterance.onerror = () => reject();
            speechUtteranceChunker(utterance, {
                chunkLength: 200,
            }, function () {
                //some code to execute when done
                resolve(silence);
                console.log('System TTS done');
                talkingAnimation(false);
            });
        });
    }
}
 |