mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-06-05 21:59:27 +02:00
Add MS Edge TTS
This commit is contained in:
124
public/scripts/extensions/tts/edge.js
Normal file
124
public/scripts/extensions/tts/edge.js
Normal file
@@ -0,0 +1,124 @@
|
||||
import { getRequestHeaders } from "../../../script.js"
|
||||
import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
|
||||
import { getPreviewString } from "./index.js"
|
||||
|
||||
export { EdgeTtsProvider }
|
||||
|
||||
/**
 * TTS provider backed by the `/edge_voices` and `/edge_speech` endpoints,
 * reached through `doExtrasFetch`.
 *
 * Voices are fetched lazily on the first `getVoice` call and cached in
 * `voices` for the lifetime of the instance.
 */
class EdgeTtsProvider {
    //########//
    // Config //
    //########//

    settings;
    voices = [];
    separator = ' .. ';

    defaultSettings = {
        voiceMap: {},
    };

    /**
     * Markup shown in the provider's settings panel.
     * @returns {string} Static HTML snippet.
     */
    get settingsHtml() {
        return `Microsoft Edge TTS Provider<br>`;
    }

    /** This provider exposes no interactive settings, so there is nothing to react to. */
    onSettingsChange() {
    }

    /**
     * Populate the provider settings from previously saved values.
     *
     * Only keys that exist in `defaultSettings` are accepted. `this.settings`
     * is replaced only after every key has been validated, so a rejected
     * call leaves the previous settings untouched.
     *
     * @param {object} [settings] Saved settings; empty or missing means "use defaults".
     * @throws {Error} If `settings` contains a key that `defaultSettings` does not define.
     */
    loadSettings(settings) {
        const incoming = settings ?? {};
        if (Object.keys(incoming).length === 0) {
            console.info("Using default TTS Provider settings");
        }

        // Work on a deep copy: assigning `defaultSettings` directly would make
        // every later write to `this.settings` silently rewrite the defaults.
        const merged = structuredClone(this.defaultSettings);

        for (const [key, value] of Object.entries(incoming)) {
            // Own-property check so inherited names such as "toString" are rejected too.
            if (!Object.prototype.hasOwnProperty.call(merged, key)) {
                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
            }
            merged[key] = value;
        }

        this.settings = merged;
        console.info("Settings loaded");
    }

    /** Nothing to apply for this provider. */
    async onApplyClick() {
        return;
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Look up a voice by name, fetching the voice list first if it is not cached yet.
     *
     * @param {string} voiceName Name to match against each voice's `name`.
     * @returns {Promise<object>} The matching voice entry.
     * @throws {Error} If no voice has that name.
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceIds();
        }

        const match = this.voices.find((voice) => voice.name === voiceName);
        if (!match) {
            throw new Error(`TTS Voice name ${voiceName} not found`);
        }
        return match;
    }

    /**
     * Generate speech for the given text.
     *
     * @param {string} text Text to synthesize.
     * @param {string} voiceId Voice identifier sent as `voice` in the request.
     * @returns {Promise<Response>} The successful fetch response.
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Build an Error describing a failed response.
     *
     * The body is read as text rather than JSON: interpolating a parsed
     * object would print "[object Object]", and a non-JSON error body would
     * throw a parse error that hides the HTTP status.
     *
     * @param {Response} response A response whose `ok` flag is false.
     * @returns {Promise<Error>} Error with the message `HTTP <status>: <body>`.
     */
    async createHttpError(response) {
        let detail = '';
        try {
            detail = await response.text();
        } catch (err) {
            // The status alone is still useful; do not let a body read failure mask it.
            console.warn('Could not read TTS error response body', err);
        }
        return new Error(`HTTP ${response.status}: ${detail}`);
    }

    /**
     * Fetch the available voices and normalize them.
     *
     * @returns {Promise<Array<{name: string, voice_id: string, preview_url: boolean, lang: string}>>}
     *     Voices sorted by `Locale`, then by `ShortName`.
     * @throws {Error} If the request does not succeed.
     */
    async fetchTtsVoiceIds() {
        const response = await doExtrasFetch(`/edge_voices`);
        if (!response.ok) {
            throw await this.createHttpError(response);
        }

        const rawVoices = await response.json();
        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map((x) => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Synthesize a short sample in the voice's language and play it.
     *
     * @param {string} id Voice name / id to preview.
     * @returns {Promise<void>} Settles once playback has started.
     * @throws {Error} If the voice is unknown, generation fails, or playback cannot start.
     */
    async previewTtsVoice(id) {
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration already throws on a non-OK response, so no second check is needed.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);
        // Object URLs keep their blob alive until revoked; release it once
        // playback has finished or failed.
        const releaseUrl = () => URL.revokeObjectURL(url);

        const audioElement = document.createElement("audio");
        audioElement.addEventListener('ended', releaseUrl, { once: true });
        audioElement.addEventListener('error', releaseUrl, { once: true });
        audioElement.src = url;

        try {
            await audioElement.play();
        } catch (err) {
            releaseUrl();
            throw err;
        }
    }

    /**
     * Request synthesized audio.
     *
     * @param {string} inputText Text to synthesize.
     * @param {string} voiceId Voice identifier sent as `voice`.
     * @returns {Promise<Response>} The successful fetch response.
     * @throws {Error} If the request does not succeed.
     */
    async fetchTtsGeneration(inputText, voiceId) {
        console.info(`Generating new TTS for voice_id ${voiceId}`);
        const response = await doExtrasFetch(
            `/edge_speech`,
            {
                method: 'POST',
                headers: getRequestHeaders(),
                body: JSON.stringify({
                    "text": inputText,
                    "voice": voiceId,
                }),
            },
        );
        if (!response.ok) {
            throw await this.createHttpError(response);
        }
        return response;
    }
}
|
@@ -1,6 +1,7 @@
|
||||
import { callPopup, cancelTtsPlay, eventSource, event_types, isMultigenEnabled, is_send_press, saveSettingsDebounced } from '../../../script.js'
|
||||
import { ModuleWorkerWrapper, extension_settings, getContext } from '../../extensions.js'
|
||||
import { getStringHash } from '../../utils.js'
|
||||
import { EdgeTtsProvider } from './edge.js'
|
||||
import { ElevenLabsTtsProvider } from './elevenlabs.js'
|
||||
import { SileroTtsProvider } from './silerotts.js'
|
||||
import { SystemTtsProvider } from './system.js'
|
||||
@@ -15,11 +16,52 @@ let lastGroupId = null
|
||||
let lastChatId = null
|
||||
let lastMessageHash = null
|
||||
|
||||
/**
 * Pick a short sample sentence for previewing a TTS voice.
 *
 * @param {string} lang Locale tag of the voice, e.g. `'en-US'`.
 * @returns {string} The sample sentence for that locale, or a fixed Latin
 *     fallback sentence when the locale has no entry.
 */
export function getPreviewString(lang) {
    const previewStrings = {
        'en-US': 'The quick brown fox jumps over the lazy dog',
        'en-GB': 'Sphinx of black quartz, judge my vow',
        'fr-FR': 'Portez ce vieux whisky au juge blond qui fume',
        'de-DE': 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich',
        'it-IT': "Pranzo d'acqua fa volti sghembi",
        'es-ES': 'Quiere la boca exhausta vid, kiwi, piña y fugaz jamón',
        'es-MX': 'Fabio me exige, sin tapujos, que añada cerveza al whisky',
        'ru-RU': 'В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!',
        'pt-BR': 'Vejo xá gritando que fez show sem playback.',
        // 'pt-PT' is the locale tag for European Portuguese. The original
        // 'pt-PR' key is kept as an alias so existing lookups keep working.
        'pt-PT': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'pt-PR': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
        'uk-UA': "Фабрикуймо гідність, лящім їжею, ґав хапаймо, з'єднавці чаш!",
        'pl-PL': 'Pchnąć w tę łódź jeża lub ośm skrzyń fig',
        'cs-CZ': 'Příliš žluťoučký kůň úpěl ďábelské ódy',
        'sk-SK': 'Vyhŕňme si rukávy a vyprážajme čínske ryžové cestoviny',
        'hu-HU': 'Árvíztűrő tükörfúrógép',
        'tr-TR': 'Pijamalı hasta yağız şoföre çabucak güvendi',
        'nl-NL': 'De waard heeft een kalfje en een pinkje opgegeten',
        'sv-SE': 'Yxskaftbud, ge vårbygd, zinkqvarn',
        'da-DK': 'Quizdeltagerne spiste jordbær med fløde, mens cirkusklovnen Walther spillede på xylofon',
        'ja-JP': 'いろはにほへと ちりぬるを わかよたれそ つねならむ うゐのおくやま けふこえて あさきゆめみし ゑひもせす',
        'ko-KR': '가나다라마바사아자차카타파하',
        'zh-CN': '我能吞下玻璃而不伤身体',
        'ro-RO': 'Muzicologă în bej vând whisky și tequila, preț fix',
        'bg-BG': 'Щъркелите се разпръснаха по цялото небе',
        'el-GR': 'Ταχίστη αλώπηξ βαφής ψημένη γη, δρασκελίζει υπέρ νωθρού κυνός',
        'fi-FI': 'Voi veljet, miksi juuri teille myin nämä vehkeet?',
        'he-IL': 'הקצינים צעקו: "כל הכבוד לצבא הצבאות!"',
        'id-ID': 'Jangkrik itu memang enak, apalagi kalau digoreng',
        'ms-MY': 'Muzik penyanyi wanita itu menggambarkan kehidupan yang penuh dengan duka nestapa',
        'th-TH': 'เป็นไงบ้างครับ ผมชอบกินข้าวผัดกระเพราหมูกรอบ',
        'vi-VN': 'Cô bé quàng khăn đỏ đang ngồi trên bãi cỏ xanh',
        'ar-SA': 'أَبْجَدِيَّة عَرَبِيَّة',
        'hi-IN': 'श्वेता ने श्वेता के श्वेते हाथों में श्वेता का श्वेता चावल पकड़ा',
    };
    const fallbackPreview = 'Neque porro quisquam est qui dolorem ipsum quia dolor sit amet';

    // Own-property check: a bare `previewStrings[lang]` would return inherited
    // members (e.g. a function for 'constructor') instead of the fallback.
    return Object.prototype.hasOwnProperty.call(previewStrings, lang)
        ? previewStrings[lang]
        : fallbackPreview;
}
|
||||
|
||||
let ttsProviders = {
|
||||
ElevenLabs: ElevenLabsTtsProvider,
|
||||
Silero: SileroTtsProvider,
|
||||
System: SystemTtsProvider,
|
||||
Edge: EdgeTtsProvider,
|
||||
}
|
||||
let ttsProvider
|
||||
let ttsProviderName
|
||||
|
@@ -1,3 +1,5 @@
|
||||
import { getPreviewString } from "./index.js";
|
||||
|
||||
export { SystemTtsProvider }
|
||||
|
||||
/**
|
||||
@@ -74,20 +76,6 @@ class SystemTtsProvider {
|
||||
// Config //
|
||||
//########//
|
||||
|
||||
previewStrings = {
|
||||
'en-US': 'The quick brown fox jumps over the lazy dog',
|
||||
'en-GB': 'Sphinx of black quartz, judge my vow',
|
||||
'fr-FR': 'Portez ce vieux whisky au juge blond qui fume',
|
||||
'de-DE': 'Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich',
|
||||
'it-IT': "Pranzo d'acqua fa volti sghembi",
|
||||
'es-ES': 'Quiere la boca exhausta vid, kiwi, piña y fugaz jamón',
|
||||
'es-MX': 'Fabio me exige, sin tapujos, que añada cerveza al whisky',
|
||||
'ru-RU': 'В чащах юга жил бы цитрус? Да, но фальшивый экземпляр!',
|
||||
'pt-BR': 'Vejo xá gritando que fez show sem playback.',
|
||||
'pt-PR': 'Todo pajé vulgar faz boquinha sexy com kiwi.',
|
||||
'uk-UA': "Фабрикуймо гідність, лящім їжею, ґав хапаймо, з'єднавці чаш!",
|
||||
}
|
||||
fallbackPreview = 'Neque porro quisquam est qui dolorem ipsum quia dolor sit amet'
|
||||
settings
|
||||
voices = []
|
||||
separator = ' ... '
|
||||
@@ -172,7 +160,7 @@ class SystemTtsProvider {
|
||||
}
|
||||
|
||||
speechSynthesis.cancel();
|
||||
const text = this.previewStrings[voice.lang] ?? this.fallbackPreview;
|
||||
const text = getPreviewString(voice.lang);
|
||||
const utterance = new SpeechSynthesisUtterance(text);
|
||||
utterance.voice = voice;
|
||||
utterance.rate = 1;
|
||||
|
Reference in New Issue
Block a user