From f5fccc0387a07d32a3916b85858facbf69c7b1bb Mon Sep 17 00:00:00 2001
From: Cohee <18619528+Cohee1207@users.noreply.github.com>
Date: Wed, 22 May 2024 01:37:51 +0300
Subject: [PATCH] Add Azure TTS service

---
 public/scripts/extensions/tts/azure.js | 248 +++++++++++++++++++++++++
 public/scripts/extensions/tts/index.js |   2 +
 public/scripts/secrets.js              |   1 +
 server.js                              |   3 +
 src/endpoints/azure.js                 | 127 +++++++++++++
 src/endpoints/secrets.js               |   1 +
 6 files changed, 382 insertions(+)
 create mode 100644 public/scripts/extensions/tts/azure.js
 create mode 100644 src/endpoints/azure.js

diff --git a/public/scripts/extensions/tts/azure.js b/public/scripts/extensions/tts/azure.js
new file mode 100644
index 000000000..abbf3ef33
--- /dev/null
+++ b/public/scripts/extensions/tts/azure.js
@@ -0,0 +1,248 @@
+import { callPopup, getRequestHeaders } from '../../../script.js';
+import { SECRET_KEYS, findSecret, secret_state, writeSecret } from '../../secrets.js';
+import { getPreviewString, saveTtsProviderSettings } from './index.js';
+export { AzureTtsProvider };
+
+/**
+ * TTS provider backed by the Azure Speech service.
+ * Requests go through the server's /api/azure endpoints; the API key is kept
+ * in the secrets store rather than in the provider settings.
+ */
+class AzureTtsProvider {
+    //########//
+    // Config //
+    //########//
+
+    settings;
+    voices = [];
+    separator = ' . ';
+    audioElement = document.createElement('audio');
+
+    defaultSettings = {
+        region: '',
+        voiceMap: {},
+    };
+
+    /**
+     * Markup of the provider settings panel.
+     * NOTE(review): reconstructed around the two ids that loadSettings() binds to
+     * (#azure_tts_key, #azure_tts_region); class names are assumptions - confirm.
+     * @returns {string} Settings HTML
+     */
+    get settingsHtml() {
+        return `
+        <div class="azure_tts_settings">
+            <div class="flex-container alignItemsBaseline">
+                <h4 class="flex1 margin0">Azure TTS Key</h4>
+                <div id="azure_tts_key" class="menu_button menu_button_icon">
+                    <i class="fa-solid fa-key"></i>
+                </div>
+            </div>
+            <label for="azure_tts_region">Region</label>
+            <input id="azure_tts_region" type="text" class="text_pole" />
+        </div>
+        `;
+    }
+
+    /** Stores the region typed into the UI, drops the cached voices and saves. */
+    onSettingsChange() {
+        // Update dynamically
+        this.settings.region = String($('#azure_tts_region').val());
+        // The voice list is requested per region, so the cached one is stale now
+        this.voices = [];
+        saveTtsProviderSettings();
+    }
+
+    /**
+     * Applies saved settings, binds the settings UI and runs a readiness check.
+     * @param {object} settings Saved provider settings
+     * @throws {Error} If settings has a key that defaultSettings does not define
+     */
+    async loadSettings(settings) {
+        // Populate Provider UI given input settings
+        if (Object.keys(settings).length === 0) {
+            console.info('Using default TTS Provider settings');
+        }
+
+        // Work on a copy so that loading never writes into defaultSettings
+        this.settings = structuredClone(this.defaultSettings);
+
+        // Only accept keys defined in defaultSettings
+        for (const key in settings) {
+            if (key in this.settings) {
+                this.settings[key] = settings[key];
+            } else {
+                throw new Error(`Invalid setting passed to TTS Provider: ${key}`);
+            }
+        }
+
+        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
+        // toggleClass() honours the second argument only when it is a real boolean
+        $('#azure_tts_key').toggleClass('success', Boolean(secret_state[SECRET_KEYS.AZURE_TTS]));
+        $('#azure_tts_key').on('click', async () => {
+            const popupText = 'Azure TTS API Key';
+            const savedKey = secret_state[SECRET_KEYS.AZURE_TTS] ? await findSecret(SECRET_KEYS.AZURE_TTS) : '';
+
+            const key = await callPopup(popupText, 'input', savedKey);
+
+            // Nothing entered (falsy result): leave the stored key untouched
+            if (!key) {
+                return;
+            }
+
+            await writeSecret(SECRET_KEYS.AZURE_TTS, key);
+
+            toastr.success('API Key saved');
+            $('#azure_tts_key').addClass('success');
+            await this.onRefreshClick();
+        });
+
+        try {
+            await this.checkReady();
+            console.debug('Azure: Settings loaded');
+        } catch {
+            console.debug('Azure: Settings loaded, but not ready');
+        }
+    }
+
+    /**
+     * Readiness check: reloads the cached voice list when an API key is stored
+     * and clears it otherwise.
+     */
+    async checkReady() {
+        this.voices = secret_state[SECRET_KEYS.AZURE_TTS]
+            ? await this.fetchTtsVoiceObjects()
+            : [];
+    }
+
+    /** Re-runs the readiness check, which also reloads the voice list. */
+    async onRefreshClick() {
+        await this.checkReady();
+    }
+
+    //#################//
+    //  TTS Interfaces //
+    //#################//
+
+    /**
+     * Finds a voice by name, loading the voice list first when it is empty.
+     * @param {string} voiceName Name of the voice
+     * @returns {Promise<object>} Matching voice object
+     * @throws {Error} If no voice has that name
+     */
+    async getVoice(voiceName) {
+        if (this.voices.length === 0) {
+            this.voices = await this.fetchTtsVoiceObjects();
+        }
+        const match = this.voices.find(voice => voice.name === voiceName);
+        if (!match) {
+            throw new Error(`TTS Voice name ${voiceName} not found`);
+        }
+        return match;
+    }
+
+    /**
+     * Generates speech for a text.
+     * @param {string} text Text to speak
+     * @param {string} voiceId Voice ID
+     * @returns {Promise<Response>} Response carrying the audio
+     */
+    async generateTts(text, voiceId) {
+        return await this.fetchTtsGeneration(text, voiceId);
+    }
+
+    //###########//
+    // API CALLS //
+    //###########//
+
+    /**
+     * Fetches the voices for the configured region.
+     * @returns {Promise<object[]>} Voices ordered by locale, then short name; empty when the key or region is missing
+     * @throws {Error} If the server responds with a non-OK status
+     */
+    async fetchTtsVoiceObjects() {
+        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
+            console.warn('Azure TTS API Key not set');
+            return [];
+        }
+
+        if (!this.settings.region) {
+            console.warn('Azure TTS region not set');
+            return [];
+        }
+
+        const response = await fetch('/api/azure/list', {
+            method: 'POST',
+            headers: getRequestHeaders(),
+            body: JSON.stringify({
+                region: this.settings.region,
+            }),
+        });
+
+        if (!response.ok) {
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
+
+        const voices = await response.json();
+        return voices
+            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
+            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
+    }
+
+    /**
+     * Preview TTS for a given voice ID.
+     * @param {string} id Voice ID
+     */
+    async previewTtsVoice(id) {
+        this.audioElement.pause();
+        this.audioElement.currentTime = 0;
+        const voice = await this.getVoice(id);
+        const text = getPreviewString(voice.lang);
+        // fetchTtsGeneration() throws on a non-OK response, so no status check here
+        const response = await this.fetchTtsGeneration(text, id);
+
+        const audio = await response.blob();
+        const url = URL.createObjectURL(audio);
+        // Revoking right after play() may drop the blob before it is loaded;
+        // release it once playback has finished or failed instead
+        const release = () => URL.revokeObjectURL(url);
+        this.audioElement.addEventListener('ended', release, { once: true });
+        this.audioElement.addEventListener('error', release, { once: true });
+        this.audioElement.src = url;
+        this.audioElement.play();
+    }
+
+    /**
+     * Requests speech audio from the server.
+     * @param {string} text Text to speak
+     * @param {string} voiceId Voice ID
+     * @returns {Promise<Response>} Response carrying the audio
+     * @throws {Error} If the key or region is missing, or the request fails
+     */
+    async fetchTtsGeneration(text, voiceId) {
+        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
+            throw new Error('Azure TTS API Key not set');
+        }
+
+        if (!this.settings.region) {
+            throw new Error('Azure TTS region not set');
+        }
+
+        const response = await fetch('/api/azure/generate', {
+            method: 'POST',
+            headers: getRequestHeaders(),
+            body: JSON.stringify({
+                text: text,
+                voice: voiceId,
+                region: this.settings.region,
+            }),
+        });
+
+        if (!response.ok) {
+            toastr.error(response.statusText, 'TTS Generation Failed');
+            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
+        }
+
+        return response;
+    }
+}
diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js
index f73e9d48f..1ac1edd8b 100644
--- a/public/scripts/extensions/tts/index.js
+++ b/public/scripts/extensions/tts/index.js
@@ -13,6 +13,7 @@ import { XTTSTtsProvider } from './xtts.js';
 import { GSVITtsProvider } from './gsvi.js';
 import { AllTalkTtsProvider } from './alltalk.js';
 import { SpeechT5TtsProvider } from './speecht5.js';
+import { AzureTtsProvider } from './azure.js';
 import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
 import { SlashCommand } from '../../slash-commands/SlashCommand.js';
 import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
@@ -83,6 +84,7 @@ const ttsProviders = {
     OpenAI: OpenAITtsProvider,
     AllTalk: AllTalkTtsProvider,
     SpeechT5: SpeechT5TtsProvider,
+    Azure: AzureTtsProvider,
 };
 let ttsProvider;
 let ttsProviderName;
diff --git a/public/scripts/secrets.js b/public/scripts/secrets.js
index 4d7a6ebf1..cb4477a78 100644
--- a/public/scripts/secrets.js
+++ b/public/scripts/secrets.js
@@ -27,6 +27,7 @@ export const SECRET_KEYS = {
     COHERE: 'api_key_cohere',
     PERPLEXITY: 'api_key_perplexity',
     GROQ: 'api_key_groq',
+    AZURE_TTS: 'api_key_azure_tts',
 };
 
 const INPUT_MAP = {
diff --git a/server.js b/server.js
index e658d7b5e..6f67dc287 100644
--- a/server.js
+++ b/server.js
@@ -519,6 +519,9 @@ app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt')
 // Speech (text-to-speech and speech-to-text)
 app.use('/api/speech', require('./src/endpoints/speech').router);
 
+// Azure TTS
+app.use('/api/azure', require('./src/endpoints/azure').router);
+
 const tavernUrl = new URL(
     (cliArguments.ssl ? 'https://' : 'http://') +
     (listen ? '0.0.0.0' : '127.0.0.1') +
diff --git a/src/endpoints/azure.js b/src/endpoints/azure.js
new file mode 100644
index 000000000..4c3b34d5b
--- /dev/null
+++ b/src/endpoints/azure.js
@@ -0,0 +1,127 @@
+const { readSecret, SECRET_KEYS } = require('./secrets');
+const fetch = require('node-fetch').default;
+const express = require('express');
+const { jsonParser } = require('../express-common');
+
+const router = express.Router();
+
+// The region ends up in the host name of the upstream URL, so only a plain
+// DNS label is accepted; anything else could redirect the API key elsewhere
+const REGION_PATTERN = /^[a-z0-9-]+$/i;
+
+// Names of the XML entities for the characters that must not appear raw in SSML
+const XML_ENTITIES = { '&': 'amp', '<': 'lt', '>': 'gt', '"': 'quot', '\'': 'apos' };
+
+/**
+ * Checks that a region can be safely placed into the upstream host name.
+ * @param {any} region Region taken from the request body
+ * @returns {boolean} True for a non-empty string of letters, digits and dashes
+ */
+function isValidRegion(region) {
+    return typeof region === 'string' && REGION_PATTERN.test(region);
+}
+
+/**
+ * Escapes the characters that are special in XML text and attribute values.
+ * @param {any} value Raw value
+ * @returns {string} Escaped string
+ */
+function escapeXml(value) {
+    return String(value).replace(/[&<>"']/g, char => `&${XML_ENTITIES[char]};`);
+}
+
+router.post('/list', jsonParser, async (req, res) => {
+    try {
+        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);
+
+        if (!key) {
+            console.error('Azure TTS API Key not set');
+            return res.sendStatus(403);
+        }
+
+        const region = req.body.region;
+
+        if (!region) {
+            console.error('Azure TTS region not set');
+            return res.sendStatus(400);
+        }
+
+        if (!isValidRegion(region)) {
+            console.error('Azure TTS region is invalid');
+            return res.sendStatus(400);
+        }
+
+        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;
+
+        const response = await fetch(url, {
+            method: 'GET',
+            headers: {
+                'Ocp-Apim-Subscription-Key': key,
+            },
+        });
+
+        if (!response.ok) {
+            console.error('Azure Request failed', response.status, response.statusText);
+            return res.sendStatus(500);
+        }
+
+        const voices = await response.json();
+        return res.json(voices);
+    } catch (error) {
+        console.error('Azure Request failed', error);
+        return res.sendStatus(500);
+    }
+});
+
+router.post('/generate', jsonParser, async (req, res) => {
+    try {
+        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);
+
+        if (!key) {
+            console.error('Azure TTS API Key not set');
+            return res.sendStatus(403);
+        }
+
+        const { text, voice, region } = req.body;
+        if (!text || !voice || !region) {
+            console.error('Missing required parameters');
+            return res.sendStatus(400);
+        }
+
+        if (!isValidRegion(region)) {
+            console.error('Azure TTS region is invalid');
+            return res.sendStatus(400);
+        }
+
+        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;
+        // The first two dash-separated parts of the voice name serve as the language tag
+        const lang = escapeXml(String(voice).split('-').slice(0, 2).join('-'));
+        const ssml = `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="${lang}"><voice xml:lang="${lang}" name="${escapeXml(voice)}">${escapeXml(text)}</voice></speak>`;
+
+        const response = await fetch(url, {
+            method: 'POST',
+            headers: {
+                'Ocp-Apim-Subscription-Key': key,
+                'Content-Type': 'application/ssml+xml',
+                'X-Microsoft-OutputFormat': 'ogg-48khz-16bit-mono-opus',
+            },
+            body: ssml,
+        });
+
+        if (!response.ok) {
+            console.error('Azure Request failed', response.status, response.statusText);
+            return res.sendStatus(500);
+        }
+
+        const audio = await response.buffer();
+        res.set('Content-Type', 'audio/ogg');
+        return res.send(audio);
+    } catch (error) {
+        console.error('Azure Request failed', error);
+        return res.sendStatus(500);
+    }
+});
+
+module.exports = {
+    router,
+};
diff --git a/src/endpoints/secrets.js b/src/endpoints/secrets.js
index 1a1ac3746..9bf2eb765 100644
--- a/src/endpoints/secrets.js
+++ b/src/endpoints/secrets.js
@@ -39,6 +39,7 @@ const SECRET_KEYS = {
     COHERE: 'api_key_cohere',
     PERPLEXITY: 'api_key_perplexity',
     GROQ: 'api_key_groq',
+    AZURE_TTS: 'api_key_azure_tts',
 };
 
 // These are the keys that are safe to expose, even if allowKeysExposure is false