Add Azure TTS service

This commit is contained in:
Cohee 2024-05-22 01:37:51 +03:00
parent 0371bf4e9f
commit f5fccc0387
6 changed files with 306 additions and 0 deletions

View File

@ -0,0 +1,207 @@
import { callPopup, getRequestHeaders } from '../../../script.js';
import { SECRET_KEYS, findSecret, secret_state, writeSecret } from '../../secrets.js';
import { getPreviewString, saveTtsProviderSettings } from './index.js';
export { AzureTtsProvider };
/**
 * TTS provider that lists voices and synthesizes speech through the server's
 * `/api/azure/list` and `/api/azure/generate` endpoints.
 *
 * The API key is kept in the secrets store (`SECRET_KEYS.AZURE_TTS`); only the
 * region and the voice map live in the provider settings.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings; assigned by `loadSettings`. */
    settings;
    /** Cached voice objects (`{ name, voice_id, preview_url, lang }`); emptied whenever the region changes. */
    voices = [];
    separator = ' . ';
    /** Audio element used to play voice previews. */
    audioElement = document.createElement('audio');

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /**
     * Markup for the provider settings panel: a button for setting the API key
     * and a text input for the region.
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <div class="azure_tts_settings">
            <div class="flex-container alignItemsBaseline">
                <h4 for="azure_tts_key" class="flex1 margin0">
                    <a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
                </h4>
                <div id="azure_tts_key" class="menu_button menu_button_icon">
                    <i class="fa-solid fa-key"></i>
                    <span>Click to set</span>
                </div>
            </div>
            <label for="azure_tts_region">Region:</label>
            <input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
            <hr>
        </div>
        `;
    }

    /**
     * Reads the region from the settings input, drops the cached voices
     * (they belong to the previous region) and persists the settings.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Reset voices
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings, wires up the settings UI and runs a readiness check.
     * @param {object} settings Saved provider settings; only keys present in `defaultSettings` are accepted
     * @throws {string} When `settings` contains a key that is not in `defaultSettings`
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy of the defaults so that applying user values never overwrites them.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                // Thrown as a plain string, unchanged from before, so the thrown value stays the same for callers.
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
        $('#azure_tts_key').toggleClass('success', secret_state[SECRET_KEYS.AZURE_TTS]);
        $('#azure_tts_key').on('click', async () => {
            const popupText = 'Azure TTS API Key';
            const savedKey = secret_state[SECRET_KEYS.AZURE_TTS] ? await findSecret(SECRET_KEYS.AZURE_TTS) : '';
            const key = await callPopup(popupText, 'input', savedKey);

            // Popup dismissed or left empty: keep the stored key untouched.
            if (key == false || key == '') {
                return;
            }

            await writeSecret(SECRET_KEYS.AZURE_TTS, key);
            toastr.success('API Key saved');
            $('#azure_tts_key').addClass('success');
            await this.onRefreshClick();
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            // Not being ready (e.g. the voice list request failed) must not prevent the settings from loading.
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Performs a simple readiness check by fetching the voice list when an API key is set.
     * The fetched list is cached in `voices` so later lookups do not repeat the request;
     * without a key the cache is emptied.
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        if (secret_state[SECRET_KEYS.AZURE_TTS]) {
            this.voices = await this.fetchTtsVoiceObjects();
        } else {
            this.voices = [];
        }
    }

    /** Re-runs the readiness check. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice object by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name to look for
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }

        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Fetch response whose body is the generated audio
     */
    async generateTts(text, voiceId) {
        const response = await this.fetchTtsGeneration(text, voiceId);
        return response;
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Requests the voice list for the configured region.
     * @returns {Promise<object[]>} Voice objects sorted by locale, then by short name;
     * an empty array when the API key or the region is not set
     * @throws {Error} When the request does not succeed
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }

        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const rawVoices = await response.json();

        return rawVoices
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @throws {Error} When generation fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration throws on a non-OK response, so no status check is needed here.
        const response = await this.fetchTtsGeneration(text, id);
        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        // A preview that was interrupted never reaches `ended`; release its object URL now.
        const previousUrl = this.audioElement.src;
        if (typeof previousUrl === 'string' && previousUrl.startsWith('blob:')) {
            URL.revokeObjectURL(previousUrl);
        }

        // Keep the object URL alive until playback is over: revoking it right after play()
        // can invalidate the source before the element has loaded it.
        const release = () => URL.revokeObjectURL(url);
        this.audioElement.onended = release;
        this.audioElement.onerror = release;
        this.audioElement.src = url;
        this.audioElement.play();
    }

    /**
     * Requests speech generation from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response
     * @throws {Error} When the API key or region is not set, or the request does not succeed
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }

        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}

View File

@ -13,6 +13,7 @@ import { XTTSTtsProvider } from './xtts.js';
import { GSVITtsProvider } from './gsvi.js';
import { AllTalkTtsProvider } from './alltalk.js';
import { SpeechT5TtsProvider } from './speecht5.js';
import { AzureTtsProvider } from './azure.js';
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
@ -83,6 +84,7 @@ const ttsProviders = {
OpenAI: OpenAITtsProvider,
AllTalk: AllTalkTtsProvider,
SpeechT5: SpeechT5TtsProvider,
Azure: AzureTtsProvider,
};
let ttsProvider;
let ttsProviderName;

View File

@ -27,6 +27,7 @@ export const SECRET_KEYS = {
COHERE: 'api_key_cohere',
PERPLEXITY: 'api_key_perplexity',
GROQ: 'api_key_groq',
AZURE_TTS: 'api_key_azure_tts',
};
const INPUT_MAP = {

View File

@ -519,6 +519,9 @@ app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt')
// Speech (text-to-speech and speech-to-text)
app.use('/api/speech', require('./src/endpoints/speech').router);
// Azure TTS
app.use('/api/azure', require('./src/endpoints/azure').router);
const tavernUrl = new URL(
(cliArguments.ssl ? 'https://' : 'http://') +
(listen ? '0.0.0.0' : '127.0.0.1') +

92
src/endpoints/azure.js Normal file
View File

@ -0,0 +1,92 @@
const { readSecret, SECRET_KEYS } = require('./secrets');
const fetch = require('node-fetch').default;
const express = require('express');
const { jsonParser } = require('../express-common');
const router = express.Router();
/**
 * POST /list
 * Proxies the Azure voice list for the requested region.
 *
 * Request body: `{ region: string }`.
 * Responds with 403 when no Azure TTS key is stored, 400 when the region is
 * missing or malformed, 500 when the upstream request fails, and the upstream
 * JSON otherwise.
 */
router.post('/list', jsonParser, async (req, res) => {
    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const region = typeof req.body.region === 'string' ? req.body.region.trim() : '';

        if (!region) {
            console.error('Azure TTS region not set');
            return res.sendStatus(400);
        }

        // The region becomes part of the hostname of a request that carries the secret key.
        // Accept only a plain label so a crafted value (e.g. "evil.example/x?") cannot
        // redirect the request, and the key with it, to another host.
        if (!/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;

        const response = await fetch(url, {
            method: 'GET',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
            },
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const voices = await response.json();
        return res.json(voices);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
/**
 * POST /generate
 * Synthesizes speech through Azure and returns the audio as Ogg/Opus.
 *
 * Request body: `{ text: string, voice: string, region: string }`.
 * Responds with 403 when no Azure TTS key is stored, 400 when a parameter is
 * missing or the region is malformed, 500 when the upstream request fails, and
 * the audio bytes (`audio/ogg`) otherwise.
 */
router.post('/generate', jsonParser, async (req, res) => {
    // Escapes a value for use as XML element content.
    const escapeXmlText = (value) => String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
    // Escapes a value for use inside a quoted XML attribute.
    const escapeXmlAttribute = (value) => escapeXmlText(value)
        .replace(/'/g, '&apos;')
        .replace(/"/g, '&quot;');

    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const { text, voice } = req.body;
        const region = typeof req.body.region === 'string' ? req.body.region.trim() : '';

        if (!text || !voice || !region) {
            console.error('Missing required parameters');
            return res.sendStatus(400);
        }

        // The region becomes part of the hostname of a request that carries the secret key.
        // Accept only a plain label so a crafted value cannot redirect the request elsewhere.
        if (!/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;

        // The language tag is the first two dash-separated parts of the voice name.
        const lang = escapeXmlAttribute(String(voice).split('-').slice(0, 2).join('-'));
        // The voice name comes from the client and sits inside a quoted attribute,
        // so quotes must be escaped as well to keep the SSML well-formed.
        const voiceName = escapeXmlAttribute(voice);
        const escapedText = escapeXmlText(text);
        const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${lang}'><voice xml:lang='${lang}' name='${voiceName}'>${escapedText}</voice></speak>`;

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
                'Content-Type': 'application/ssml+xml',
                'X-Microsoft-OutputFormat': 'ogg-48khz-16bit-mono-opus',
            },
            body: ssml,
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const audio = await response.buffer();
        res.set('Content-Type', 'audio/ogg');
        return res.send(audio);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
// Expose the Azure TTS router so the server can mount it.
module.exports = { router };

View File

@ -39,6 +39,7 @@ const SECRET_KEYS = {
COHERE: 'api_key_cohere',
PERPLEXITY: 'api_key_perplexity',
GROQ: 'api_key_groq',
AZURE_TTS: 'api_key_azure_tts',
};
// These are the keys that are safe to expose, even if allowKeysExposure is false