mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-03 04:37:40 +01:00
Add Azure TTS service
This commit is contained in:
parent
0371bf4e9f
commit
f5fccc0387
207
public/scripts/extensions/tts/azure.js
Normal file
207
public/scripts/extensions/tts/azure.js
Normal file
@ -0,0 +1,207 @@
|
||||
import { callPopup, getRequestHeaders } from '../../../script.js';
|
||||
import { SECRET_KEYS, findSecret, secret_state, writeSecret } from '../../secrets.js';
|
||||
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||
export { AzureTtsProvider };
|
||||
|
||||
/**
 * TTS provider backed by Azure Speech.
 *
 * All network traffic goes through the app's own `/api/azure/list` and
 * `/api/azure/generate` endpoints; the API key itself is stored via the
 * secrets module and is never sent from this class.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Cached voice objects ({ name, voice_id, preview_url, lang }); emptied when the region changes. */
    voices = [];
    // Not referenced inside this class — presumably consumed by the TTS extension core (TODO confirm).
    separator = ' . ';
    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');
    /** Object URL of the preview clip currently assigned to audioElement, or null. */
    previewUrl = null;

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /**
     * Markup for this provider's settings panel (API key button and region input).
     * @returns {string} HTML string
     */
    get settingsHtml() {
        return `
        <div class="azure_tts_settings">
            <div class="flex-container alignItemsBaseline">
                <h4 for="azure_tts_key" class="flex1 margin0">
                    <a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
                </h4>
                <div id="azure_tts_key" class="menu_button menu_button_icon">
                    <i class="fa-solid fa-key"></i>
                    <span>Click to set</span>
                </div>
            </div>
            <label for="azure_tts_region">Region:</label>
            <input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
            <hr>
        </div>
        `;
    }

    /**
     * Reads the region input back into the settings, drops the cached voices
     * (they belong to the previous region) and persists the settings.
     */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Reset voices
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings, wires up the settings UI and runs a readiness check.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When `settings` contains an unknown key (thrown as a plain string, as callers may expect)
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Work on a copy: assigning into defaultSettings itself would overwrite the defaults.
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
        }

        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
        // jQuery only honours the state argument when it is a real boolean;
        // an undefined state would toggle the class instead of clearing it.
        $('#azure_tts_key').toggleClass('success', Boolean(secret_state[SECRET_KEYS.AZURE_TTS]));
        $('#azure_tts_key').on('click', async () => {
            const popupText = 'Azure TTS API Key';
            const savedKey = secret_state[SECRET_KEYS.AZURE_TTS] ? await findSecret(SECRET_KEYS.AZURE_TTS) : '';

            const key = await callPopup(popupText, 'input', savedKey);

            // Cancelled popup or empty input: leave the stored key untouched.
            if (!key) {
                return;
            }

            await writeSecret(SECRET_KEYS.AZURE_TTS, key);

            toastr.success('API Key saved');
            $('#azure_tts_key').addClass('success');
            await this.onRefreshClick();
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Performs a simple readiness check by trying to fetch the voice list.
     * The fetched list is kept as the voice cache; without an API key the cache is cleared.
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        if (secret_state[SECRET_KEYS.AZURE_TTS]) {
            this.voices = await this.fetchTtsVoiceObjects();
        } else {
            this.voices = [];
        }
    }

    /** Re-runs the readiness check (and thereby refreshes the cached voices). */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice object by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name (the Azure ShortName)
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists (thrown as a plain string)
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name === voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }

        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Fetch response whose body is the audio
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetches the voices available in the configured region.
     * @returns {Promise<object[]>} Voices sorted by locale, then short name; an empty
     * array when the API key or the region is not configured
     * @throws {Error} When the list request returns a non-OK status
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }

        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        return responseJson
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @throws {Error|string} When the voice is unknown or generation fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration already throws on a non-OK response.
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        const url = URL.createObjectURL(audio);

        // A clip that was interrupted never fires 'ended'; release its URL now.
        if (this.previewUrl) {
            URL.revokeObjectURL(this.previewUrl);
        }
        this.previewUrl = url;

        // Revoke only once the element is done with the blob. Revoking straight
        // after play() can release the data before the element has loaded it.
        const release = () => {
            URL.revokeObjectURL(url);
            if (this.previewUrl === url) {
                this.previewUrl = null;
            }
        };
        this.audioElement.onended = release;
        this.audioElement.onerror = release;
        this.audioElement.src = url;

        // Playback is intentionally not awaited; failures are logged instead of
        // surfacing as an unhandled rejection.
        Promise.resolve(this.audioElement.play()).catch(error => {
            console.error('Azure: preview playback failed', error);
            release();
        });
    }

    /**
     * Requests synthesized audio from the server-side Azure endpoint.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} OK fetch response whose body is the audio
     * @throws {Error} When the API key or region is missing, or the request fails
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }

        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
@ -13,6 +13,7 @@ import { XTTSTtsProvider } from './xtts.js';
|
||||
import { GSVITtsProvider } from './gsvi.js';
|
||||
import { AllTalkTtsProvider } from './alltalk.js';
|
||||
import { SpeechT5TtsProvider } from './speecht5.js';
|
||||
import { AzureTtsProvider } from './azure.js';
|
||||
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
|
||||
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
|
||||
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
|
||||
@ -83,6 +84,7 @@ const ttsProviders = {
|
||||
OpenAI: OpenAITtsProvider,
|
||||
AllTalk: AllTalkTtsProvider,
|
||||
SpeechT5: SpeechT5TtsProvider,
|
||||
Azure: AzureTtsProvider,
|
||||
};
|
||||
let ttsProvider;
|
||||
let ttsProviderName;
|
||||
|
@ -27,6 +27,7 @@ export const SECRET_KEYS = {
|
||||
COHERE: 'api_key_cohere',
|
||||
PERPLEXITY: 'api_key_perplexity',
|
||||
GROQ: 'api_key_groq',
|
||||
AZURE_TTS: 'api_key_azure_tts',
|
||||
};
|
||||
|
||||
const INPUT_MAP = {
|
||||
|
@ -519,6 +519,9 @@ app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt')
|
||||
// Speech (text-to-speech and speech-to-text)
|
||||
app.use('/api/speech', require('./src/endpoints/speech').router);
|
||||
|
||||
// Azure TTS
|
||||
app.use('/api/azure', require('./src/endpoints/azure').router);
|
||||
|
||||
const tavernUrl = new URL(
|
||||
(cliArguments.ssl ? 'https://' : 'http://') +
|
||||
(listen ? '0.0.0.0' : '127.0.0.1') +
|
||||
|
92
src/endpoints/azure.js
Normal file
92
src/endpoints/azure.js
Normal file
@ -0,0 +1,92 @@
|
||||
const { readSecret, SECRET_KEYS } = require('./secrets');
|
||||
const fetch = require('node-fetch').default;
|
||||
const express = require('express');
|
||||
const { jsonParser } = require('../express-common');
|
||||
|
||||
const router = express.Router();
|
||||
|
||||
/**
 * POST /list — returns the Azure voice list for a region.
 *
 * Request body: `{ region }`. Responds with the JSON returned by Azure's
 * `voices/list` endpoint. Status codes: 403 when no Azure TTS key is stored,
 * 400 when the region is missing or malformed, 500 when the upstream request fails.
 */
router.post('/list', jsonParser, async (req, res) => {
    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const region = req.body.region;

        if (!region) {
            console.error('Azure TTS region not set');
            return res.sendStatus(400);
        }

        // The region is spliced into the hostname that receives the API key, so it
        // must be a bare label: anything containing '/', '#', '@', '.' etc. could
        // redirect the request (and the key) to a different host.
        if (typeof region !== 'string' || !/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;

        const response = await fetch(url, {
            method: 'GET',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
            },
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const voices = await response.json();
        return res.json(voices);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
|
||||
|
||||
/**
 * POST /generate — synthesizes speech through Azure TTS.
 *
 * Request body: `{ text, voice, region }`. Responds with the audio bytes
 * (`audio/ogg`). Status codes: 403 when no Azure TTS key is stored, 400 when a
 * parameter is missing or the region is malformed, 500 when the upstream request fails.
 */
router.post('/generate', jsonParser, async (req, res) => {
    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const { text, voice, region } = req.body;
        if (!text || !voice || !region) {
            console.error('Missing required parameters');
            return res.sendStatus(400);
        }

        // The region is spliced into the hostname that receives the API key, so it
        // must be a bare label that cannot redirect the request to another host.
        if (typeof region !== 'string' || !/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        // Encode XML-significant characters as numeric character references
        // (e.g. the ampersand becomes reference 38). Building the reference from
        // the char code keeps this source free of literal entities that an HTML
        // round-trip could decode back into a no-op replacement.
        const escapeXml = (value) => String(value).replace(/[&<>'"]/g, ch => `&#${ch.charCodeAt(0)};`);

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;
        // Voice names look like "en-US-SomeVoice"; the first two segments form the locale.
        const lang = escapeXml(String(voice).split('-').slice(0, 2).join('-'));
        const escapedVoice = escapeXml(voice);
        const escapedText = escapeXml(text);
        const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${lang}'><voice xml:lang='${lang}' name='${escapedVoice}'>${escapedText}</voice></speak>`;

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
                'Content-Type': 'application/ssml+xml',
                'X-Microsoft-OutputFormat': 'ogg-48khz-16bit-mono-opus',
            },
            body: ssml,
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const audio = await response.buffer();
        res.set('Content-Type', 'audio/ogg');
        return res.send(audio);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
|
||||
|
||||
// Public surface of this module: the Express router carrying the Azure TTS routes.
module.exports = { router: router };
|
@ -39,6 +39,7 @@ const SECRET_KEYS = {
|
||||
COHERE: 'api_key_cohere',
|
||||
PERPLEXITY: 'api_key_perplexity',
|
||||
GROQ: 'api_key_groq',
|
||||
AZURE_TTS: 'api_key_azure_tts',
|
||||
};
|
||||
|
||||
// These are the keys that are safe to expose, even if allowKeysExposure is false
|
||||
|
Loading…
x
Reference in New Issue
Block a user