mirror of
https://github.com/SillyTavern/SillyTavern.git
synced 2025-02-03 12:47:35 +01:00
Add Azure TTS service
This commit is contained in:
parent
0371bf4e9f
commit
f5fccc0387
207
public/scripts/extensions/tts/azure.js
Normal file
207
public/scripts/extensions/tts/azure.js
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
import { callPopup, getRequestHeaders } from '../../../script.js';
|
||||||
|
import { SECRET_KEYS, findSecret, secret_state, writeSecret } from '../../secrets.js';
|
||||||
|
import { getPreviewString, saveTtsProviderSettings } from './index.js';
|
||||||
|
export { AzureTtsProvider };
|
||||||
|
|
||||||
|
/**
 * TTS provider backed by the server-side `/api/azure/list` and
 * `/api/azure/generate` endpoints.
 *
 * Whether an API key is configured is read from `secret_state`; the region is
 * kept in the provider settings and edited through the settings UI.
 */
class AzureTtsProvider {
    //########//
    // Config //
    //########//

    /** Active settings object; assigned by loadSettings(). */
    settings;
    /** Cached voice objects ({ name, voice_id, preview_url, lang }); emptied when the region changes. */
    voices = [];
    /** Not referenced inside this class. NOTE(review): presumably consumed by the TTS extension - confirm. */
    separator = ' . ';
    /** Audio element used only for voice previews. */
    audioElement = document.createElement('audio');

    /** Object URL of the preview blob currently assigned to audioElement, or null. */
    #previewUrl = null;

    defaultSettings = {
        region: '',
        voiceMap: {},
    };

    /** Markup for the provider settings panel (API key button and region input). */
    get settingsHtml() {
        return `
        <div class="azure_tts_settings">
            <div class="flex-container alignItemsBaseline">
                <h4 for="azure_tts_key" class="flex1 margin0">
                    <a href="https://portal.azure.com/" target="_blank">Azure TTS Key</a>
                </h4>
                <div id="azure_tts_key" class="menu_button menu_button_icon">
                    <i class="fa-solid fa-key"></i>
                    <span>Click to set</span>
                </div>
            </div>
            <label for="azure_tts_region">Region:</label>
            <input id="azure_tts_region" type="text" class="text_pole" placeholder="e.g. westus" />
            <hr>
        </div>
        `;
    }

    /** Reads the region from the UI, drops the cached voices and persists the settings. */
    onSettingsChange() {
        // Update dynamically
        this.settings.region = String($('#azure_tts_region').val());
        // Reset voices: the cached list belongs to the previous region
        this.voices = [];
        saveTtsProviderSettings();
    }

    /**
     * Applies saved settings, wires up the settings UI and runs a readiness check.
     * @param {object} settings Saved provider settings; only keys present in defaultSettings are accepted
     * @throws {string} When `settings` contains a key that is not defined in defaultSettings
     */
    async loadSettings(settings) {
        // Populate Provider UI given input settings
        if (Object.keys(settings).length === 0) {
            console.info('Using default TTS Provider settings');
        }

        // Start from a copy so that loading settings never mutates defaultSettings
        this.settings = {
            ...this.defaultSettings,
            voiceMap: { ...this.defaultSettings.voiceMap },
        };

        // Only accept keys defined in defaultSettings (own keys, not inherited ones such as "constructor")
        for (const key in settings) {
            if (!Object.hasOwn(this.defaultSettings, key)) {
                throw `Invalid setting passed to TTS Provider: ${key}`;
            }
            this.settings[key] = settings[key];
        }

        $('#azure_tts_region').val(this.settings.region).on('input', () => this.onSettingsChange());
        // jQuery only honours the state argument when it is a real boolean;
        // passing undefined would flip the class instead of clearing it.
        $('#azure_tts_key').toggleClass('success', Boolean(secret_state[SECRET_KEYS.AZURE_TTS]));
        $('#azure_tts_key').on('click', async () => {
            const popupText = 'Azure TTS API Key';
            const savedKey = secret_state[SECRET_KEYS.AZURE_TTS] ? await findSecret(SECRET_KEYS.AZURE_TTS) : '';

            const key = await callPopup(popupText, 'input', savedKey);

            // Covers a cancelled popup (false / null / undefined) as well as an empty input
            if (!key) {
                return;
            }

            await writeSecret(SECRET_KEYS.AZURE_TTS, key);

            toastr.success('API Key saved');
            $('#azure_tts_key').addClass('success');
            await this.onRefreshClick();
        });

        try {
            await this.checkReady();
            console.debug('Azure: Settings loaded');
        } catch {
            console.debug('Azure: Settings loaded, but not ready');
        }
    }

    /**
     * Performs a simple readiness check by fetching the voice list.
     * The fetched list is cached in `voices`; without an API key the cache is cleared.
     * @throws {Error} When the voice list request fails
     */
    async checkReady() {
        if (secret_state[SECRET_KEYS.AZURE_TTS]) {
            this.voices = await this.fetchTtsVoiceObjects();
        } else {
            this.voices = [];
        }
    }

    /** Re-runs the readiness check, which also refreshes the cached voices. */
    async onRefreshClick() {
        await this.checkReady();
    }

    //#################//
    // TTS Interfaces //
    //#################//

    /**
     * Looks up a voice by name, fetching the voice list first if the cache is empty.
     * @param {string} voiceName Voice name to look up
     * @returns {Promise<object>} Matching voice object
     * @throws {string} When no voice with that name exists
     */
    async getVoice(voiceName) {
        if (this.voices.length === 0) {
            this.voices = await this.fetchTtsVoiceObjects();
        }

        const match = this.voices.find(voice => voice.name == voiceName);

        if (!match) {
            throw `TTS Voice name ${voiceName} not found`;
        }

        return match;
    }

    /**
     * Generates speech for the given text.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response from the generation endpoint
     */
    async generateTts(text, voiceId) {
        return await this.fetchTtsGeneration(text, voiceId);
    }

    //###########//
    // API CALLS //
    //###########//

    /**
     * Fetches the voices for the configured region.
     * @returns {Promise<object[]>} Voice objects sorted by locale, then short name;
     * an empty array when the API key or the region is not set
     * @throws {Error} When the request does not succeed
     */
    async fetchTtsVoiceObjects() {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            console.warn('Azure TTS API Key not set');
            return [];
        }

        if (!this.settings.region) {
            console.warn('Azure TTS region not set');
            return [];
        }

        const response = await fetch('/api/azure/list', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        const responseJson = await response.json();
        return responseJson
            .sort((a, b) => a.Locale.localeCompare(b.Locale) || a.ShortName.localeCompare(b.ShortName))
            .map(x => ({ name: x.ShortName, voice_id: x.ShortName, preview_url: false, lang: x.Locale }));
    }

    /** Revokes the object URL of the current preview blob, if there is one. */
    #releasePreviewUrl() {
        if (this.#previewUrl) {
            URL.revokeObjectURL(this.#previewUrl);
            this.#previewUrl = null;
        }
    }

    /**
     * Preview TTS for a given voice ID.
     * @param {string} id Voice ID
     * @throws {Error|string} When the voice is unknown or generation fails
     */
    async previewTtsVoice(id) {
        this.audioElement.pause();
        this.audioElement.currentTime = 0;
        // An interrupted preview never fires "ended", so free its blob here
        this.#releasePreviewUrl();

        const voice = await this.getVoice(id);
        const text = getPreviewString(voice.lang);
        // fetchTtsGeneration throws on a non-OK response, so no status check is needed here
        const response = await this.fetchTtsGeneration(text, id);

        const audio = await response.blob();
        // Another preview may have finished loading while this one was awaiting
        this.#releasePreviewUrl();
        const url = URL.createObjectURL(audio);
        this.#previewUrl = url;
        this.audioElement.src = url;

        // play() loads the source asynchronously; revoking the URL right away
        // can pull the data out from under the element. Release it only once
        // playback has finished or failed.
        this.audioElement.onended = () => this.#releasePreviewUrl();
        this.audioElement.onerror = () => this.#releasePreviewUrl();
        this.audioElement.play();
    }

    /**
     * Requests speech generation from the server.
     * @param {string} text Text to synthesize
     * @param {string} voiceId Voice ID
     * @returns {Promise<Response>} Successful fetch response whose body is the audio
     * @throws {Error} When the API key or region is not set, or the request does not succeed
     */
    async fetchTtsGeneration(text, voiceId) {
        if (!secret_state[SECRET_KEYS.AZURE_TTS]) {
            throw new Error('Azure TTS API Key not set');
        }

        if (!this.settings.region) {
            throw new Error('Azure TTS region not set');
        }

        const response = await fetch('/api/azure/generate', {
            method: 'POST',
            headers: getRequestHeaders(),
            body: JSON.stringify({
                text: text,
                voice: voiceId,
                region: this.settings.region,
            }),
        });

        if (!response.ok) {
            toastr.error(response.statusText, 'TTS Generation Failed');
            throw new Error(`HTTP ${response.status}: ${await response.text()}`);
        }

        return response;
    }
}
|
@ -13,6 +13,7 @@ import { XTTSTtsProvider } from './xtts.js';
|
|||||||
import { GSVITtsProvider } from './gsvi.js';
|
import { GSVITtsProvider } from './gsvi.js';
|
||||||
import { AllTalkTtsProvider } from './alltalk.js';
|
import { AllTalkTtsProvider } from './alltalk.js';
|
||||||
import { SpeechT5TtsProvider } from './speecht5.js';
|
import { SpeechT5TtsProvider } from './speecht5.js';
|
||||||
|
import { AzureTtsProvider } from './azure.js';
|
||||||
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
|
import { SlashCommandParser } from '../../slash-commands/SlashCommandParser.js';
|
||||||
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
|
import { SlashCommand } from '../../slash-commands/SlashCommand.js';
|
||||||
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
|
import { ARGUMENT_TYPE, SlashCommandArgument, SlashCommandNamedArgument } from '../../slash-commands/SlashCommandArgument.js';
|
||||||
@ -83,6 +84,7 @@ const ttsProviders = {
|
|||||||
OpenAI: OpenAITtsProvider,
|
OpenAI: OpenAITtsProvider,
|
||||||
AllTalk: AllTalkTtsProvider,
|
AllTalk: AllTalkTtsProvider,
|
||||||
SpeechT5: SpeechT5TtsProvider,
|
SpeechT5: SpeechT5TtsProvider,
|
||||||
|
Azure: AzureTtsProvider,
|
||||||
};
|
};
|
||||||
let ttsProvider;
|
let ttsProvider;
|
||||||
let ttsProviderName;
|
let ttsProviderName;
|
||||||
|
@ -27,6 +27,7 @@ export const SECRET_KEYS = {
|
|||||||
COHERE: 'api_key_cohere',
|
COHERE: 'api_key_cohere',
|
||||||
PERPLEXITY: 'api_key_perplexity',
|
PERPLEXITY: 'api_key_perplexity',
|
||||||
GROQ: 'api_key_groq',
|
GROQ: 'api_key_groq',
|
||||||
|
AZURE_TTS: 'api_key_azure_tts',
|
||||||
};
|
};
|
||||||
|
|
||||||
const INPUT_MAP = {
|
const INPUT_MAP = {
|
||||||
|
@ -519,6 +519,9 @@ app.use('/api/backends/scale-alt', require('./src/endpoints/backends/scale-alt')
|
|||||||
// Speech (text-to-speech and speech-to-text)
|
// Speech (text-to-speech and speech-to-text)
|
||||||
app.use('/api/speech', require('./src/endpoints/speech').router);
|
app.use('/api/speech', require('./src/endpoints/speech').router);
|
||||||
|
|
||||||
|
// Azure TTS
|
||||||
|
app.use('/api/azure', require('./src/endpoints/azure').router);
|
||||||
|
|
||||||
const tavernUrl = new URL(
|
const tavernUrl = new URL(
|
||||||
(cliArguments.ssl ? 'https://' : 'http://') +
|
(cliArguments.ssl ? 'https://' : 'http://') +
|
||||||
(listen ? '0.0.0.0' : '127.0.0.1') +
|
(listen ? '0.0.0.0' : '127.0.0.1') +
|
||||||
|
92
src/endpoints/azure.js
Normal file
92
src/endpoints/azure.js
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
const { readSecret, SECRET_KEYS } = require('./secrets');
|
||||||
|
const fetch = require('node-fetch').default;
|
||||||
|
const express = require('express');
|
||||||
|
const { jsonParser } = require('../express-common');
|
||||||
|
|
||||||
|
// Express router for the Azure TTS endpoints; the routes are registered on it below and it is exported from this module.
const router = express.Router();
|
||||||
|
|
||||||
|
/**
 * POST /list
 * Returns the voice list of an Azure Speech region as JSON.
 *
 * Request body: { region: string }
 * Responses: 403 when no Azure TTS key is stored, 400 when the region is
 * missing or malformed, 500 when the upstream request fails.
 */
router.post('/list', jsonParser, async (req, res) => {
    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const region = req.body.region;

        if (!region) {
            console.error('Azure TTS region not set');
            return res.sendStatus(400);
        }

        // The region is interpolated into the upstream hostname. Restrict it to
        // hostname-label characters so a crafted value (e.g. "evil.example/x?")
        // cannot redirect the request, and the subscription key, to another host.
        if (typeof region !== 'string' || !/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;

        const response = await fetch(url, {
            method: 'GET',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
            },
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const voices = await response.json();
        return res.json(voices);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
|
||||||
|
|
||||||
|
/**
 * Escapes a value for use in XML element content or a quoted attribute value.
 * @param {unknown} value Value to escape; converted with String()
 * @returns {string} Escaped text
 */
function escapeXml(value) {
    return String(value)
        .replace(/&/g, '&amp;') // must run first so the entities added below are not re-escaped
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}

/**
 * POST /generate
 * Synthesizes speech with Azure TTS and responds with Ogg/Opus audio.
 *
 * Request body: { text: string, voice: string, region: string }
 * Responses: 403 when no Azure TTS key is stored, 400 when a parameter is
 * missing or the region is malformed, 500 when the upstream request fails.
 */
router.post('/generate', jsonParser, async (req, res) => {
    try {
        const key = readSecret(req.user.directories, SECRET_KEYS.AZURE_TTS);

        if (!key) {
            console.error('Azure TTS API Key not set');
            return res.sendStatus(403);
        }

        const { text, voice, region } = req.body;
        if (!text || !voice || !region) {
            console.error('Missing required parameters');
            return res.sendStatus(400);
        }

        // The region is interpolated into the upstream hostname. Restrict it to
        // hostname-label characters so a crafted value cannot redirect the
        // request, and the subscription key, to another host.
        if (typeof region !== 'string' || !/^[a-z0-9-]+$/i.test(region)) {
            console.error('Azure TTS region is invalid');
            return res.sendStatus(400);
        }

        const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;
        // The language tag is taken from the first two dash-separated parts of the voice name
        const lang = String(voice).split('-').slice(0, 2).join('-');
        // Everything placed into the SSML document is escaped: the text goes into
        // element content, the voice name and language into single-quoted attributes.
        const escapedText = escapeXml(text);
        const escapedVoice = escapeXml(voice);
        const escapedLang = escapeXml(lang);
        const ssml = `<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='${escapedLang}'><voice xml:lang='${escapedLang}' name='${escapedVoice}'>${escapedText}</voice></speak>`;

        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Ocp-Apim-Subscription-Key': key,
                'Content-Type': 'application/ssml+xml',
                'X-Microsoft-OutputFormat': 'ogg-48khz-16bit-mono-opus',
            },
            body: ssml,
        });

        if (!response.ok) {
            console.error('Azure Request failed', response.status, response.statusText);
            return res.sendStatus(500);
        }

        const audio = await response.buffer();
        res.set('Content-Type', 'audio/ogg');
        return res.send(audio);
    } catch (error) {
        console.error('Azure Request failed', error);
        return res.sendStatus(500);
    }
});
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
router,
|
||||||
|
};
|
@ -39,6 +39,7 @@ const SECRET_KEYS = {
|
|||||||
COHERE: 'api_key_cohere',
|
COHERE: 'api_key_cohere',
|
||||||
PERPLEXITY: 'api_key_perplexity',
|
PERPLEXITY: 'api_key_perplexity',
|
||||||
GROQ: 'api_key_groq',
|
GROQ: 'api_key_groq',
|
||||||
|
AZURE_TTS: 'api_key_azure_tts',
|
||||||
};
|
};
|
||||||
|
|
||||||
// These are the keys that are safe to expose, even if allowKeysExposure is false
|
// These are the keys that are safe to expose, even if allowKeysExposure is false
|
||||||
|
Loading…
x
Reference in New Issue
Block a user