From 6f061adc1ead8097b20bf2fbb3e85b19f51dec3d Mon Sep 17 00:00:00 2001 From: Cohee <18619528+Cohee1207@users.noreply.github.com> Date: Sun, 12 Nov 2023 02:28:03 +0200 Subject: [PATCH] Add OpenAI TTS provider --- public/scripts/extensions/tts/elevenlabs.js | 4 + public/scripts/extensions/tts/index.js | 2 + public/scripts/extensions/tts/openai.js | 148 ++++++++++++++++++++ src/openai.js | 39 ++++++ 4 files changed, 193 insertions(+) create mode 100644 public/scripts/extensions/tts/openai.js diff --git a/public/scripts/extensions/tts/elevenlabs.js b/public/scripts/extensions/tts/elevenlabs.js index 3e0c90fe2..4cb7813fe 100644 --- a/public/scripts/extensions/tts/elevenlabs.js +++ b/public/scripts/extensions/tts/elevenlabs.js @@ -45,6 +45,8 @@ class ElevenLabsTtsProvider { this.settings.stability = $('#elevenlabs_tts_stability').val() this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val() this.settings.model = $('#elevenlabs_tts_model').find(':selected').val() + $('#elevenlabs_tts_stability_output').text(this.settings.stability); + $('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost); saveTtsProviderSettings() } @@ -79,6 +81,8 @@ class ElevenLabsTtsProvider { $('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this)) $('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this)) $('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this)) + $('#elevenlabs_tts_stability_output').text(this.settings.stability); + $('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost); try { await this.checkReady() diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 0b22753d1..2ff01190a 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -9,6 +9,7 @@ import { SystemTtsProvider } from './system.js' import { NovelTtsProvider } from './novel.js' import { power_user } from '../../power-user.js' import { registerSlashCommand } from '../../slash-commands.js' +import { OpenAITtsProvider } from './openai.js' export { talkingAnimation }; const UPDATE_INTERVAL = 1000 @@ -73,6 +74,7 @@ let ttsProviders = { Coqui: CoquiTtsProvider, Edge: EdgeTtsProvider, Novel: NovelTtsProvider, + OpenAI: OpenAITtsProvider, } let ttsProvider let ttsProviderName diff --git a/public/scripts/extensions/tts/openai.js b/public/scripts/extensions/tts/openai.js new file mode 100644 index 000000000..393a82940 --- /dev/null +++ b/public/scripts/extensions/tts/openai.js @@ -0,0 +1,148 @@ +import { getRequestHeaders } from "../../../script.js" +import { saveTtsProviderSettings } from "./index.js"; + +export { OpenAITtsProvider } + +class OpenAITtsProvider { + static voices = [ + { name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' }, + { name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' }, + { name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' }, + { name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' }, + { name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' }, + { name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' }, + ]; + + settings + voices = [] + separator = ' . ' + audioElement = document.createElement('audio') + + defaultSettings = { + voiceMap: {}, + customVoices: [], + model: 'tts-1', + speed: 1, + } + + get settingsHtml() { + let html = ` +
Use OpenAI's TTS engine.
+ Hint: Save an API key in the OpenAI API settings to use it here. +
+ + +
+
+ + +
`; + return html; + } + + async loadSettings(settings) { + // Populate Provider UI given input settings + if (Object.keys(settings).length == 0) { + console.info("Using default TTS Provider settings") + } + + // Only accept keys defined in defaultSettings + this.settings = this.defaultSettings; + + for (const key in settings) { + if (key in this.settings) { + this.settings[key] = settings[key]; + } else { + throw `Invalid setting passed to TTS Provider: ${key}`; + } + } + + $('#openai-tts-model').val(this.settings.model); + $('#openai-tts-model').on('change', () => { + this.onSettingsChange(); + }); + + $('#openai-tts-speed').val(this.settings.speed); + $('#openai-tts-speed').on('input', () => { + this.onSettingsChange(); + }); + + $('#openai-tts-speed-output').text(this.settings.speed); + + await this.checkReady(); + console.debug("OpenAI TTS: Settings loaded"); + } + + onSettingsChange() { + // Update dynamically + this.settings.model = String($('#openai-tts-model').find(':selected').val()); + this.settings.speed = Number($('#openai-tts-speed').val()); + $('#openai-tts-speed-output').text(this.settings.speed); + saveTtsProviderSettings(); + } + + async checkReady() { + await this.fetchTtsVoiceObjects(); + } + + async onRefreshClick() { + return; + } + + async getVoice(voiceName) { + if (!voiceName) { + throw `TTS Voice name not provided` + } + + const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName); + + if (!voice) { + throw `TTS Voice not found: ${voiceName}` + } + + return voice; + } + + async generateTts(text, voiceId) { + const response = await this.fetchTtsGeneration(text, voiceId) + return response + } + + async fetchTtsVoiceObjects() { + return OpenAITtsProvider.voices; + } + + async previewTtsVoice(_) { + return; + } + + async fetchTtsGeneration(inputText, voiceId) { + console.info(`Generating new TTS for voice_id ${voiceId}`) + const response = await fetch(`/api/openai/generate-voice`, { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + "text": inputText, + "voice": voiceId, + "model": this.settings.model, + "speed": this.settings.speed, + }), + }); + + if (!response.ok) { + toastr.error(response.statusText, 'TTS Generation Failed'); + throw new Error(`HTTP ${response.status}: ${await response.text()}`); + } + + return response; + } +} diff --git a/src/openai.js b/src/openai.js index aa2445ada..857c0198f 100644 --- a/src/openai.js +++ b/src/openai.js @@ -63,6 +63,45 @@ function registerEndpoints(app, jsonParser) { } }); + app.post('/api/openai/generate-voice', jsonParser, async (request, response) => { + try { + const key = readSecret(SECRET_KEYS.OPENAI); + + if (!key) { + console.log('No OpenAI key found'); + return response.sendStatus(401); + } + + const result = await fetch('https://api.openai.com/v1/audio/speech', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${key}`, + }, + body: JSON.stringify({ + input: request.body.text, + response_format: 'mp3', + voice: request.body.voice ?? 'alloy', + speed: request.body.speed ?? 1, + model: request.body.model ?? 'tts-1', + }), + }); + + if (!result.ok) { + const text = await result.text(); + console.log('OpenAI request failed', result.statusText, text); + return response.status(500).send(text); + } + + const buffer = await result.arrayBuffer(); + response.setHeader('Content-Type', 'audio/mpeg'); + return response.send(Buffer.from(buffer)); + } catch (error) { + console.error('OpenAI TTS generation failed', error); + response.status(500).send('Internal server error'); + } + }); + app.post('/api/openai/generate-image', jsonParser, async (request, response) => { try { const key = readSecret(SECRET_KEYS.OPENAI);