// Patch summary: adds a NovelAI text-to-speech provider to the TTS extension.
//
// Files touched:
//   public/scripts/extensions/tts/index.js
//     - imports NovelTtsProvider from './novel.js' and registers it in ttsProviders under the key `Novel`.
//     - window['tts_preview']: the condition now also requires `audio` to be truthy before calling audio.play().
//     - onTtsVoicesClick: the second `popupText +=` is now wrapped in `if (voice.preview_url)`.
//       (The appended template literal shows as empty in this view; presumably its markup was stripped - confirm against the real file.)
//     - addAudioJob: 'audio/webm' is appended to the array used in the type check.
//       NOTE(review): `!audioData.type in [...]` parses as `(!audioData.type) in [...]`, and `in` tests property keys
//       (array indices), not element values, so this condition does not compare the MIME type against the list.
//       Something like `![...].includes(audioData.type)` looks like the intent - confirm. The thrown message also still
//       says "Expecting audio/mpeg".
//   public/scripts/extensions/tts/novel.js (new file)
//     - defines and exports class NovelTtsProvider: settingsHtml getter, onSettingsChange (empty), loadSettings,
//       onApplyClick (returns immediately), getVoice, generateTts, fetchTtsVoiceIds (returns a hard-coded list of 13 voices),
//       previewTtsVoice, and fetchTtsGeneration (POSTs JSON { text, voice } to `/novel_tts` and throws on a non-ok response).
//     - NOTE(review): loadSettings assigns `this.settings = this.defaultSettings` (the same object), so the key-by-key
//       copy that follows also writes into defaultSettings.
//     - NOTE(review): loadSettings and getVoice throw template-literal strings rather than Error objects.
//     - NOTE(review): previewTtsVoice re-checks `response.ok` although fetchTtsGeneration already throws when it is false,
//       and the URL from URL.createObjectURL is not revoked anywhere in the visible code.
//   server.js
//     - the net.setDefaultAutoSelectFamily(false) line is removed and re-added; the two lines look identical in this view,
//       presumably a whitespace-only change.
//     - adds POST `/novel_tts`: responds 401 when readSecret(SECRET_KEYS.NOVEL) is falsy and 400 when `text` or `voice` is
//       missing from the body; otherwise issues a GET to `${api_novelai}/ai/generate-voice` (text, voice=-1, seed=<voice>,
//       opus=false, version=v2) with a Bearer token, relays the upstream status when it is not ok, else buffers the body
//       via readAllChunks and sends it with Content-Type audio/webm; any thrown error is logged and answered with 500.
//     - adds readAllChunks(readableStream): collects 'data' chunks into an array and resolves with it on 'end'.
//       NOTE(review): on 'error' it calls `reject()` with no argument, so the route's catch block receives `undefined`.
//
// NOTE(review): the line breaks of this patch are collapsed in this copy; the hunk text below is left untouched.
diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index 035d485c3..ebb83070d 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -5,6 +5,7 @@ import { EdgeTtsProvider } from './edge.js' import { ElevenLabsTtsProvider } from './elevenlabs.js' import { SileroTtsProvider } from './silerotts.js' import { SystemTtsProvider } from './system.js' +import { NovelTtsProvider } from './novel.js' const UPDATE_INTERVAL = 1000 @@ -62,6 +63,7 @@ let ttsProviders = { Silero: SileroTtsProvider, System: SystemTtsProvider, Edge: EdgeTtsProvider, + Novel: NovelTtsProvider, } let ttsProvider let ttsProviderName @@ -244,7 +246,7 @@ async function playAudioData(audioBlob) { window['tts_preview'] = function (id) { const audio = document.getElementById(id) - if (!$(audio).data('disabled')) { + if (audio && !$(audio).data('disabled')) { audio.play() } else { @@ -265,7 +267,9 @@ async function onTtsVoicesClick() { ${voice.name} ` - popupText += `` + if (voice.preview_url) { + popupText += `` + } } } catch { popupText = 'Could not load voices list. Check your API key.' @@ -327,7 +331,7 @@ function completeCurrentAudioJob() { */ async function addAudioJob(response) { const audioData = await response.blob() - if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave']) { + if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { throw `TTS received HTTP response with invalid data format. 
Expecting audio/mpeg, got ${audioData.type}` } audioJobQueue.push(audioData) diff --git a/public/scripts/extensions/tts/novel.js b/public/scripts/extensions/tts/novel.js new file mode 100644 index 000000000..539c45eda --- /dev/null +++ b/public/scripts/extensions/tts/novel.js @@ -0,0 +1,130 @@ +import { getRequestHeaders } from "../../../script.js" +import { getPreviewString } from "./index.js" + +export { NovelTtsProvider } + +class NovelTtsProvider { + //########// + // Config // + //########// + + settings + voices = [] + separator = ' . ' + audioElement = document.createElement('audio') + + defaultSettings = { + voiceMap: {} + } + + get settingsHtml() { + let html = `Use NovelAI's TTS engine.
+ The Voice IDs in the preview list are only examples, as it can be any string of text. Feel free to try different options!
+ Hint: Save an API key in the NovelAI API settings to use it here.`; + return html; + } + + onSettingsChange() { + } + + loadSettings(settings) { + // Populate Provider UI given input settings + if (Object.keys(settings).length == 0) { + console.info("Using default TTS Provider settings") + } + + // Only accept keys defined in defaultSettings + this.settings = this.defaultSettings + + for (const key in settings) { + if (key in this.settings) { + this.settings[key] = settings[key] + } else { + throw `Invalid setting passed to TTS Provider: ${key}` + } + } + + console.info("Settings loaded") + } + + + async onApplyClick() { + return + } + + //#################// + // TTS Interfaces // + //#################// + + async getVoice(voiceName) { + if (!voiceName) { + throw `TTS Voice name not provided` + } + + return { name: voiceName, voice_id: voiceName, lang: 'en-US', preview_url: false} + } + + async generateTts(text, voiceId) { + const response = await this.fetchTtsGeneration(text, voiceId) + return response + } + + //###########// + // API CALLS // + //###########// + async fetchTtsVoiceIds() { + const voices = [ + { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false }, + { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false }, + { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false }, + { name: 'Claea', voice_id: 'Claea', lang: 'en-US', preview_url: false }, + { name: 'Lim', voice_id: 'Lim', lang: 'en-US', preview_url: false }, + { name: 'Aurae', voice_id: 'Aurae', lang: 'en-US', preview_url: false }, + { name: 'Naia', voice_id: 'Naia', lang: 'en-US', preview_url: false }, + { name: 'Aulon', voice_id: 'Aulon', lang: 'en-US', preview_url: false }, + { name: 'Elei', voice_id: 'Elei', lang: 'en-US', preview_url: false }, + { name: 'Ogma', voice_id: 'Ogma', lang: 'en-US', preview_url: false }, + { name: 'Raid', voice_id: 'Raid', lang: 'en-US', preview_url: false }, + { name: 'Pega', voice_id: 'Pega', lang: 'en-US', 
preview_url: false }, + { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false }, + ]; + + return voices; + } + + + async previewTtsVoice(id) { + this.audioElement.pause(); + this.audioElement.currentTime = 0; + + const text = getPreviewString('en-US') + const response = await this.fetchTtsGeneration(text, id) + if (!response.ok) { + throw new Error(`HTTP ${response.status}`) + } + + const audio = await response.blob(); + const url = URL.createObjectURL(audio); + this.audioElement.src = url; + this.audioElement.play(); + } + + async fetchTtsGeneration(inputText, voiceId) { + console.info(`Generating new TTS for voice_id ${voiceId}`) + const response = await fetch(`/novel_tts`, + { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + "text": inputText, + "voice": voiceId, + }) + } + ) + if (!response.ok) { + toastr.error(response.statusText, 'TTS Generation Failed'); + throw new Error(`HTTP ${response.status}`); + } + return response + } +} diff --git a/server.js b/server.js index 20133d1e2..7bc8ec65a 100644 --- a/server.js +++ b/server.js @@ -6,7 +6,7 @@ const { hideBin } = require('yargs/helpers'); const net = require("net"); // work around a node v20 bug: https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 if (net.setDefaultAutoSelectFamily) { - net.setDefaultAutoSelectFamily(false); + net.setDefaultAutoSelectFamily(false); } const cliArguments = yargs(hideBin(process.argv)) @@ -3197,6 +3197,40 @@ app.post('/google_translate', jsonParser, async (request, response) => { }); }); +app.post('/novel_tts', jsonParser, async (request, response) => { + const token = readSecret(SECRET_KEYS.NOVEL); + + if (!token) { + return response.sendStatus(401); + } + + const text = request.body.text; + const voice = request.body.voice; + + if (!text || !voice) { + return response.sendStatus(400); + } + + try { + const fetch = require('node-fetch').default; + const url = 
`${api_novelai}/ai/generate-voice?text=${encodeURIComponent(text)}&voice=-1&seed=${encodeURIComponent(voice)}&opus=false&version=v2`; + const result = await fetch(url, { method: 'GET', headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'audio/webm' } }); + + if (!result.ok) { + return response.sendStatus(result.status); + } + + const chunks = await readAllChunks(result.body); + const buffer = Buffer.concat(chunks); + response.setHeader('Content-Type', 'audio/webm'); + return response.send(buffer); + } + catch (error) { + console.error(error); + return response.sendStatus(500); + } +}); + app.post('/delete_sprite', jsonParser, async (request, response) => { const label = request.body.label; const name = request.body.name; @@ -3343,6 +3377,26 @@ function readSecret(key) { return secrets[key]; } +async function readAllChunks(readableStream) { + return new Promise((resolve, reject) => { + // Consume the readable stream + const chunks = []; + readableStream.on('data', (chunk) => { + chunks.push(chunk); + }); + + readableStream.on('end', () => { + console.log('Finished reading the stream.'); + resolve(chunks); + }); + + readableStream.on('error', (error) => { + console.error('Error while reading the stream:', error); + reject(); + }); + }); +} + async function getImageBuffers(zipFilePath) { return new Promise((resolve, reject) => { // Check if the zip file exists