diff --git a/public/index.html b/public/index.html index 35927c627..7a8327cf0 100644 --- a/public/index.html +++ b/public/index.html @@ -1382,7 +1382,7 @@ You can select multiple models.
Avoid sending sensitive information to the Horde. Learn more
+ href="https://docs.sillytavern.app/usage/guidebook/#horde">Learn more @@ -1416,7 +1416,7 @@
  1. - Follow Follow these directions to get your NovelAI API key. @@ -1437,7 +1437,7 @@

    Novel AI Model - + ?

    @@ -1490,7 +1490,7 @@ @@ -1545,8 +1545,7 @@
    1. - Follow these directions - to get your 'p-b cookie' + Follow these directions to get your 'p-b cookie'
    2. Enter it in the box below:
    @@ -1736,6 +1735,8 @@ + +
    diff --git a/public/script.js b/public/script.js index 4c9a5a8cc..b1562b462 100644 --- a/public/script.js +++ b/public/script.js @@ -481,22 +481,33 @@ function getTokenCount(str, padding = undefined) { case tokenizers.CLASSIC: return encode(str).length + padding; case tokenizers.LLAMA: - let tokenCount = 0; - jQuery.ajax({ - async: false, - type: 'POST', // - url: `/tokenize_llama`, - data: JSON.stringify({ text: str }), - dataType: "json", - contentType: "application/json", - success: function (data) { - tokenCount = data.count; - } - }); - return tokenCount + padding; + return countTokensRemote('/tokenize_llama', str, padding); + case tokenizers.NERD: + return countTokensRemote('/tokenize_nerdstash', str, padding); + case tokenizers.NERD2: + return countTokensRemote('/tokenize_nerdstash_v2', str, padding); + default: + console.warn("Unknown tokenizer type", tokenizerType); + return Math.ceil(str.length / CHARACTERS_PER_TOKEN_RATIO) + padding; } } +function countTokensRemote(endpoint, str, padding) { + let tokenCount = 0; + jQuery.ajax({ + async: false, + type: 'POST', + url: endpoint, + data: JSON.stringify({ text: str }), + dataType: "json", + contentType: "application/json", + success: function (data) { + tokenCount = data.count; + } + }); + return tokenCount + padding; +} + function reloadMarkdownProcessor(render_formulas = false) { if (render_formulas) { converter = new showdown.Converter({ @@ -2589,12 +2600,14 @@ function getMaxContextSize() { } else { this_max_context = Number(max_context); if (nai_settings.model_novel == 'krake-v2') { - this_max_context -= 160; + // Krake has a max context of 2048 + // Should be used with nerdstash tokenizer for best results + this_max_context = Math.min(max_context, 2048); } if (nai_settings.model_novel == 'clio-v1') { // Clio has a max context of 8192 - // TODO: Evaluate the relevance of nerdstash-v1 tokenizer, changes quite a bit. - this_max_context = 8192 - 60 - 160; + // Should be used with nerdstash_v2 tokenizer for best results + this_max_context = Math.min(max_context, 8192); } } } diff --git a/public/scripts/extensions/tts/index.js b/public/scripts/extensions/tts/index.js index bd7b32bcd..d06712851 100644 --- a/public/scripts/extensions/tts/index.js +++ b/public/scripts/extensions/tts/index.js @@ -5,6 +5,7 @@ import { EdgeTtsProvider } from './edge.js' import { ElevenLabsTtsProvider } from './elevenlabs.js' import { SileroTtsProvider } from './silerotts.js' import { SystemTtsProvider } from './system.js' +import { NovelTtsProvider } from './novel.js' const UPDATE_INTERVAL = 1000 @@ -62,6 +63,7 @@ let ttsProviders = { Silero: SileroTtsProvider, System: SystemTtsProvider, Edge: EdgeTtsProvider, + Novel: NovelTtsProvider, } let ttsProvider let ttsProviderName @@ -244,7 +246,7 @@ async function playAudioData(audioBlob) { window['tts_preview'] = function (id) { const audio = document.getElementById(id) - if (!$(audio).data('disabled')) { + if (audio && !$(audio).data('disabled')) { audio.play() } else { @@ -265,7 +267,9 @@ async function onTtsVoicesClick() { ${voice.name}
    ` - popupText += `` + if (voice.preview_url) { + popupText += `` + } } } catch { popupText = 'Could not load voices list. Check your API key.' @@ -327,7 +331,7 @@ function completeCurrentAudioJob() { */ async function addAudioJob(response) { const audioData = await response.blob() - if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave']) { + if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` } audioJobQueue.push(audioData) diff --git a/public/scripts/extensions/tts/novel.js b/public/scripts/extensions/tts/novel.js new file mode 100644 index 000000000..539c45eda --- /dev/null +++ b/public/scripts/extensions/tts/novel.js @@ -0,0 +1,130 @@ +import { getRequestHeaders } from "../../../script.js" +import { getPreviewString } from "./index.js" + +export { NovelTtsProvider } + +class NovelTtsProvider { + //########// + // Config // + //########// + + settings + voices = [] + separator = ' . ' + audioElement = document.createElement('audio') + + defaultSettings = { + voiceMap: {} + } + + get settingsHtml() { + let html = `Use NovelAI's TTS engine.
    + The Voice IDs in the preview list are only examples, as it can be any string of text. Feel free to try different options!
    + Hint: Save an API key in the NovelAI API settings to use it here.`; + return html; + } + + onSettingsChange() { + } + + loadSettings(settings) { + // Populate Provider UI given input settings + if (Object.keys(settings).length == 0) { + console.info("Using default TTS Provider settings") + } + + // Only accept keys defined in defaultSettings + this.settings = this.defaultSettings + + for (const key in settings) { + if (key in this.settings) { + this.settings[key] = settings[key] + } else { + throw `Invalid setting passed to TTS Provider: ${key}` + } + } + + console.info("Settings loaded") + } + + + async onApplyClick() { + return + } + + //#################// + // TTS Interfaces // + //#################// + + async getVoice(voiceName) { + if (!voiceName) { + throw `TTS Voice name not provided` + } + + return { name: voiceName, voice_id: voiceName, lang: 'en-US', preview_url: false} + } + + async generateTts(text, voiceId) { + const response = await this.fetchTtsGeneration(text, voiceId) + return response + } + + //###########// + // API CALLS // + //###########// + async fetchTtsVoiceIds() { + const voices = [ + { name: 'Ligeia', voice_id: 'Ligeia', lang: 'en-US', preview_url: false }, + { name: 'Aini', voice_id: 'Aini', lang: 'en-US', preview_url: false }, + { name: 'Orea', voice_id: 'Orea', lang: 'en-US', preview_url: false }, + { name: 'Claea', voice_id: 'Claea', lang: 'en-US', preview_url: false }, + { name: 'Lim', voice_id: 'Lim', lang: 'en-US', preview_url: false }, + { name: 'Aurae', voice_id: 'Aurae', lang: 'en-US', preview_url: false }, + { name: 'Naia', voice_id: 'Naia', lang: 'en-US', preview_url: false }, + { name: 'Aulon', voice_id: 'Aulon', lang: 'en-US', preview_url: false }, + { name: 'Elei', voice_id: 'Elei', lang: 'en-US', preview_url: false }, + { name: 'Ogma', voice_id: 'Ogma', lang: 'en-US', preview_url: false }, + { name: 'Raid', voice_id: 'Raid', lang: 'en-US', preview_url: false }, + { name: 'Pega', voice_id: 'Pega', lang: 'en-US', preview_url: false }, + { name: 'Lam', voice_id: 'Lam', lang: 'en-US', preview_url: false }, + ]; + + return voices; + } + + + async previewTtsVoice(id) { + this.audioElement.pause(); + this.audioElement.currentTime = 0; + + const text = getPreviewString('en-US') + const response = await this.fetchTtsGeneration(text, id) + if (!response.ok) { + throw new Error(`HTTP ${response.status}`) + } + + const audio = await response.blob(); + const url = URL.createObjectURL(audio); + this.audioElement.src = url; + this.audioElement.play(); + } + + async fetchTtsGeneration(inputText, voiceId) { + console.info(`Generating new TTS for voice_id ${voiceId}`) + const response = await fetch(`/novel_tts`, + { + method: 'POST', + headers: getRequestHeaders(), + body: JSON.stringify({ + "text": inputText, + "voice": voiceId, + }) + } + ) + if (!response.ok) { + toastr.error(response.statusText, 'TTS Generation Failed'); + throw new Error(`HTTP ${response.status}`); + } + return response + } +} diff --git a/public/scripts/power-user.js b/public/scripts/power-user.js index f2c701c5c..9a97f2a08 100644 --- a/public/scripts/power-user.js +++ b/public/scripts/power-user.js @@ -60,6 +60,8 @@ const tokenizers = { GPT3: 1, CLASSIC: 2, LLAMA: 3, + NERD: 4, + NERD2: 5, } const send_on_enter_options = { diff --git a/server.js b/server.js index 20133d1e2..3b3d1782a 100644 --- a/server.js +++ b/server.js @@ -6,7 +6,7 @@ const { hideBin } = require('yargs/helpers'); const net = require("net"); // work around a node v20 bug: https://github.com/nodejs/node/issues/47822#issuecomment-1564708870 if (net.setDefaultAutoSelectFamily) { - net.setDefaultAutoSelectFamily(false); + net.setDefaultAutoSelectFamily(false); } const cliArguments = yargs(hideBin(process.argv)) @@ -128,23 +128,25 @@ const delay = ms => new Promise(resolve => setTimeout(resolve, ms)) const { SentencePieceProcessor, cleanText } = require("sentencepiece-js"); -let spp; +let spp_llama; +let spp_nerd; +let spp_nerd_v2; -async function loadSentencepieceTokenizer() { +async function loadSentencepieceTokenizer(modelPath) { try { const spp = new SentencePieceProcessor(); - await spp.load("src/sentencepiece/tokenizer.model"); + await spp.load(modelPath); return spp; } catch (error) { - console.error("Sentencepiece tokenizer failed to load."); + console.error("Sentencepiece tokenizer failed to load: " + modelPath, error); return null; } }; -async function countTokensLlama(text) { +async function countSentencepieceTokens(spp, text) { // Fallback to strlen estimation if (!spp) { - return Math.ceil(v.length / 3.35); + return Math.ceil(text.length / 3.35); } let cleaned = cleanText(text); @@ -2795,14 +2797,22 @@ app.post("/savepreset_openai", jsonParser, function (request, response) { return response.send({ name }); }); -app.post("/tokenize_llama", jsonParser, async function (request, response) { - if (!request.body) { - return response.sendStatus(400); - } +function createTokenizationHandler(getTokenizerFn) { + return async function (request, response) { + if (!request.body) { + return response.sendStatus(400); + } - const count = await countTokensLlama(request.body.text); - return response.send({ count }); -}); + const text = request.body.text || ''; + const tokenizer = getTokenizerFn(); + const count = await countSentencepieceTokens(tokenizer, text); + return response.send({ count }); + }; +} + +app.post("/tokenize_llama", jsonParser, createTokenizationHandler(() => spp_llama)); +app.post("/tokenize_nerdstash", jsonParser, createTokenizationHandler(() => spp_nerd)); +app.post("/tokenize_nerdstash_v2", jsonParser, createTokenizationHandler(() => spp_nerd_v2)); // ** REST CLIENT ASYNC WRAPPERS ** @@ -2861,7 +2871,11 @@ const setupTasks = async function () { // Colab users could run the embedded tool if (!is_colab) await convertWebp(); - spp = await loadSentencepieceTokenizer(); + [spp_llama, spp_nerd, spp_nerd_v2] = await Promise.all([ + loadSentencepieceTokenizer('src/sentencepiece/tokenizer.model'), + loadSentencepieceTokenizer('src/sentencepiece/nerdstash.model'), + loadSentencepieceTokenizer('src/sentencepiece/nerdstash_v2.model'), + ]); console.log('Launching...'); @@ -3197,6 +3211,40 @@ app.post('/google_translate', jsonParser, async (request, response) => { }); }); +app.post('/novel_tts', jsonParser, async (request, response) => { + const token = readSecret(SECRET_KEYS.NOVEL); + + if (!token) { + return response.sendStatus(401); + } + + const text = request.body.text; + const voice = request.body.voice; + + if (!text || !voice) { + return response.sendStatus(400); + } + + try { + const fetch = require('node-fetch').default; + const url = `${api_novelai}/ai/generate-voice?text=${encodeURIComponent(text)}&voice=-1&seed=${encodeURIComponent(voice)}&opus=false&version=v2`; + const result = await fetch(url, { method: 'GET', headers: { 'Authorization': `Bearer ${token}`, 'Accept': 'audio/webm' } }); + + if (!result.ok) { + return response.sendStatus(result.status); + } + + const chunks = await readAllChunks(result.body); + const buffer = Buffer.concat(chunks); + response.setHeader('Content-Type', 'audio/webm'); + return response.send(buffer); + } + catch (error) { + console.error(error); + return response.sendStatus(500); + } +}); + app.post('/delete_sprite', jsonParser, async (request, response) => { const label = request.body.label; const name = request.body.name; @@ -3343,6 +3391,26 @@ function readSecret(key) { return secrets[key]; } +async function readAllChunks(readableStream) { + return new Promise((resolve, reject) => { + // Consume the readable stream + const chunks = []; + readableStream.on('data', (chunk) => { + chunks.push(chunk); + }); + + readableStream.on('end', () => { + console.log('Finished reading the stream.'); + resolve(chunks); + }); + + readableStream.on('error', (error) => { + console.error('Error while reading the stream:', error); + reject(); + }); + }); +} + async function getImageBuffers(zipFilePath) { return new Promise((resolve, reject) => { // Check if the zip file exists diff --git a/src/sentencepiece/nerdstash.model b/src/sentencepiece/nerdstash.model new file mode 100644 index 000000000..b95958a4c Binary files /dev/null and b/src/sentencepiece/nerdstash.model differ diff --git a/src/sentencepiece/nerdstash_v2.model b/src/sentencepiece/nerdstash_v2.model new file mode 100644 index 000000000..ec2453194 Binary files /dev/null and b/src/sentencepiece/nerdstash_v2.model differ