Merge branch 'staging' into silerott-add-session-handling

This commit is contained in:
Cohee
2023-11-28 18:25:31 +02:00
118 changed files with 13535 additions and 3921 deletions

View File

@ -45,6 +45,8 @@ class ElevenLabsTtsProvider {
this.settings.stability = $('#elevenlabs_tts_stability').val()
this.settings.similarity_boost = $('#elevenlabs_tts_similarity_boost').val()
this.settings.model = $('#elevenlabs_tts_model').find(':selected').val()
$('#elevenlabs_tts_stability_output').text(this.settings.stability);
$('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost);
saveTtsProviderSettings()
}
@ -79,6 +81,8 @@ class ElevenLabsTtsProvider {
$('#elevenlabs_tts_similarity_boost').on('input', this.onSettingsChange.bind(this))
$('#elevenlabs_tts_stability').on('input', this.onSettingsChange.bind(this))
$('#elevenlabs_tts_model').on('change', this.onSettingsChange.bind(this))
$('#elevenlabs_tts_stability_output').text(this.settings.stability);
$('#elevenlabs_tts_similarity_boost_output').text(this.settings.similarity_boost);
try {
await this.checkReady()

View File

@ -1,4 +1,4 @@
import { callPopup, cancelTtsPlay, eventSource, event_types, saveSettingsDebounced } from '../../../script.js'
import { callPopup, cancelTtsPlay, eventSource, event_types, name2, saveSettingsDebounced } from '../../../script.js'
import { ModuleWorkerWrapper, doExtrasFetch, extension_settings, getApiUrl, getContext, modules } from '../../extensions.js'
import { escapeRegex, getStringHash } from '../../utils.js'
import { EdgeTtsProvider } from './edge.js'
@ -8,6 +8,9 @@ import { CoquiTtsProvider } from './coqui.js'
import { SystemTtsProvider } from './system.js'
import { NovelTtsProvider } from './novel.js'
import { power_user } from '../../power-user.js'
import { registerSlashCommand } from '../../slash-commands.js'
import { OpenAITtsProvider } from './openai.js'
import {XTTSTtsProvider} from "./xtts.js"
export { talkingAnimation };
const UPDATE_INTERVAL = 1000
@ -68,14 +71,18 @@ export function getPreviewString(lang) {
let ttsProviders = {
ElevenLabs: ElevenLabsTtsProvider,
Silero: SileroTtsProvider,
XTTSv2: XTTSTtsProvider,
System: SystemTtsProvider,
Coqui: CoquiTtsProvider,
Edge: EdgeTtsProvider,
Novel: NovelTtsProvider,
OpenAI: OpenAITtsProvider,
}
let ttsProvider
let ttsProviderName
let ttsLastMessage = null;
async function onNarrateOneMessage() {
audioElement.src = '/sounds/silence.mp3';
const context = getContext();
@ -91,6 +98,36 @@ async function onNarrateOneMessage() {
moduleWorker();
}
async function onNarrateText(args, text) {
if (!text) {
return;
}
audioElement.src = '/sounds/silence.mp3';
// To load all characters in the voice map, set unrestricted to true
await initVoiceMap(true);
const baseName = args?.voice || name2;
const name = (baseName === 'SillyTavern System' ? DEFAULT_VOICE_MARKER : baseName) || DEFAULT_VOICE_MARKER;
const voiceMapEntry = voiceMap[name] === DEFAULT_VOICE_MARKER
? voiceMap[DEFAULT_VOICE_MARKER]
: voiceMap[name];
if (!voiceMapEntry || voiceMapEntry === DISABLED_VOICE_MARKER) {
toastr.info(`Specified voice for ${name} was not found. Check the TTS extension settings.`);
return;
}
resetTtsPlayback()
ttsJobQueue.push({ mes: text, name: name });
await moduleWorker();
// Return back to the chat voices
await initVoiceMap(false);
}
async function moduleWorker() {
// Primarily determining when to add new chat to the TTS queue
const enabled = $('#tts_enabled').is(':checked')
@ -122,30 +159,53 @@ async function moduleWorker() {
) {
currentMessageNumber = context.chat.length ? context.chat.length : 0
saveLastValues()
// Force to speak on the first message in the new chat
if (context.chat.length === 1) {
lastMessageHash = -1;
}
return
}
// take the count of messages
let lastMessageNumber = context.chat.length ? context.chat.length : 0
let lastMessageNumber = context.chat.length ? context.chat.length : 0;
// There's no new messages
let diff = lastMessageNumber - currentMessageNumber
let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '')
let diff = lastMessageNumber - currentMessageNumber;
let hashNew = getStringHash((chat.length && chat[chat.length - 1].mes) ?? '');
if (diff == 0 && hashNew === lastMessageHash) {
return
// if messages got deleted, diff will be < 0
if (diff < 0) {
// necessary actions will be taken by the onChatDeleted() handler
return;
}
const message = chat[chat.length - 1]
// if no new messages, or same message, or same message hash, do nothing
if (diff == 0 && hashNew === lastMessageHash) {
return;
}
// We're currently swiping or streaming. Don't generate voice
if (
!message ||
message.mes === '...' ||
message.mes === '' ||
(context.streamingProcessor && !context.streamingProcessor.isFinished)
) {
return
// If streaming, wait for streaming to finish before processing new messages
if (context.streamingProcessor && !context.streamingProcessor.isFinished) {
return;
}
// clone message object, as things go haywire if message object is altered below (it's passed by reference)
const message = structuredClone(chat[chat.length - 1]);
// if last message within current message, message got extended. only send diff to TTS.
if (ttsLastMessage !== null && message.mes.indexOf(ttsLastMessage) !== -1) {
let tmp = message.mes;
message.mes = message.mes.replace(ttsLastMessage, '');
ttsLastMessage = tmp;
} else {
ttsLastMessage = message.mes;
}
// We're currently swiping. Don't generate voice
if (!message || message.mes === '...' || message.mes === '') {
return;
}
// Don't generate if message doesn't have a display text
@ -246,6 +306,7 @@ window.debugTtsPlayback = debugTtsPlayback
//##################//
let audioElement = new Audio()
audioElement.id = 'tts_audio'
audioElement.autoplay = true
let audioJobQueue = []
@ -396,7 +457,7 @@ let currentTtsJob // Null if nothing is currently being processed
let currentMessageNumber = 0
function completeTtsJob() {
console.info(`Current TTS job for ${currentTtsJob.name} completed.`)
console.info(`Current TTS job for ${currentTtsJob?.name} completed.`)
currentTtsJob = null
}
@ -441,6 +502,14 @@ async function processTtsQueue() {
const partJoiner = (ttsProvider?.separator || ' ... ');
text = matches ? matches.join(partJoiner) : text;
}
if (typeof ttsProvider?.processText === 'function') {
text = await ttsProvider.processText(text);
}
// Collapse newlines and spaces into single space
text = text.replace(/\s+/g, ' ').trim();
console.log(`TTS: ${text}`)
const char = currentTtsJob.name
@ -628,12 +697,44 @@ export function saveTtsProviderSettings() {
async function onChatChanged() {
await resetTtsPlayback()
await initVoiceMap()
ttsLastMessage = null
}
function getCharacters(){
async function onChatDeleted() {
const context = getContext()
// update internal references to new last message
lastChatId = context.chatId
currentMessageNumber = context.chat.length ? context.chat.length : 0
// compare against lastMessageHash. If it's the same, we did not delete the last chat item, so no need to reset tts queue
let messageHash = getStringHash((context.chat.length && context.chat[context.chat.length - 1].mes) ?? '')
if (messageHash === lastMessageHash) {
return
}
lastMessageHash = messageHash
ttsLastMessage = (context.chat.length && context.chat[context.chat.length - 1].mes) ?? '';
// stop any tts playback since message might not exist anymore
await resetTtsPlayback()
}
/**
* Get characters in current chat
* @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat.
* @returns {string[]} - Array of character names
*/
function getCharacters(unrestricted) {
const context = getContext()
if (unrestricted) {
const names = context.characters.map(char => char.name);
names.unshift(DEFAULT_VOICE_MARKER);
return names;
}
let characters = []
if (context.groupId === null){
if (context.groupId === null) {
// Single char chat
characters.push(DEFAULT_VOICE_MARKER)
characters.push(context.name1)
@ -645,7 +746,7 @@ function getCharacters(){
const group = context.groups.find(group => context.groupId == group.id)
for (let member of group.members) {
// Remove suffix
if (member.endsWith('.png')){
if (member.endsWith('.png')) {
member = member.slice(0, -4)
}
characters.push(member)
@ -655,15 +756,15 @@ function getCharacters(){
}
function sanitizeId(input) {
// Remove any non-alphanumeric characters except underscore (_) and hyphen (-)
let sanitized = input.replace(/[^a-zA-Z0-9-_]/g, '');
// Remove any non-alphanumeric characters except underscore (_) and hyphen (-)
let sanitized = input.replace(/[^a-zA-Z0-9-_]/g, '');
// Ensure first character is always a letter
if (!/^[a-zA-Z]/.test(sanitized)) {
sanitized = 'element_' + sanitized;
}
// Ensure first character is always a letter
if (!/^[a-zA-Z]/.test(sanitized)) {
sanitized = 'element_' + sanitized;
}
return sanitized;
return sanitized;
}
function parseVoiceMap(voiceMapString) {
@ -685,13 +786,13 @@ function parseVoiceMap(voiceMapString) {
*/
function updateVoiceMap() {
const tempVoiceMap = {}
for (const voice of voiceMapEntries){
if (voice.voiceId === null){
for (const voice of voiceMapEntries) {
if (voice.voiceId === null) {
continue
}
tempVoiceMap[voice.name] = voice.voiceId
}
if (Object.keys(tempVoiceMap).length !== 0){
if (Object.keys(tempVoiceMap).length !== 0) {
voiceMap = tempVoiceMap
console.log(`Voicemap updated to ${JSON.stringify(voiceMap)}`)
}
@ -706,13 +807,13 @@ class VoiceMapEntry {
name
voiceId
selectElement
constructor (name, voiceId=DEFAULT_VOICE_MARKER) {
constructor(name, voiceId = DEFAULT_VOICE_MARKER) {
this.name = name
this.voiceId = voiceId
this.selectElement = null
}
addUI(voiceIds){
addUI(voiceIds) {
let sanitizedName = sanitizeId(this.name)
let defaultOption = this.name === DEFAULT_VOICE_MARKER ?
`<option>${DISABLED_VOICE_MARKER}</option>` :
@ -728,7 +829,7 @@ class VoiceMapEntry {
$('#tts_voicemap_block').append(template)
// Populate voice ID select list
for (const voiceId of voiceIds){
for (const voiceId of voiceIds) {
const option = document.createElement('option');
option.innerText = voiceId.name;
option.value = voiceId.name;
@ -748,12 +849,12 @@ class VoiceMapEntry {
/**
* Init voiceMapEntries for character select list.
*
* @param {boolean} unrestricted - If true, will include all characters in voiceMapEntries, even if they are not in the current chat.
*/
export async function initVoiceMap(){
export async function initVoiceMap(unrestricted = false) {
// Gate initialization if not enabled or TTS Provider not ready. Prevents error popups.
const enabled = $('#tts_enabled').is(':checked')
if (!enabled){
if (!enabled) {
return
}
@ -771,18 +872,18 @@ export async function initVoiceMap(){
// Clear existing voiceMap state
$('#tts_voicemap_block').empty()
voiceMapEntries = []
// Get characters in current chat
const characters = getCharacters()
const characters = getCharacters(unrestricted);
// Get saved voicemap from provider settings, handling new and old representations
let voiceMapFromSettings = {}
if ("voiceMap" in extension_settings.tts[ttsProviderName]) {
// Handle previous representation
if (typeof extension_settings.tts[ttsProviderName].voiceMap === "string"){
if (typeof extension_settings.tts[ttsProviderName].voiceMap === "string") {
voiceMapFromSettings = parseVoiceMap(extension_settings.tts[ttsProviderName].voiceMap)
// Handle new representation
} else if (typeof extension_settings.tts[ttsProviderName].voiceMap === "object"){
// Handle new representation
} else if (typeof extension_settings.tts[ttsProviderName].voiceMap === "object") {
voiceMapFromSettings = extension_settings.tts[ttsProviderName].voiceMap
}
}
@ -797,13 +898,13 @@ export async function initVoiceMap(){
}
// Build UI using VoiceMapEntry objects
for (const character of characters){
if (character === "SillyTavern System"){
for (const character of characters) {
if (character === "SillyTavern System") {
continue
}
// Check provider settings for voiceIds
let voiceId
if (character in voiceMapFromSettings){
if (character in voiceMapFromSettings) {
voiceId = voiceMapFromSettings[character]
} else if (character === DEFAULT_VOICE_MARKER) {
voiceId = DISABLED_VOICE_MARKER
@ -897,5 +998,8 @@ $(document).ready(function () {
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL) // Init depends on all the things
eventSource.on(event_types.MESSAGE_SWIPED, resetTtsPlayback);
eventSource.on(event_types.CHAT_CHANGED, onChatChanged)
eventSource.on(event_types.MESSAGE_DELETED, onChatDeleted);
eventSource.on(event_types.GROUP_UPDATED, onChatChanged)
registerSlashCommand('speak', onNarrateText, ['narrate', 'tts'], `<span class="monospace">(text)</span> narrate any text using currently selected character's voice. Use voice="Character Name" argument to set other voice from the voice map, example: <tt>/speak voice="Donald Duck" Quack!</tt>`, true, true);
document.body.appendChild(audioElement);
})

View File

@ -19,6 +19,17 @@ class NovelTtsProvider {
customVoices: []
}
/**
* Perform any text processing before passing to TTS engine.
* @param {string} text Input text
* @returns {string} Processed text
*/
processText(text) {
// Novel reads tilde as a word. Replace with full stop
text = text.replace(/~/g, '.');
return text;
}
get settingsHtml() {
let html = `
<div class="novel_tts_hints">

View File

@ -0,0 +1,148 @@
import { getRequestHeaders } from "../../../script.js"
import { saveTtsProviderSettings } from "./index.js";
export { OpenAITtsProvider }
class OpenAITtsProvider {
static voices = [
{ name: 'Alloy', voice_id: 'alloy', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/alloy.wav' },
{ name: 'Echo', voice_id: 'echo', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/echo.wav' },
{ name: 'Fable', voice_id: 'fable', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/fable.wav' },
{ name: 'Onyx', voice_id: 'onyx', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/onyx.wav' },
{ name: 'Nova', voice_id: 'nova', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/nova.wav' },
{ name: 'Shimmer', voice_id: 'shimmer', lang: 'en-US', preview_url: 'https://cdn.openai.com/API/docs/audio/shimmer.wav' },
];
settings
voices = []
separator = ' . '
audioElement = document.createElement('audio')
defaultSettings = {
voiceMap: {},
customVoices: [],
model: 'tts-1',
speed: 1,
}
get settingsHtml() {
let html = `
<div>Use OpenAI's TTS engine.</div>
<small>Hint: Save an API key in the OpenAI API settings to use it here.</small>
<div>
<label for="openai-tts-model">Model:</label>
<select id="openai-tts-model">
<optgroup label="Latest">
<option value="tts-1">tts-1</option>
<option value="tts-1-hd">tts-1-hd</option>
</optgroup>
<optgroup label="Snapshots">
<option value="tts-1-1106">tts-1-1106</option>
<option value="tts-1-hd-1106">tts-1-hd-1106</option>
</optgroup>
<select>
</div>
<div>
<label for="openai-tts-speed">Speed: <span id="openai-tts-speed-output"></span></label>
<input type="range" id="openai-tts-speed" value="1" min="0.25" max="4" step="0.25">
</div>`;
return html;
}
async loadSettings(settings) {
// Populate Provider UI given input settings
if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings")
}
// Only accept keys defined in defaultSettings
this.settings = this.defaultSettings;
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key];
} else {
throw `Invalid setting passed to TTS Provider: ${key}`;
}
}
$('#openai-tts-model').val(this.settings.model);
$('#openai-tts-model').on('change', () => {
this.onSettingsChange();
});
$('#openai-tts-speed').val(this.settings.speed);
$('#openai-tts-speed').on('input', () => {
this.onSettingsChange();
});
$('#openai-tts-speed-output').text(this.settings.speed);
await this.checkReady();
console.debug("OpenAI TTS: Settings loaded");
}
onSettingsChange() {
// Update dynamically
this.settings.model = String($('#openai-tts-model').find(':selected').val());
this.settings.speed = Number($('#openai-tts-speed').val());
$('#openai-tts-speed-output').text(this.settings.speed);
saveTtsProviderSettings();
}
async checkReady() {
await this.fetchTtsVoiceObjects();
}
async onRefreshClick() {
return;
}
async getVoice(voiceName) {
if (!voiceName) {
throw `TTS Voice name not provided`
}
const voice = OpenAITtsProvider.voices.find(voice => voice.voice_id === voiceName || voice.name === voiceName);
if (!voice) {
throw `TTS Voice not found: ${voiceName}`
}
return voice;
}
async generateTts(text, voiceId) {
const response = await this.fetchTtsGeneration(text, voiceId)
return response
}
async fetchTtsVoiceObjects() {
return OpenAITtsProvider.voices;
}
async previewTtsVoice(_) {
return;
}
async fetchTtsGeneration(inputText, voiceId) {
console.info(`Generating new TTS for voice_id ${voiceId}`)
const response = await fetch(`/api/openai/generate-voice`, {
method: 'POST',
headers: getRequestHeaders(),
body: JSON.stringify({
"text": inputText,
"voice": voiceId,
"model": this.settings.model,
"speed": this.settings.speed,
}),
});
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
}
return response;
}
}

View File

@ -1,8 +1,8 @@
# Provider Requirements.
# Provider Requirements.
Because I don't know how, or if you can, and/or maybe I am just too lazy to implement interfaces in JS, here's the requirements of a provider that the extension needs to operate.
### class YourTtsProvider
#### Required
#### Required
Exported for use in extension index.js, and added to providers list in index.js
1. generateTts(text, voiceId)
2. fetchTtsVoiceObjects()
@ -13,8 +13,9 @@ Exported for use in extension index.js, and added to providers list in index.js
7. settingsHtml field
#### Optional
1. previewTtsVoice()
1. previewTtsVoice()
2. separator field
3. processText(text)
# Requirement Descriptions
### generateTts(text, voiceId)
@ -49,14 +50,14 @@ Return without error to let TTS extension know that the provider is ready.
Return an error to block the main TTS extension for initializing the provider and UI. The error will be put in the TTS extension UI directly.
### loadSettings(settingsObject)
Required.
Required.
Handle the input settings from the TTS extension on provider load.
Put code in here to load your provider settings.
### settings field
Required, used for storing any provider state that needs to be saved.
Anything stored in this field is automatically persisted under extension_settings[providerName] by the main extension in `saveTtsProviderSettings()`, as well as loaded when the provider is selected in `loadTtsProvider(provider)`.
TTS extension doesn't expect any specific contents.
TTS extension doesn't expect any specific contents.
### settingsHtml field
Required, injected into the TTS extension UI. Besides adding it, not relied on by TTS extension directly.
@ -68,4 +69,8 @@ Function to handle playing previews of voice samples if no direct preview_url is
### separator field
Optional.
Used when narrate quoted text is enabled.
Defines the string of characters used to introduce separation between between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...`
Defines the string of characters used to introduce separation between between the groups of extracted quoted text sent to the provider. The provider will use this to introduce pauses by default using `...`
### processText(text)
Optional.
A function applied to the input text before passing it to the TTS generator. Can be async.

View File

@ -146,8 +146,8 @@ class SystemTtsProvider {
$('#system_tts_pitch').val(this.settings.pitch || this.defaultSettings.pitch);
// Trigger updates
$('#system_tts_rate').on("input", () =>{this.onSettingsChange()})
$('#system_tts_rate').on("input", () => {this.onSettingsChange()})
$('#system_tts_rate').on("input", () => { this.onSettingsChange() })
$('#system_tts_rate').on("input", () => { this.onSettingsChange() })
$('#system_tts_pitch_output').text(this.settings.pitch);
$('#system_tts_rate_output').text(this.settings.rate);
@ -155,7 +155,7 @@ class SystemTtsProvider {
}
// Perform a simple readiness check by trying to fetch voiceIds
async checkReady(){
async checkReady() {
await this.fetchTtsVoiceObjects()
}
@ -171,10 +171,16 @@ class SystemTtsProvider {
return [];
}
return speechSynthesis
.getVoices()
.sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
.map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
return new Promise((resolve) => {
setTimeout(() => {
const voices = speechSynthesis
.getVoices()
.sort((a, b) => a.lang.localeCompare(b.lang) || a.name.localeCompare(b.name))
.map(x => ({ name: x.name, voice_id: x.voiceURI, preview_url: false, lang: x.lang }));
resolve(voices);
}, 1);
});
}
previewTtsVoice(voiceId) {

View File

@ -0,0 +1,207 @@
import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
import { saveTtsProviderSettings } from "./index.js"
export { XTTSTtsProvider }
class XTTSTtsProvider {
//########//
// Config //
//########//
settings
ready = false
voices = []
separator = '. '
/**
* Perform any text processing before passing to TTS engine.
* @param {string} text Input text
* @returns {string} Processed text
*/
processText(text) {
// Replace fancy ellipsis with "..."
text = text.replace(/…/g, '...');
// Remove quotes
text = text.replace(/["“”‘’]/g, '');
// Replace multiple "." with single "."
text = text.replace(/\.+/g, '.');
return text;
}
languageLabels = {
"Arabic": "ar",
"Brazilian Portuguese": "pt",
"Chinese": "zh-cn",
"Czech": "cs",
"Dutch": "nl",
"English": "en",
"French": "fr",
"German": "de",
"Italian": "it",
"Polish": "pl",
"Russian": "ru",
"Spanish": "es",
"Turkish": "tr",
"Japanese": "ja",
"Korean": "ko",
"Hungarian": "hu",
"Hindi": "hi",
}
defaultSettings = {
provider_endpoint: "http://localhost:8020",
language: "en",
voiceMap: {}
}
get settingsHtml() {
let html = `
<label for="xtts_api_language">Language</label>
<select id="xtts_api_language">`;
for (let language in this.languageLabels) {
if (this.languageLabels[language] == this.settings?.language) {
html += `<option value="${this.languageLabels[language]}" selected="selected">${language}</option>`;
continue
}
html += `<option value="${this.languageLabels[language]}">${language}</option>`;
}
html += `
</select>
<label for="xtts_tts_endpoint">Provider Endpoint:</label>
<input id="xtts_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
`;
html += `
<span>
<span>Use <a target="_blank" href="https://github.com/daswer123/xtts-api-server">XTTSv2 TTS Server</a>.</span>
`;
return html;
}
onSettingsChange() {
// Used when provider settings are updated from UI
this.settings.provider_endpoint = $('#xtts_tts_endpoint').val()
this.settings.language = $('#xtts_api_language').val()
saveTtsProviderSettings()
}
async loadSettings(settings) {
// Pupulate Provider UI given input settings
if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings")
}
// Only accept keys defined in defaultSettings
this.settings = this.defaultSettings
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key]
} else {
throw `Invalid setting passed to TTS Provider: ${key}`
}
}
const apiCheckInterval = setInterval(() => {
// Use Extras API if TTS support is enabled
if (modules.includes('tts') || modules.includes('xtts-tts')) {
const baseUrl = new URL(getApiUrl());
baseUrl.pathname = '/api/tts';
this.settings.provider_endpoint = baseUrl.toString();
$('#xtts_tts_endpoint').val(this.settings.provider_endpoint);
clearInterval(apiCheckInterval);
}
}, 2000);
$('#xtts_tts_endpoint').val(this.settings.provider_endpoint)
$('#xtts_tts_endpoint').on("input", () => { this.onSettingsChange() })
$('#xtts_api_language').val(this.settings.language)
$('#xtts_api_language').on("change", () => { this.onSettingsChange() })
await this.checkReady()
console.debug("XTTS: Settings loaded")
}
// Perform a simple readiness check by trying to fetch voiceIds
async checkReady() {
const response = await this.fetchTtsVoiceObjects()
}
async onRefreshClick() {
return
}
//#################//
// TTS Interfaces //
//#################//
async getVoice(voiceName) {
if (this.voices.length == 0) {
this.voices = await this.fetchTtsVoiceObjects()
}
const match = this.voices.filter(
XTTSVoice => XTTSVoice.name == voiceName
)[0]
if (!match) {
throw `TTS Voice name ${voiceName} not found`
}
return match
}
async generateTts(text, voiceId) {
const response = await this.fetchTtsGeneration(text, voiceId)
return response
}
//###########//
// API CALLS //
//###########//
async fetchTtsVoiceObjects() {
const response = await doExtrasFetch(`${this.settings.provider_endpoint}/speakers`)
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`)
}
const responseJson = await response.json()
return responseJson
}
async fetchTtsGeneration(inputText, voiceId) {
console.info(`Generating new TTS for voice_id ${voiceId}`)
const response = await doExtrasFetch(
`${this.settings.provider_endpoint}/tts_to_audio/`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache' // Added this line to disable caching of file so new files are always played - Rolyat 7/7/23
},
body: JSON.stringify({
"text": inputText,
"speaker_wav": voiceId,
"language": this.settings.language
})
}
)
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
}
return response
}
// Interface not used by XTTS TTS
async fetchTtsFromHistory(history_item_id) {
return Promise.resolve(history_item_id);
}
}