Merge branch 'staging' of http://github.com/cohee1207/SillyTavern into staging

This commit is contained in:
Cohee1207 2023-07-27 23:35:02 +03:00
commit 87b9da11c6
10 changed files with 1003 additions and 380 deletions

View File

@ -73,6 +73,7 @@ const extension_settings = {
fluctuation: 0.1,
enabled: false,
},
speech_recognition: {},
};
let modules = [];

View File

@ -372,8 +372,6 @@ function onEditPromptClick() {
let popupText = ''
popupText += `
<div class="objective_prompt_modal">
<div class="alignitemsflexstart flex-container">
</div>
<div>
<label for="objective-prompt-generate">Generation Prompt</label>
<textarea id="objective-prompt-generate" type="text" class="text_pole textarea_compact" rows="8"></textarea>
@ -382,12 +380,14 @@ function onEditPromptClick() {
<label for="objective-prompt-extension-prompt">Injected Prompt</label>
<textarea id="objective-prompt-extension-prompt" type="text" class="text_pole textarea_compact" rows="8"></textarea>
</div>
<div class="alignitemsflexstart flex-container">
<input id="objective-custom-prompt-name" type="text" class="flex1 heightFitContent text_pole widthNatural" maxlength="250" placeholder="Custom Prompt Name">
<input id="objective-custom-prompt-save" class="menu_button" type="submit" value="Save Custom Prompt" />
<label for="objective-prompt-load"> Load Prompt </label>
<div class="objective_prompt_block">
<input id="objective-custom-prompt-name" style="flex-grow:2" type="text" class="flex1 heightFitContent text_pole widthNatural" maxlength="250" placeholder="Custom Prompt Name">
<input id="objective-custom-prompt-save" style="flex-grow:1" class="menu_button" type="submit" value="Save Prompt" />
</div>
<div class="objective_prompt_block">
<label for="objective-prompt-load">Load Prompt</label>
<select id="objective-prompt-load"></select>
<input id="objective-custom-prompt-delete" class="menu_button" type="submit" value="Delete Custom Prompt" />
<input id="objective-custom-prompt-delete" class="menu_button" type="submit" value="Delete Prompt" />
</div>
</div>`
callPopup(popupText, 'text')

View File

@ -10,6 +10,13 @@
flex-wrap: wrap;
}
/* Row layout for the prompt-editor controls: children share one line,
   are baseline-aligned with a small gap, and wrap on narrow widths. */
.objective_prompt_block {
display: flex;
align-items: baseline;
column-gap: 5px;
flex-wrap: wrap;
}
/* Baseline-align controls inside an objective block. */
.objective_block_control {
align-items: baseline;
}

View File

@ -0,0 +1,233 @@
// Borrowed from Agnai (AGPLv3)
// https://github.com/agnaistic/agnai/blob/dev/web/pages/Chat/components/SpeechRecognitionRecorder.tsx
// First version by Cohee#1207
// Adapted by Tony-sama
export { BrowserSttProvider }
const DEBUG_PREFIX = "<Speech Recognition module (Browser)> "

/**
 * Speech-to-text provider built on the browser's Web Speech API
 * (SpeechRecognition / webkitSpeechRecognition).
 *
 * Interim results are streamed into #send_textarea while the user speaks;
 * when recognition ends, the newly dictated text is handed to
 * `processTranscriptFunction` (injected by index.js).
 */
class BrowserSttProvider {
    //########//
    // Config //
    //########//

    // Active settings; populated by loadSettings().
    settings = {
        language: "",
    };

    defaultSettings = {
        language: "en-US",
    };

    // Callback injected by index.js; receives the final transcript string.
    processTranscriptFunction = null;

    /** HTML fragment for this provider's settings UI (language selector). */
    get settingsHtml() {
        // Typos fixed vs. the original list: "Hong Kong", "Portuguese", "Colombia".
        const html = `
            <span>Language</span> <br/>
            <select id="speech_recognition_browser_provider_language">
                <option value="ar-SA">ar-SA: Arabic (Saudi Arabia)</option>
                <option value="bn-BD">bn-BD: Bangla (Bangladesh)</option>
                <option value="bn-IN">bn-IN: Bangla (India)</option>
                <option value="cs-CZ">cs-CZ: Czech (Czech Republic)</option>
                <option value="da-DK">da-DK: Danish (Denmark)</option>
                <option value="de-AT">de-AT: German (Austria)</option>
                <option value="de-CH">de-CH: German (Switzerland)</option>
                <option value="de-DE">de-DE: German (Germany)</option>
                <option value="el-GR">el-GR: Greek (Greece)</option>
                <option value="en-AU">en-AU: English (Australia)</option>
                <option value="en-CA">en-CA: English (Canada)</option>
                <option value="en-GB">en-GB: English (United Kingdom)</option>
                <option value="en-IE">en-IE: English (Ireland)</option>
                <option value="en-IN">en-IN: English (India)</option>
                <option value="en-NZ">en-NZ: English (New Zealand)</option>
                <option value="en-US">en-US: English (United States)</option>
                <option value="en-ZA">en-ZA: English (South Africa)</option>
                <option value="es-AR">es-AR: Spanish (Argentina)</option>
                <option value="es-CL">es-CL: Spanish (Chile)</option>
                <option value="es-CO">es-CO: Spanish (Colombia)</option>
                <option value="es-ES">es-ES: Spanish (Spain)</option>
                <option value="es-MX">es-MX: Spanish (Mexico)</option>
                <option value="es-US">es-US: Spanish (United States)</option>
                <option value="fi-FI">fi-FI: Finnish (Finland)</option>
                <option value="fr-BE">fr-BE: French (Belgium)</option>
                <option value="fr-CA">fr-CA: French (Canada)</option>
                <option value="fr-CH">fr-CH: French (Switzerland)</option>
                <option value="fr-FR">fr-FR: French (France)</option>
                <option value="he-IL">he-IL: Hebrew (Israel)</option>
                <option value="hi-IN">hi-IN: Hindi (India)</option>
                <option value="hu-HU">hu-HU: Hungarian (Hungary)</option>
                <option value="id-ID">id-ID: Indonesian (Indonesia)</option>
                <option value="it-CH">it-CH: Italian (Switzerland)</option>
                <option value="it-IT">it-IT: Italian (Italy)</option>
                <option value="ja-JP">ja-JP: Japanese (Japan)</option>
                <option value="ko-KR">ko-KR: Korean (Republic of Korea)</option>
                <option value="nl-BE">nl-BE: Dutch (Belgium)</option>
                <option value="nl-NL">nl-NL: Dutch (The Netherlands)</option>
                <option value="no-NO">no-NO: Norwegian (Norway)</option>
                <option value="pl-PL">pl-PL: Polish (Poland)</option>
                <option value="pt-BR">pt-BR: Portuguese (Brazil)</option>
                <option value="pt-PT">pt-PT: Portuguese (Portugal)</option>
                <option value="ro-RO">ro-RO: Romanian (Romania)</option>
                <option value="ru-RU">ru-RU: Russian (Russian Federation)</option>
                <option value="sk-SK">sk-SK: Slovak (Slovakia)</option>
                <option value="sv-SE">sv-SE: Swedish (Sweden)</option>
                <option value="ta-IN">ta-IN: Tamil (India)</option>
                <option value="ta-LK">ta-LK: Tamil (Sri Lanka)</option>
                <option value="th-TH">th-TH: Thai (Thailand)</option>
                <option value="tr-TR">tr-TR: Turkish (Turkey)</option>
                <option value="zh-CN">zh-CN: Chinese (China)</option>
                <option value="zh-HK">zh-HK: Chinese (Hong Kong)</option>
                <option value="zh-TW">zh-TW: Chinese (Taiwan)</option>
            </select>
        `;
        return html;
    }

    /** Called when the provider's UI inputs change: re-read the language and re-init. */
    onSettingsChange() {
        this.settings.language = $("#speech_recognition_browser_provider_language").val();
        console.debug(DEBUG_PREFIX + "Change language to", this.settings.language);
        // Re-run loadSettings so the recognizer picks up the new language.
        this.loadSettings(this.settings);
    }

    /**
     * Capitalize the first visible character of an interim transcript chunk.
     * A single leading space is consumed in the process (matches original behavior).
     */
    static capitalizeInterim(interimTranscript) {
        let capitalizeIndex = -1;
        if (interimTranscript.length > 2 && interimTranscript[0] === ' ') capitalizeIndex = 1;
        else if (interimTranscript.length > 1) capitalizeIndex = 0;
        if (capitalizeIndex > -1) {
            const spacing = capitalizeIndex > 0 ? ' '.repeat(capitalizeIndex - 1) : '';
            const capitalized = interimTranscript[capitalizeIndex].toLocaleUpperCase();
            const rest = interimTranscript.substring(capitalizeIndex + 1);
            interimTranscript = spacing + capitalized + rest;
        }
        return interimTranscript;
    }

    /** Join a finished sentence and the next chunk, inserting a space after a period. */
    static composeValues(previous, interim) {
        let spacing = '';
        if (previous.endsWith('.')) spacing = ' ';
        return previous + spacing + interim;
    }

    /**
     * Apply stored settings, build the SpeechRecognition instance and wire the
     * microphone button, textarea streaming, and lifecycle handlers.
     * @param {object} settings - persisted settings for this provider
     * @throws {string} when an unknown settings key is passed
     */
    loadSettings(settings) {
        const processTranscript = this.processTranscriptFunction;

        if (Object.keys(settings).length == 0) {
            console.debug(DEBUG_PREFIX + "Using default browser STT settings");
        }

        // Copy the defaults instead of aliasing them so later mutation of
        // this.settings can never corrupt defaultSettings.
        this.settings = Object.assign({}, this.defaultSettings);
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to Speech recognition extension (browser): ${key}`;
            }
        }
        $("#speech_recognition_browser_provider_language").val(this.settings.language);

        // Bug fix: the original extended an undefined `options` variable
        // ($.extend({...}, options)), which threw a ReferenceError here.
        const speechRecognitionSettings = { grammar: '' }; // Custom grammar

        const speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        const speechRecognitionList = window.SpeechGrammarList || window.webkitSpeechGrammarList;

        if (!speechRecognition) {
            console.warn(DEBUG_PREFIX + 'Speech recognition is not supported in this browser.');
            $("#microphone_button").hide();
            toastr.error("Speech recognition is not supported in this browser, use another browser or another provider of SillyTavern-extras Speech recognition extension.", "Speech recognition activation Failed (Browser)", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
            return;
        }

        const recognition = new speechRecognition();

        if (speechRecognitionSettings.grammar && speechRecognitionList) {
            speechRecognitionList.addFromString(speechRecognitionSettings.grammar, 1);
            recognition.grammars = speechRecognitionList;
        }

        recognition.continuous = true;
        recognition.interimResults = true;
        recognition.lang = this.settings.language;

        const textarea = $('#send_textarea');
        const button = $('#microphone_button');

        let listening = false;
        // Push-to-talk toggle.
        button.off('click').on("click", function () {
            if (listening) {
                recognition.stop();
            } else {
                recognition.start();
            }
            listening = !listening;
        });

        let initialText = '';

        recognition.onresult = function (speechEvent) {
            let finalTranscript = '';
            let interimTranscript = '';
            for (let i = speechEvent.resultIndex; i < speechEvent.results.length; ++i) {
                const transcript = speechEvent.results[i][0].transcript;
                if (speechEvent.results[i].isFinal) {
                    let interim = BrowserSttProvider.capitalizeInterim(transcript);
                    if (interim != '') {
                        let final = finalTranscript;
                        final = BrowserSttProvider.composeValues(final, interim);
                        // Bug fix: was bitwise `&`; logical AND intended.
                        if (final.slice(-1) != '.' && final.slice(-1) != '?') final += '.';
                        finalTranscript = final;
                        // A final result ends the session immediately.
                        recognition.abort();
                        listening = false;
                    }
                    interimTranscript = ' ';
                } else {
                    interimTranscript += transcript;
                }
            }
            interimTranscript = BrowserSttProvider.capitalizeInterim(interimTranscript);
            // Stream the live transcript into the chat textarea.
            textarea.val(initialText + finalTranscript + interimTranscript);
        };

        recognition.onerror = function (event) {
            console.error('Error occurred in recognition:', event.error);
            //if ($('#speech_recognition_debug').is(':checked'))
            //    toastr.error('Error occurred in recognition:'+ event.error, 'STT Generation error (Browser)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        };

        recognition.onend = function () {
            listening = false;
            button.toggleClass('fa-microphone fa-microphone-slash');
            // Strip the dictated suffix back out of the textarea and let the
            // extension decide what to do with it (append/replace/auto-send).
            const newText = textarea.val().substring(initialText.length);
            textarea.val(textarea.val().substring(0, initialText.length));
            processTranscript(newText);
        };

        recognition.onstart = function () {
            initialText = textarea.val();
            button.toggleClass('fa-microphone fa-microphone-slash');
            if ($("#speech_recognition_message_mode").val() == "replace") {
                textarea.val("");
                initialText = "";
            }
        };

        $("#microphone_button").show();
        console.debug(DEBUG_PREFIX + "Browser STT settings loaded");
    }
}

View File

@ -1,110 +1,351 @@
// Borrowed from Agnai (AGPLv3)
// https://github.com/agnaistic/agnai/blob/dev/web/pages/Chat/components/SpeechRecognitionRecorder.tsx
function capitalizeInterim(interimTranscript) {
let capitalizeIndex = -1;
if (interimTranscript.length > 2 && interimTranscript[0] === ' ') capitalizeIndex = 1;
else if (interimTranscript.length > 1) capitalizeIndex = 0;
if (capitalizeIndex > -1) {
const spacing = capitalizeIndex > 0 ? ' '.repeat(capitalizeIndex - 1) : '';
const capitalized = interimTranscript[capitalizeIndex].toLocaleUpperCase();
const rest = interimTranscript.substring(capitalizeIndex + 1);
interimTranscript = spacing + capitalized + rest;
/*
TODO:
- try pseudo streaming audio by just sending chunk every X seconds and asking VOSK if it is full text.
*/
import { saveSettingsDebounced } from "../../../script.js";
import { getContext, getApiUrl, modules, extension_settings, ModuleWorkerWrapper, doExtrasFetch } from "../../extensions.js";
import { VoskSttProvider } from './vosk.js'
import { WhisperSttProvider } from './whisper.js'
import { BrowserSttProvider } from './browser.js'
export { MODULE_NAME };
const MODULE_NAME = 'Speech Recognition';
const DEBUG_PREFIX = "<Speech Recognition module> "
let sttProviders = {
None: null,
Browser: BrowserSttProvider,
Whisper: WhisperSttProvider,
Vosk: VoskSttProvider,
}
let sttProvider = null
let sttProviderName = "None"
let audioRecording = false
const constraints = { audio: { sampleSize: 16, channelCount: 1, sampleRate: 16000 } };
let audioChunks = [];
/**
 * Handle a finished speech transcript according to the configured message mode.
 *
 * Flow: trim the transcript; if message mapping is enabled, try to match a
 * mapped command first (and optionally auto-send it); otherwise append,
 * replace, or auto-send the dictated text via #send_textarea.
 * Errors are logged and swallowed so a bad transcript never breaks the UI.
 * @param {string} transcript - raw text returned by the STT provider
 */
async function processTranscript(transcript) {
    try {
        const transcriptOriginal = transcript;
        let transcriptFormatted = transcriptOriginal.trim();

        if (transcriptFormatted.length > 0) {
            console.debug(DEBUG_PREFIX + "recorded transcript: \"" + transcriptFormatted + "\"");
            const messageMode = extension_settings.speech_recognition.messageMode;
            console.debug(DEBUG_PREFIX + "mode: " + messageMode);

            let transcriptLower = transcriptFormatted.toLowerCase();
            // remove punctuation
            let transcriptRaw = transcriptLower.replace(/[^\w\s\']|_/g, "").replace(/\s+/g, " ");

            // Check message mapping: first matching key wins.
            if (extension_settings.speech_recognition.messageMappingEnabled) {
                console.debug(DEBUG_PREFIX + "Start searching message mapping into:", transcriptRaw);
                for (const key in extension_settings.speech_recognition.messageMapping) {
                    console.debug(DEBUG_PREFIX + "message mapping searching: ", key, "=>", extension_settings.speech_recognition.messageMapping[key]);
                    if (transcriptRaw.includes(key)) {
                        const message = extension_settings.speech_recognition.messageMapping[key];
                        console.debug(DEBUG_PREFIX + "message mapping found: ", key, "=>", extension_settings.speech_recognition.messageMapping[key]);
                        $("#send_textarea").val(message);
                        if (messageMode == "auto_send") await getContext().generate();
                        return;
                    }
                }
            }

            console.debug(DEBUG_PREFIX + "no message mapping found, processing transcript as normal message");

            switch (messageMode) {
                case "auto_send": {
                    $('#send_textarea').val(""); // clear message area to avoid double message
                    console.debug(DEBUG_PREFIX + "Sending message");
                    const context = getContext();
                    const messageText = transcriptFormatted;
                    const message = {
                        name: context.name1,
                        is_user: true,
                        is_name: true,
                        send_date: Date.now(),
                        mes: messageText,
                    };
                    context.chat.push(message);
                    context.addOneMessage(message);
                    await context.generate();
                    $('#debug_output').text("<SST-module DEBUG>: message sent: \"" + transcriptFormatted + "\"");
                    break;
                }
                case "replace":
                    console.debug(DEBUG_PREFIX + "Replacing message");
                    $('#send_textarea').val(transcriptFormatted);
                    break;
                case "append":
                    console.debug(DEBUG_PREFIX + "Appending message");
                    $('#send_textarea').val($('#send_textarea').val() + " " + transcriptFormatted);
                    break;
                default:
                    console.debug(DEBUG_PREFIX + "Not supported stt message mode: " + messageMode);
            }
        }
        else {
            console.debug(DEBUG_PREFIX + "Empty transcript, do nothing");
        }
    }
    catch (error) {
        console.debug(error);
    }
}
function composeValues(previous, interim) {
let spacing = '';
if (previous.endsWith('.')) spacing = ' ';
return previous + spacing + interim;
// Set up microphone capture via MediaRecorder for providers that transcribe
// server-side (Vosk/Whisper). Binds #microphone_button as a start/stop toggle;
// when recording stops, the collected chunks are sent to the active
// sttProvider and the resulting transcript is handed to processTranscript().
function loadNavigatorAudioRecording() {
if (navigator.mediaDevices.getUserMedia) {
console.debug(DEBUG_PREFIX+' getUserMedia supported by browser.');
let onSuccess = function(stream) {
const mediaRecorder = new MediaRecorder(stream);
// Toggle recording on each click; the icon swap mirrors the recording state.
$("#microphone_button").off('click').on("click", function() {
if (!audioRecording) {
mediaRecorder.start();
console.debug(mediaRecorder.state);
console.debug("recorder started");
audioRecording = true;
$("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
}
else {
mediaRecorder.stop();
console.debug(mediaRecorder.state);
console.debug("recorder stopped");
audioRecording = false;
$("#microphone_button").toggleClass('fa-microphone fa-microphone-slash');
}
});
// On stop: bundle chunks into a blob and transcribe via the active provider.
// NOTE(review): "audio/wav" is requested here, but browsers may deliver a
// different container (e.g. webm/ogg) regardless of the MIME hint — confirm
// the extras endpoints accept what is actually produced.
mediaRecorder.onstop = async function() {
console.debug(DEBUG_PREFIX+"data available after MediaRecorder.stop() called: ", audioChunks.length, " chunks");
const audioBlob = new Blob(audioChunks, { type: "audio/wav; codecs=0" });
audioChunks = [];
const transcript = await sttProvider.processAudio(audioBlob);
// TODO: lock and release recording while processing?
console.debug(DEBUG_PREFIX+"received transcript:", transcript);
processTranscript(transcript);
}
// Accumulate data chunks as they arrive during recording.
mediaRecorder.ondataavailable = function(e) {
audioChunks.push(e.data);
}
}
let onError = function(err) {
console.debug(DEBUG_PREFIX+"The following error occured: " + err);
}
navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
} else {
// No capture API available: recording-based providers cannot work here.
console.debug(DEBUG_PREFIX+"getUserMedia not supported on your browser!");
toastr.error("getUserMedia not supported", DEBUG_PREFIX+"not supported for your browser.", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
}
}
(function ($) {
$.fn.speechRecognitionPlugin = function (options) {
const settings = $.extend({
grammar: '' // Custom grammar
}, options);
//##############//
// STT Provider //
//##############//
const speechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
const speechRecognitionList = window.SpeechGrammarList || window.webkitSpeechGrammarList;
/**
 * Activate an STT provider by name: persist the selection, rebuild the
 * provider settings UI, and show/hide the microphone button and related
 * controls. "None" disables the feature entirely.
 * @param {string} provider - key into sttProviders ("None", "Browser", "Whisper", "Vosk")
 */
function loadSttProvider(provider) {
    // Clear the current provider config UI before adding the new one.
    $("#speech_recognition_provider_settings").html("");

    // Init provider references
    extension_settings.speech_recognition.currentProvider = provider;
    sttProviderName = provider;

    if (!(sttProviderName in extension_settings.speech_recognition)) {
        console.warn(`Provider ${sttProviderName} not in Extension Settings, initializing provider in settings`);
        extension_settings.speech_recognition[sttProviderName] = {};
    }

    $('#speech_recognition_provider').val(sttProviderName);

    if (sttProviderName == "None") {
        $("#microphone_button").hide();
        $("#speech_recognition_message_mode_div").hide();
        $("#speech_recognition_message_mapping_div").hide();
        return;
    }

    $("#speech_recognition_message_mode_div").show();
    $("#speech_recognition_message_mapping_div").show();

    sttProvider = new sttProviders[sttProviderName];

    // Init provider settings UI.
    $('#speech_recognition_provider_settings').append(sttProvider.settingsHtml);

    if (sttProviderName == "Browser") {
        // The browser provider manages the microphone button itself (push to
        // talk), so hand it the transcript callback before loading settings.
        sttProvider.processTranscriptFunction = processTranscript;
        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
    }
    else {
        // Recording-based providers use the shared MediaRecorder wiring.
        sttProvider.loadSettings(extension_settings.speech_recognition[sttProviderName]);
        loadNavigatorAudioRecording();
        $("#microphone_button").show();
    }
}
/** React to the provider dropdown: load the newly selected provider and persist. */
function onSttProviderChange() {
    loadSttProvider($('#speech_recognition_provider').val());
    saveSettingsDebounced();
}
/** Forward UI input to the active provider, then persist its settings. */
function onSttProviderSettingsInput() {
    // Let the provider re-read its own UI first.
    sttProvider.onSettingsChange();

    // Persist changes to SillyTavern stt extension settings
    const providerSettings = sttProvider.settings;
    extension_settings.speech_recognition[sttProviderName] = providerSettings;
    saveSettingsDebounced();
    console.info(`Saved settings ${sttProviderName} ${JSON.stringify(providerSettings)}`);
}
//#############################//
// Extension UI and Settings //
//#############################//
// Defaults for extension_settings.speech_recognition.
const defaultSettings = {
    currentProvider: "None",       // active STT provider key
    messageMode: "append",         // append | replace | auto_send
    messageMappingText: "",        // raw "phrase = /command" text as typed
    // Bug fix: this is consumed as a phrase->command dictionary
    // (string keys assigned in onMessageMappingChange, iterated with for..in),
    // so the default must be an object, not an array.
    messageMapping: {},
    messageMappingEnabled: false
}
/** Initialize extension settings (seeding defaults on first run) and sync the UI. */
function loadSettings() {
    const sttSettings = extension_settings.speech_recognition;

    // First run: seed the stored settings with the defaults.
    if (Object.keys(sttSettings).length === 0) {
        Object.assign(sttSettings, defaultSettings);
    }

    $('#speech_recognition_enabled').prop('checked', sttSettings.enabled);
    $('#speech_recognition_message_mode').val(sttSettings.messageMode);

    if (sttSettings.messageMappingText.length > 0) {
        $('#speech_recognition_message_mapping').val(sttSettings.messageMappingText);
    }

    $('#speech_recognition_message_mapping_enabled').prop('checked', sttSettings.messageMappingEnabled);
}
/**
 * Persist the selected message mode and toggle the "wait for response" option,
 * which only applies to auto-send with recording-based (non-Browser) providers.
 */
async function onMessageModeChange() {
    extension_settings.speech_recognition.messageMode = $('#speech_recognition_message_mode').val();

    // Bug fix: was bitwise `&`; logical AND intended.
    if (sttProviderName != "Browser" && extension_settings.speech_recognition.messageMode == "auto_send") {
        $("#speech_recognition_wait_response_div").show();
    }
    else {
        $("#speech_recognition_wait_response_div").hide();
    }

    saveSettingsDebounced();
}
/**
 * Parse the "phrase = /command" textarea into the messageMapping dictionary.
 * Entries are comma-separated; each must contain '=' or it is ignored with a
 * debug log. The raw text is stored too so the textarea can be restored.
 */
async function onMessageMappingChange() {
    let array = $('#speech_recognition_message_mapping').val().split(",");
    array = array.map(element => element.trim());
    array = array.filter((str) => str !== '');

    extension_settings.speech_recognition.messageMapping = {};
    for (const text of array) {
        if (text.includes("=")) {
            const pair = text.toLowerCase().split("=");
            extension_settings.speech_recognition.messageMapping[pair[0].trim()] = pair[1].trim();
            console.debug(DEBUG_PREFIX + "Added mapping", pair[0], "=>", extension_settings.speech_recognition.messageMapping[pair[0]]);
        }
        else {
            console.debug(DEBUG_PREFIX + "Wrong syntax for message mapping, no '=' found in:", text);
        }
    }

    $("#speech_recognition_message_mapping_status").text("Message mapping updated to: " + JSON.stringify(extension_settings.speech_recognition.messageMapping));
    console.debug(DEBUG_PREFIX + "Updated message mapping", extension_settings.speech_recognition.messageMapping);
    extension_settings.speech_recognition.messageMappingText = $('#speech_recognition_message_mapping').val();
    saveSettingsDebounced();
}
recognition.continuous = true;
recognition.interimResults = true;
// TODO: This should be configurable.
recognition.lang = 'en-US'; // Set the language to English (US).
/** Persist the "enable messages mapping" checkbox state. */
async function onMessageMappingEnabledClick() {
    const enabled = $('#speech_recognition_message_mapping_enabled').is(':checked');
    extension_settings.speech_recognition.messageMappingEnabled = enabled;
    saveSettingsDebounced();
}
const $textarea = this;
const $button = $('<div class="fa-solid fa-microphone speech-toggle" title="Click to speak"></div>');
$(document).ready(function () {
// Build and inject the extension's settings panel, populate the provider
// dropdown, wire UI event handlers, and add the microphone button next to
// the send controls. Runs once at startup.
function addExtensionControls() {
const settingsHtml = `
<div id="speech_recognition_settings">
<div class="inline-drawer">
<div class="inline-drawer-toggle inline-drawer-header">
<b>Speech Recognition</b>
<div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
</div>
<div class="inline-drawer-content">
<div>
<span>Select Speech-to-text Provider</span> </br>
<select id="speech_recognition_provider">
</select>
</div>
<div id="speech_recognition_message_mode_div">
<span>Message Mode</span> </br>
<select id="speech_recognition_message_mode">
<option value="append">Append</option>
<option value="replace">Replace</option>
<option value="auto_send">Auto send</option>
</select>
</div>
<div id="speech_recognition_message_mapping_div">
<span>Message Mapping</span>
<textarea id="speech_recognition_message_mapping" class="text_pole textarea_compact" type="text" rows="4" placeholder="Enter comma separated phrases mapping, example:\ncommand delete = /del 2,\nslash delete = /del 2,\nsystem roll = /roll 2d6,\nhey continue = /continue"></textarea>
<span id="speech_recognition_message_mapping_status"></span>
<label class="checkbox_label" for="speech_recognition_message_mapping_enabled">
<input type="checkbox" id="speech_recognition_message_mapping_enabled" name="speech_recognition_message_mapping_enabled">
<small>Enable messages mapping</small>
</label>
</div>
<form id="speech_recognition_provider_settings" class="inline-drawer-content">
</form>
</div>
</div>
</div>
`;
$('#extensions_settings').append(settingsHtml);
// Persist provider-specific settings whenever one of their inputs changes.
$('#speech_recognition_provider_settings').on('input', onSttProviderSettingsInput);
// One <option> per registered STT provider (None/Browser/Whisper/Vosk).
for (const provider in sttProviders) {
$('#speech_recognition_provider').append($("<option />").val(provider).text(provider));
console.debug(DEBUG_PREFIX+"added option "+provider);
}
$('#speech_recognition_provider').on('change', onSttProviderChange);
$('#speech_recognition_message_mode').on('change', onMessageModeChange);
$('#speech_recognition_message_mapping').on('change', onMessageMappingChange);
$('#speech_recognition_message_mapping_enabled').on('click', onMessageMappingEnabledClick);
// Microphone toggle button, prepended to the send-button strip.
const $button = $('<div id="microphone_button" class="fa-solid fa-microphone speech-toggle" title="Click to speak"></div>');
$('#send_but_sheld').prepend($button);
// NOTE(review): `recognition` below is not defined anywhere in this file's
// new code — this click handler looks like leftover from the old jQuery
// plugin. Providers re-bind #microphone_button with .off('click'), so it is
// likely dead; confirm and remove.
let listening = false;
$button.on('click', function () {
if (listening) {
recognition.stop();
} else {
recognition.start();
}
listening = !listening;
});
}
addExtensionControls(); // No init dependencies
loadSettings(); // Depends on Extension Controls and loadTtsProvider
loadSttProvider(extension_settings.speech_recognition.currentProvider); // No dependencies
let initialText = '';
recognition.onresult = function (speechEvent) {
let finalTranscript = '';
let interimTranscript = ''
for (let i = speechEvent.resultIndex; i < speechEvent.results.length; ++i) {
const transcript = speechEvent.results[i][0].transcript;
if (speechEvent.results[i].isFinal) {
let interim = capitalizeInterim(transcript);
if (interim != '') {
let final = finalTranscript;
final = composeValues(final, interim) + '.';
finalTranscript = final;
recognition.abort();
listening = false;
}
interimTranscript = ' ';
} else {
interimTranscript += transcript;
}
}
interimTranscript = capitalizeInterim(interimTranscript);
$textarea.val(initialText + finalTranscript + interimTranscript);
};
recognition.onerror = function (event) {
console.error('Error occurred in recognition:', event.error);
};
recognition.onend = function () {
listening = false;
$button.toggleClass('fa-microphone fa-microphone-slash');
};
recognition.onstart = function () {
initialText = $textarea.val();
$button.toggleClass('fa-microphone fa-microphone-slash');
};
};
}(jQuery));
jQuery(() => {
const $textarea = $('#send_textarea');
$textarea.speechRecognitionPlugin();
});
//const wrapper = new ModuleWorkerWrapper(moduleWorker);
//setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); // Init depends on all the things
//moduleWorker();
})

View File

@ -2,10 +2,13 @@
"display_name": "Speech Recognition",
"loading_order": 13,
"requires": [],
"optional": [],
"optional": [
"vosk-speech-recognition",
"whisper-speech-recognition"
],
"js": "index.js",
"css": "style.css",
"author": "Cohee#1207",
"version": "1.0.0",
"author": "Cohee#1207 and Keij#6799",
"version": "1.1.0",
"homePage": "https://github.com/SillyTavern/SillyTavern"
}

View File

@ -0,0 +1,65 @@
import { getApiUrl, doExtrasFetch } from "../../extensions.js";
export { VoskSttProvider }
const DEBUG_PREFIX = "<Speech Recognition module (Vosk)> "

/**
 * Speech-to-text provider that uploads recorded audio to the
 * SillyTavern-extras Vosk endpoint and returns the recognized transcript.
 */
class VoskSttProvider {
    //########//
    // Config //
    //########//

    // Active settings object; populated by loadSettings().
    settings

    // No configurable options yet.
    defaultSettings = {
    }

    // This provider exposes no extra settings UI.
    get settingsHtml() {
        return "";
    }

    onSettingsChange() {
        // Used when provider settings are updated from UI — nothing to do yet.
    }

    /**
     * Populate provider state from stored extension settings.
     * Only keys present in defaultSettings are accepted.
     * @param {object} settings - persisted settings for this provider
     * @throws {string} when an unknown key is passed
     */
    loadSettings(settings) {
        if (Object.keys(settings).length == 0) {
            console.debug(DEBUG_PREFIX + "Using default vosk STT extension settings");
        }

        // Copy the defaults instead of aliasing them, so later mutation of
        // this.settings can never corrupt defaultSettings.
        this.settings = Object.assign({}, this.defaultSettings);
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to STT extension: ${key}`;
            }
        }
        console.debug(DEBUG_PREFIX + "Vosk STT settings loaded");
    }

    /**
     * Upload a recorded audio blob to the extras API and return its transcript.
     * @param {Blob} audioblob - recorded audio to transcribe
     * @returns {Promise<string>} recognized text
     * @throws {Error} when the HTTP request fails
     */
    async processAudio(audioblob) {
        const requestData = new FormData();
        requestData.append('AudioFile', audioblob, 'record.wav');

        const url = new URL(getApiUrl());
        url.pathname = '/api/speech-recognition/vosk/process-audio';

        const apiResult = await doExtrasFetch(url, {
            method: 'POST',
            body: requestData,
        });

        if (!apiResult.ok) {
            toastr.error(apiResult.statusText, 'STT Generation Failed (Vosk)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
            throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
        }

        const result = await apiResult.json();
        return result.transcript;
    }
}

View File

@ -0,0 +1,67 @@
import { getApiUrl, doExtrasFetch } from "../../extensions.js";
export { WhisperSttProvider }
// Bug fix: the prefix said "(Vosk)" — copy-paste error from vosk.js.
const DEBUG_PREFIX = "<Speech Recognition module (Whisper)> "

/**
 * Speech-to-text provider that uploads recorded audio to the
 * SillyTavern-extras Whisper endpoint and returns the recognized transcript.
 */
class WhisperSttProvider {
    //########//
    // Config //
    //########//

    // Active settings object; populated by loadSettings().
    settings

    // No configurable options yet.
    defaultSettings = {
        //model_path: "",
    }

    // This provider exposes no extra settings UI.
    get settingsHtml() {
        return "";
    }

    onSettingsChange() {
        // Used when provider settings are updated from UI — nothing to do yet.
    }

    /**
     * Populate provider state from stored extension settings.
     * Only keys present in defaultSettings are accepted.
     * @param {object} settings - persisted settings for this provider
     * @throws {string} when an unknown key is passed
     */
    loadSettings(settings) {
        if (Object.keys(settings).length == 0) {
            console.debug(DEBUG_PREFIX + "Using default Whisper STT extension settings");
        }

        // Copy the defaults instead of aliasing them, so later mutation of
        // this.settings can never corrupt defaultSettings.
        this.settings = Object.assign({}, this.defaultSettings);
        for (const key in settings) {
            if (key in this.settings) {
                this.settings[key] = settings[key];
            } else {
                throw `Invalid setting passed to STT extension: ${key}`;
            }
        }
        console.debug(DEBUG_PREFIX + "Whisper STT settings loaded");
    }

    /**
     * Upload a recorded audio blob to the extras API and return its transcript.
     * @param {Blob} audioblob - recorded audio to transcribe
     * @returns {Promise<string>} recognized text
     * @throws {Error} when the HTTP request fails
     */
    async processAudio(audioblob) {
        const requestData = new FormData();
        requestData.append('AudioFile', audioblob, 'record.wav');

        const url = new URL(getApiUrl());
        url.pathname = '/api/speech-recognition/whisper/process-audio';

        const apiResult = await doExtrasFetch(url, {
            method: 'POST',
            body: requestData,
        });

        if (!apiResult.ok) {
            toastr.error(apiResult.statusText, 'STT Generation Failed (Whisper)', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
            throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
        }

        const result = await apiResult.json();
        return result.transcript;
    }
}

View File

@ -1,7 +1,15 @@
import { eventSource, event_types } from "../../../script.js"
import { doExtrasFetch, getApiUrl, modules } from "../../extensions.js"
export { CoquiTtsProvider }
/** Guard: abort with a toast and an Error unless the extras 'coqui-tts' module is loaded. */
function throwIfModuleMissing() {
    const loaded = modules.includes('coqui-tts');
    if (loaded) {
        return;
    }
    toastr.error(`Coqui TTS module not loaded. Add coqui-tts to enable-modules and restart the Extras API.`)
    throw new Error(`Coqui TTS module not loaded.`)
}
class CoquiTtsProvider {
//########//
// Config //
@ -12,51 +20,45 @@ class CoquiTtsProvider {
separator = ' .. '
defaultSettings = {
provider_endpoint: "http://localhost:5100",
voiceMap: {}
}
get settingsHtml() {
let html = `
<div style="display: flex; width: 100%;">
<div style="flex: 80%;">
<label for="model">Model:</label>
<select id="model">
<option value="none">Select Model</option>
<!-- Add more model options here -->
</select>
<div class="flex wide100p flexGap10 alignitemscenter">
<div style="flex: 80%;">
<label for="coqui_model">Model:</label>
<select id="coqui_model">
<option value="none">Select Model</option>
<!-- Add more model options here -->
</select>
</div>
<div class="flex justifyCenter" style="flex: 20%;">
<button id="coqui_preview" class="menu_button menu_button_icon wide100p" type="button">
</button>
</div>
</div>
<div style="flex: 20%; display: flex; justify-content: center;">
<button id="preview" class="menu_button" type="button" style="width: 100%;">Play</button>
</div>
</div>
<div style="display: flex; width: 100%;">
<div style="flex: 1; margin-right: 10px;">
<label for="speaker">Speaker:</label>
<select id="speaker">
<!-- Add more speaker options here -->
</select>
</div>
<div style="flex: 1;">
<label for="language">Language:</label>
<select id="language">
<!-- Add more language options here -->
</select>
</div>
</div>
<label for="Coqui_tts_endpoint">Provider Endpoint:</label>
<input id="Coqui_tts_endpoint" type="text" class="text_pole" maxlength="250" value="${this.defaultSettings.provider_endpoint}"/>
<div class="flex wide100p flexGap10">
<div class="flex1">
<label for="coqui_speaker">Speaker:</label>
<select id="coqui_speaker">
<!-- Add more speaker options here -->
</select>
</div>
<div class="flex1">
<label for="coqui_language">Language:</label>
<select id="coqui_language">
<!-- Add more language options here -->
</select>
</div>
</div>
`
return html
}
/** Read the endpoint value back from the provider UI into this.settings. */
onSettingsChange() {
// Used when provider settings are updated from UI
this.settings.provider_endpoint = $('#Coqui_tts_endpoint').val()
}
loadSettings(settings) {
@ -64,70 +66,61 @@ class CoquiTtsProvider {
if (Object.keys(settings).length == 0) {
console.info("Using default TTS Provider settings")
}
const modelSelect = document.getElementById('model');
const previewButton = document.getElementById('preview');
const modelSelect = document.getElementById('coqui_model');
const previewButton = document.getElementById('coqui_preview');
previewButton.addEventListener('click', () => {
const selectedModel = modelSelect.value;
this.sampleTtsVoice(selectedModel);
});//add event listener to button
previewButton.disabled = true;
previewButton.innerText = "Select Model";
// Only accept keys defined in defaultSettings
this.settings = this.defaultSettings
for (const key in settings){
if (key in this.settings){
for (const key in settings) {
if (key in this.settings) {
this.settings[key] = settings[key]
} else {
throw `Invalid setting passed to TTS Provider: ${key}`
}
}
const apiCheckInterval = setInterval(() => {
// Use Extras API if TTS support is enabled
if (modules.includes('tts') || modules.includes('Coqui-tts')) {
const baseUrl = new URL(getApiUrl());
baseUrl.pathname = '/api/coqui-tts/coqui-tts';
this.settings.provider_endpoint = baseUrl.toString();
$('#Coqui_tts_endpoint').val(this.settings.provider_endpoint);
clearInterval(apiCheckInterval);
}
}, 2000);
$('#Coqui_tts_endpoint').val(this.settings.provider_endpoint)
const textexample = document.getElementById('tts_voice_map');
textexample.placeholder = 'Enter comma separated map of charName:ttsName[speakerID][langID]. Example: \nAqua:tts_models--en--ljspeech--glow-tts\model_file.pth,\nDarkness:tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][3]';
//Load models function
this.getModels();
eventSource.on(event_types.EXTRAS_CONNECTED, () => {
this.getModels();
});
this.onttsCoquiHideButtons();
console.info("Settings loaded")
}
async onttsCoquiHideButtons(){
async onttsCoquiHideButtons() {
// Get references to the select element and the two input elements
const ttsProviderSelect = document.getElementById('tts_provider');
const ttsVoicesInput = document.getElementById('tts_voices');
const ttsPreviewInput = document.getElementById('tts_preview');
ttsProviderSelect.addEventListener('click', () => {
this.getModels();
});
// Add an event listener to the 'change' event of the tts_provider select element
ttsProviderSelect.addEventListener('change', () => {
// Check if the selected value is 'Coqui'
if (ttsProviderSelect.value === 'Coqui') {
ttsVoicesInput.style.display = 'none'; // Hide the tts_voices input
ttsPreviewInput.style.display = ''; // Show the tts_preview input
} else {
ttsVoicesInput.style.display = ''; // Show the tts_voices input
ttsPreviewInput.style.display = 'none'; // Hide the tts_preview input
}
// Check if the selected value is 'Coqui'
if (ttsProviderSelect.value === 'Coqui') {
ttsVoicesInput.style.display = 'none'; // Hide the tts_voices input
ttsPreviewInput.style.display = ''; // Show the tts_preview input
} else {
ttsVoicesInput.style.display = ''; // Show the tts_voices input
ttsPreviewInput.style.display = 'none'; // Hide the tts_preview input
}
});
}
}
async onApplyClick() {
return
@ -135,262 +128,267 @@ class CoquiTtsProvider {
async getLang() {
try {
const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multlang`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const voiceData = await response.json();
const modelSelect = document.getElementById('language');
modelSelect.innerHTML = ''; // Clear existing options
if (Object.keys(voiceData).length === 0) {
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
modelSelect.appendChild(option);
} else {
for (const [key, value] of Object.entries(voiceData)) {
const option = document.createElement('option');
option.value = key;
option.textContent = key + ": " + value;
modelSelect.appendChild(option);
}
}
} catch (error) {
//console.error('Error fetching voice data:', error);
// Remove all options except "None"
const modelSelect = document.getElementById('language');
modelSelect.innerHTML = '';
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
modelSelect.appendChild(option);
}
}
async getSpeakers() {
try {
const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/multspeaker`);
const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/multlang`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const voiceData = await response.json();
const modelSelect = document.getElementById('speaker');
modelSelect.innerHTML = ''; // Clear existing options
const languageSelect = document.getElementById('coqui_language');
languageSelect.innerHTML = ''; // Clear existing options
if (Object.keys(voiceData).length === 0) {
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
modelSelect.appendChild(option);
} else {
for (const [index, name] of Object.entries(voiceData)) {
const option = document.createElement('option');
option.value = index;
option.textContent = index + ": " + name;
modelSelect.appendChild(option);
}
option.value = 'none';
option.textContent = 'None';
languageSelect.appendChild(option);
} else {
for (const [key, value] of Object.entries(voiceData)) {
const option = document.createElement('option');
option.value = key;
option.textContent = key + ": " + value;
languageSelect.appendChild(option);
}
}
} catch (error) {
} catch (error) {
//console.error('Error fetching voice data:', error);
// Remove all options except "None"
const modelSelect = document.getElementById('speaker');
modelSelect.innerHTML = '';
const languageSelect = document.getElementById('coqui_language');
languageSelect.innerHTML = '';
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
languageSelect.appendChild(option);
}
}
async getSpeakers() {
try {
const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/multspeaker`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const voiceData = await response.json();
const speakerSelect = document.getElementById('coqui_speaker');
speakerSelect.innerHTML = ''; // Clear existing options
if (Object.keys(voiceData).length === 0) {
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
speakerSelect.appendChild(option);
} else {
for (const [index, name] of Object.entries(voiceData)) {
const option = document.createElement('option');
option.value = index;
option.textContent = index + ": " + name;
speakerSelect.appendChild(option);
}
}
} catch (error) {
//console.error('Error fetching voice data:', error);
// Remove all options except "None"
const speakerSelect = document.getElementById('coqui_speaker');
speakerSelect.innerHTML = '';
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
speakerSelect.appendChild(option);
}
}
async getModels() {
try {
throwIfModuleMissing();
const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/list`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const voiceIds = await response.json();
const modelSelect = document.getElementById('coqui_model');
if (voiceIds.length === 0) {
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'Select Model';
modelSelect.appendChild(option);
} else {
voiceIds.forEach(voiceId => {
const option = document.createElement('option');
option.value = voiceId;
option.textContent = voiceId;
modelSelect.appendChild(option);
});
}
// Update provider endpoint on model selection change
modelSelect.addEventListener('change', () => {
const selectedModel = modelSelect.value;
this.LoadModel(selectedModel);
});
} catch (error) {
console.error('Error fetching voice IDs:', error);
// Add "None" option when the request fails or the response is empty
const modelSelect = document.getElementById('coqui_model');
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
modelSelect.appendChild(option);
}
}
async getModels() {
try {
const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/list`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
const voiceIds = await response.json();
const modelSelect = document.getElementById('model');
if (voiceIds.length === 0) {
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'Select Model';
modelSelect.appendChild(option);
} else {
voiceIds.forEach(voiceId => {
const option = document.createElement('option');
option.value = voiceId;
option.textContent = voiceId;
modelSelect.appendChild(option);
});
}
// Update provider endpoint on model selection change
modelSelect.addEventListener('change', () => {
const selectedModel = modelSelect.value;
this.LoadModel(selectedModel);
});
} catch (error) {
console.error('Error fetching voice IDs:', error);
// Add "None" option when the request fails or the response is empty
const modelSelect = document.getElementById('model');
const option = document.createElement('option');
option.value = 'none';
option.textContent = 'None';
modelSelect.appendChild(option);
}
}
}
async LoadModel(selectedModel) {
const previewButton = document.getElementById('preview');
async LoadModel(selectedModel) {
const previewButton = document.getElementById('coqui_preview');
previewButton.disabled = true;
previewButton.innerText = "Loading";
try {
const response = await fetch(`${this.defaultSettings.provider_endpoint}/api/coqui-tts/load?_model=${selectedModel}`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
this.getSpeakers();
this.getLang();
throwIfModuleMissing();
const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/load?_model=${selectedModel}`);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${response.statusText}`);
}
this.getSpeakers();
this.getLang();
const previewButton = document.getElementById('preview');
previewButton.disabled = false;
previewButton.innerText = "Play";
const previewButton = document.getElementById('coqui_preview');
previewButton.disabled = false;
previewButton.innerText = "Play";
} catch (error) {
console.error('Error updating provider endpoint:', error);
console.error('Error updating provider endpoint:', error);
}
}
}
async getVoice(voiceName) {
async getVoice(voiceName) {
//tts_models--multilingual--multi-dataset--your_tts\model_file.pth[2][1]
//tts_models--en--ljspeech--glow-tts\model_file.pth
let _voiceNameOrg = voiceName; // Store the original voiceName in a variable _voiceNameOrg
voiceName = voiceName.replace(/(\[\d+\])+$/, ''); // For example, converts 'model[2][1]' to 'model'
this.voices = []; //reset for follow up runs
if (this.voices.length === 0) { this.voices = await this.fetchCheckMap(); }
// Search for a voice object in the 'this.voices' array where the 'name' property matches the provided 'voiceName'
//const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);
const match = this.voices.find((CoquiVoice) => CoquiVoice.name === voiceName);
// If no match is found, throw an error indicating that the TTS Voice name was not found
if (!match) {
throw new Error(`TTS Voice name ${voiceName} not found`);
throw new Error(`TTS Voice name ${voiceName} not found`);
} else {
match.name = _voiceNameOrg;
match.voice_id = _voiceNameOrg;
match.name = _voiceNameOrg;
match.voice_id = _voiceNameOrg;
}
// Return the matched voice object (with the 'name' property updated if a match was found)
return match;
}
}
async fetchCheckMap() {
const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/checkmap`;
const response = await fetch(endpoint);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`);
}
const voiceData = await response.json();
const voices = voiceData.map((voice) => ({
id: voice.name,
name: voice.id, // this is the issue!!!
voice_id: voice.id, // this is the issue!!!
//preview_url: false,
lang: voice.lang,
}));
return voices;
}
async fetchTtsVoiceIds() {
const endpoint = `${this.settings.provider_endpoint}/api/coqui-tts/speaker_id`;
const response = await fetch(endpoint);
const endpoint = `${getApiUrl()}/api/coqui-tts/checkmap`;
const response = await doExtrasFetch(endpoint);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`);
}
const voiceData = await response.json();
const voices = voiceData.map((voice) => ({
id: voice.name,
name: voice.id, //add filename here
voice_id: voice.id,
//preview_url: false,
//preview_url: `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voice.id}`,
//http://localhost:5100/api/coqui-tts/speaker_id/tts_models/en/ljspeech/speedy-speech
lang: voice.lang,
}));
return voices;
}
sampleTtsVoice(voiceId) {
// Get the selected values of speaker and language
const speakerSelect = document.getElementById('speaker');
const languageSelect = document.getElementById('language');
const selectedSpeaker = speakerSelect.value;
const selectedLanguage = languageSelect.value;
// Construct the URL with the selected values
const url = `${this.settings.provider_endpoint}/api/coqui-tts/tts?text=The%20Quick%20Brown%20Fox%20Jumps%20Over%20the%20Lazy%20Dog.&speaker_id=${voiceId}&style_wav=&language_id=${selectedLanguage}&mspker=${selectedSpeaker}`;
fetch(url)
.then(response => response.blob())
.then(blob => {
const audioUrl = URL.createObjectURL(blob);
// Play the audio
const audio = new Audio(audioUrl);
audio.play();
})
.catch(error => {
console.error('Error performing TTS request:', error);
});
}
previewTtsVoice(voiceId) { //button on avail voices
const url = `${this.settings.provider_endpoint}/api/coqui-tts/download?model=${voiceId}`;
fetch(url)
.then(response => response.text()) // Expecting a text response
.then(responseText => {
const isResponseTrue = responseText.trim().toLowerCase() === 'true';
if (isResponseTrue) {
console.log("Downloading Model") //if true
} else {
console.error('Already Installed'); //if false
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`);
}
})
.catch(error => {
console.error('Error performing download:', error);
});
}
async generateTts(text, voiceId){
const voiceData = await response.json();
const voices = voiceData.map((voice) => ({
id: voice.name,
name: voice.id, // this is the issue!!!
voice_id: voice.id, // this is the issue!!!
//preview_url: false,
lang: voice.lang,
}));
return voices;
}
async fetchTtsVoiceIds() {
throwIfModuleMissing();
const endpoint = `${getApiUrl()}/api/coqui-tts/speaker_id`;
const response = await doExtrasFetch(endpoint);
if (!response.ok) {
throw new Error(`HTTP ${response.status}: ${await response.json()}`);
}
const voiceData = await response.json();
const voices = voiceData.map((voice) => ({
id: voice.name,
name: voice.id, //add filename here
voice_id: voice.id,
//preview_url: false,
//preview_url: `${getApiUrl()}/api/coqui-tts/download?model=${voice.id}`,
//http://localhost:5100/api/coqui-tts/speaker_id/tts_models/en/ljspeech/speedy-speech
lang: voice.lang,
}));
return voices;
}
sampleTtsVoice(voiceId) {
// Get the selected values of speaker and language
const speakerSelect = document.getElementById('coqui_speaker');
const languageSelect = document.getElementById('coqui_language');
const selectedSpeaker = speakerSelect.value;
const selectedLanguage = languageSelect.value;
// Construct the URL with the selected values
const url = `${getApiUrl()}/api/coqui-tts/tts?text=The%20Quick%20Brown%20Fox%20Jumps%20Over%20the%20Lazy%20Dog.&speaker_id=${voiceId}&style_wav=&language_id=${selectedLanguage}&mspker=${selectedSpeaker}`;
doExtrasFetch(url)
.then(response => response.blob())
.then(blob => {
const audioUrl = URL.createObjectURL(blob);
// Play the audio
const audio = new Audio(audioUrl);
audio.play();
})
.catch(error => {
console.error('Error performing TTS request:', error);
});
}
previewTtsVoice(voiceId) { //button on avail voices
throwIfModuleMissing();
const url = `${getApiUrl()}/api/coqui-tts/download?model=${voiceId}`;
doExtrasFetch(url)
.then(response => response.text()) // Expecting a text response
.then(responseText => {
const isResponseTrue = responseText.trim().toLowerCase() === 'true';
if (isResponseTrue) {
console.log("Downloading Model") //if true
} else {
console.error('Already Installed'); //if false
}
})
.catch(error => {
console.error('Error performing download:', error);
});
}
async generateTts(text, voiceId) {
const response = await this.fetchTtsGeneration(text, voiceId)
return response
}
async fetchTtsGeneration(inputText, voiceId) {
console.info(`Generating new TTS for voice_id ${voiceId}`);
const response = await fetch(`${this.settings.provider_endpoint}/api/coqui-tts/tts?text=${encodeURIComponent(inputText)}&speaker_id=${voiceId}`);
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
}
async fetchTtsGeneration(inputText, voiceId) {
throwIfModuleMissing();
console.info(`Generating new TTS for voice_id ${voiceId}`);
const response = await doExtrasFetch(`${getApiUrl()}/api/coqui-tts/tts?text=${encodeURIComponent(inputText)}&speaker_id=${voiceId}`);
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}: ${await response.text()}`);
}
if (!response.ok) {
toastr.error(response.statusText, 'TTS Generation Failed');
throw new Error(`HTTP ${response.status}: ${await response.text()}`);

View File

@ -4243,6 +4243,10 @@ toolcool-color-picker {
padding: 5px;
}
.flex {
display: flex;
}
.flex-container {
display: flex;
gap: 5px;
@ -4849,6 +4853,10 @@ body.waifuMode .zoomed_avatar {
gap: 5px;
}
.flexGap10 {
gap: 10px;
}
.timestamp {
font-size: calc(var(--mainFontSize) * 0.7);
font-weight: 400;