From 84845ae911fe6f1dfdcec9c567a57b5fe12bdc96 Mon Sep 17 00:00:00 2001 From: Tony Ribeiro Date: Tue, 15 Aug 2023 21:15:31 +0200 Subject: [PATCH] Updated RVC UI to allow per characters settings without manual voice map text. --- public/scripts/extensions/rvc/index.js | 324 +++++++++++++----- .../tts/coqui_api_models_settings.json | 6 +- 2 files changed, 239 insertions(+), 91 deletions(-) diff --git a/public/scripts/extensions/rvc/index.js b/public/scripts/extensions/rvc/index.js index 896757034..7764cf816 100644 --- a/public/scripts/extensions/rvc/index.js +++ b/public/scripts/extensions/rvc/index.js @@ -1,65 +1,39 @@ /* TODO: - - Allow to upload RVC model to extras server ? - - Settings per characters ? + - load RVC models list from extras + - Settings per characters */ import { saveSettingsDebounced } from "../../../script.js"; -import { getContext, getApiUrl, extension_settings, doExtrasFetch } from "../../extensions.js"; +import { getContext, getApiUrl, extension_settings, doExtrasFetch, ModuleWorkerWrapper } from "../../extensions.js"; export { MODULE_NAME, rvcVoiceConversion}; const MODULE_NAME = 'RVC'; const DEBUG_PREFIX = " " +const UPDATE_INTERVAL = 1000 -// Send an audio file to RVC to convert voice -async function rvcVoiceConversion(response, character) { - let apiResult +let charactersList = [] // Updated with module worker +let rvcModelsList = [] // Initialized only once - // Check voice map - if (extension_settings.rvc.voiceMap[character] === undefined) { - toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", DEBUG_PREFIX+'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); - console.error("No RVC model assign in voice map for current character "+character); - return response; +function updateVoiceMapText(){ + let voiceMapText = "" + for(let i in extension_settings.rvc.voiceMap) { + const voice_settings = extension_settings.rvc.voiceMap[i]; + voiceMapText += i + ":" + + voice_settings["modelName"] + "(" + + voice_settings["pitchExtraction"] + "," + + voice_settings["pitchOffset"] + "," + + voice_settings["indexRate"] + "," + + voice_settings["filterRadius"] + "," + + voice_settings["rmsMixRate"] + "," + + voice_settings["protect"] + + "),\n" } - // Load model if different from currently loaded - //if (currentModel === null | currentModel != extension_settings.rvc.voiceMap[character]) - // await rvcLoadModel(extension_settings.rvc.voiceMap[character]); + extension_settings.rvc.voiceMapText = voiceMapText; + $('#rvc_voice_map').val(voiceMapText); - const audioData = await response.blob() - if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { - throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` - } - console.log("Audio type received:",audioData.type) - - console.log("Sending tts audio data to RVC on extras server") - - var requestData = new FormData(); - requestData.append('AudioFile', audioData, 'record'); - requestData.append("json", JSON.stringify({ - "modelName": extension_settings.rvc.voiceMap[character], - "pitchOffset": extension_settings.rvc.pitchOffset, - "pitchExtraction": extension_settings.rvc.pitchExtraction, - "indexRate": extension_settings.rvc.indexRate, - "filterRadius": extension_settings.rvc.filterRadius, - //"rmsMixRate": extension_settings.rvc.rmsMixRate, - "protect": extension_settings.rvc.protect - })); - - const url = new URL(getApiUrl()); - url.pathname = '/api/voice-conversion/rvc/process-audio'; - - apiResult = await doExtrasFetch(url, { - method: 'POST', - body: requestData, - }); - - if (!apiResult.ok) { - toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); - throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); - } - - return apiResult; + console.debug(DEBUG_PREFIX,"Updated voice map debug text to\n",voiceMapText) } //#############################// @@ -73,7 +47,7 @@ const defaultSettings = { pitchExtraction:"dio", indexRate:0.88, filterRadius:3, - //rmsMixRate:1, + rmsMixRate:1, protect:0.33, voicMapText: "", voiceMap: {} @@ -86,9 +60,6 @@ function loadSettings() { $('#rvc_enabled').prop('checked',extension_settings.rvc.enabled); $('#rvc_model').val(extension_settings.rvc.model); - $('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset); - $('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset); - $('#rvc_pitch_extraction').val(extension_settings.rvc.pitchExtraction); $('#rvc_pitch_extractiont_value').text(extension_settings.rvc.pitchExtraction); @@ -97,43 +68,18 @@ function loadSettings() { $('#rvc_filter_radius').val(extension_settings.rvc.filterRadius); $("#rvc_filter_radius_value").text(extension_settings.rvc.filterRadius); + + $('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset); + $('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset); + + $('#rvc_rms_mix_rate').val(extension_settings.rvc.rmsMixRate); + $("#rvc_rms_mix_rate_value").text(extension_settings.rvc.rmsMixRate); - //$('#rvc_mix_rate').val(extension_settings.rvc.rmsMixRate); $('#rvc_protect').val(extension_settings.rvc.protect); $("#rvc_protect_value").text(extension_settings.rvc.protect); $('#rvc_voice_map').val(extension_settings.rvc.voiceMapText); -} -async function onApplyClick() { - let error = false; - let array = $('#rvc_voice_map').val().split(","); - array = array.map(element => {return element.trim();}); - array = array.filter((str) => str !== ''); - extension_settings.rvc.voiceMap = {}; - for (const text of array) { - if (text.includes(":")) { - const pair = text.split(":") - extension_settings.rvc.voiceMap[pair[0].trim()] = pair[1].trim() - console.debug(DEBUG_PREFIX+"Added mapping", pair[0],"=>", extension_settings.rvc.voiceMap[pair[0]]); - } - else { - $("#rvc_status").text("Voice map is invalid, check console for errors"); - $("#rvc_status").css("color", "red"); - console.error(DEBUG_PREFIX,"Wrong syntax for message mapping, no ':' found in:", text); - toastr.error("no ':' found in: '"+text+"'", DEBUG_PREFIX+' RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); - error = true; - } - } - - if (!error) { - $("#rvc_status").text("Successfully applied settings"); - $("#rvc_status").css("color", "green"); - console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.rvc.voiceMap); - toastr.info("New map:\n"+JSON.stringify(extension_settings.rvc.voiceMap).substring(0,200)+"...", DEBUG_PREFIX+"Updated message mapping", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); - extension_settings.rvc.voiceMapText = $('#rvc_voice_map').val(); - saveSettingsDebounced(); - } } async function onEnabledClick() { @@ -164,12 +110,69 @@ async function onPitchOffsetChange() { saveSettingsDebounced() } +async function onRmsMixRateChange() { + extension_settings.rvc.rmsMixRate = Number($('#rvc_rms_mix_rate').val()); + $("#rvc_rms_mix_rate_value").text(extension_settings.rvc.rmsMixRate) + saveSettingsDebounced() +} + async function onProtectChange() { extension_settings.rvc.protect = Number($('#rvc_protect').val()); $("#rvc_protect_value").text(extension_settings.rvc.protect) saveSettingsDebounced() } +async function onApplyClick() { + let error = false; + const character = $("#rvc_character_select").val(); + const model_name = $("#rvc_model_select").val(); + const pitchExtraction = $("#rvc_pitch_extraction").val(); + const indexRate = $("#rvc_index_rate").val(); + const filterRadius = $("#rvc_filter_radius").val(); + const pitchOffset = $("#rvc_pitch_offset").val(); + const rmsMixRate = $("#rvc_rms_mix_rate").val(); + const protect = $("#rvc_protect").val(); + + if (character === "none") { + toastr.error("Character not selected.", DEBUG_PREFIX+" voice mapping apply", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); + return; + } + + if (model_name == "none") { + toastr.error("Model not selected.", DEBUG_PREFIX+" voice mapping apply", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); + return; + } + + extension_settings.rvc.voiceMap[character] = { + "modelName": model_name, + "pitchExtraction": pitchExtraction, + "indexRate": indexRate, + "filterRadius": filterRadius, + "pitchOffset": pitchOffset, + "rmsMixRate": rmsMixRate, + "protect": protect + } + + updateVoiceMapText(); + + console.debug(DEBUG_PREFIX,"Updated settings of ",character,":",extension_settings.rvc.voiceMap[character]) + saveSettingsDebounced(); +} + +async function onDeleteClick() { + const character = $("#rvc_character_select").val(); + + if (character === "none") { + toastr.error("Character not selected.", DEBUG_PREFIX+" voice mapping delete", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); + return; + } + + delete extension_settings.rvc.voiceMap[character]; + console.debug(DEBUG_PREFIX,"Deleted settings of ",character); + updateVoiceMapText(); + saveSettingsDebounced(); +} + $(document).ready(function () { function addExtensionControls() { const settingsHtml = ` @@ -185,8 +188,19 @@ $(document).ready(function () { Enabled + +
+ + + + Select Pitch Extraction
-
-
@@ -208,15 +220,17 @@ $(document).ready(function () { + + + - - +
+
@@ -225,14 +239,148 @@ $(document).ready(function () { `; $('#extensions_settings').append(settingsHtml); $("#rvc_enabled").on("click", onEnabledClick); + $("#rvc_voice_map").attr("disabled","disabled");; $('#rvc_pitch_extraction').on('change', onPitchExtractionChange); $('#rvc_index_rate').on('input', onIndexRateChange); $('#rvc_filter_radius').on('input', onFilterRadiusChange); $('#rvc_pitch_offset').on('input', onPitchOffsetChange); + $('#rvc_rms_mix_rate').on('input', onRmsMixRateChange); $('#rvc_protect').on('input', onProtectChange); $("#rvc_apply").on("click", onApplyClick); + $("#rvc_delete").on("click", onDeleteClick); } addExtensionControls(); // No init dependencies loadSettings(); // Depends on Extension Controls + + const wrapper = new ModuleWorkerWrapper(moduleWorker); + setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); + moduleWorker(); }) + +//#############################// +// API Calls // +//#############################// + +/* + Check model installation state, return one of ["installed", "corrupted", "absent"] +*/ +async function get_models_list(model_id) { + const url = new URL(getApiUrl()); + url.pathname = '/api/voice-conversion/rvc/get-models-list'; + + const apiResult = await doExtrasFetch(url, { + method: 'POST' + }); + + if (!apiResult.ok) { + toastr.error(apiResult.statusText, DEBUG_PREFIX+' Check model state request failed'); + throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); + } + + return apiResult +} + +/* + Send an audio file to RVC to convert voice +*/ +async function rvcVoiceConversion(response, character) { + let apiResult + + // Check voice map + if (extension_settings.rvc.voiceMap[character] === undefined) { + toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", DEBUG_PREFIX+'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); + console.error("No RVC model assign in voice map for current character "+character); + return response; + } + + const audioData = await response.blob() + if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { + throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` + } + console.log("Audio type received:",audioData.type) + + const voice_settings = extension_settings.rvc.voiceMap[character]; + + console.log("Sending tts audio data to RVC on extras server") + + var requestData = new FormData(); + requestData.append('AudioFile', audioData, 'record'); + requestData.append("json", JSON.stringify({ + "modelName": voice_settings["modelName"], + "pitchExtraction": voice_settings["pitchExtraction"], + "pitchOffset": voice_settings["pitchOffset"], + "indexRate": voice_settings["indexRate"], + "filterRadius": voice_settings["filterRadius"], + "rmsMixRate": voice_settings["rmsMixRate"], + "protect": voice_settings["protect"] + })); + + const url = new URL(getApiUrl()); + url.pathname = '/api/voice-conversion/rvc/process-audio'; + + apiResult = await doExtrasFetch(url, { + method: 'POST', + body: requestData, + }); + + if (!apiResult.ok) { + toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); + throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); + } + + return apiResult; +} + +//#############################// +// Module Worker // +//#############################// + +async function moduleWorker() { + updateCharactersList(); + + if (rvcModelsList.length == 0) { + let result = await get_models_list(); + result = await result.json(); + rvcModelsList = result["models_list"] + + $('#rvc_model_select') + .find('option') + .remove() + .end() + .append('') + .val('none') + + for(const modelName of rvcModelsList) { + $("#rvc_model_select").append(new Option(modelName,modelName)); + } + + console.debug(DEBUG_PREFIX,"Updated model list to:", rvcModelsList); + } +} + +function updateCharactersList() { + let currentcharacters = new Set(); + for (const i of getContext().characters) { + currentcharacters.add(i.name); + } + + currentcharacters = Array.from(currentcharacters) + + if (JSON.stringify(charactersList) !== JSON.stringify(currentcharacters)) { + charactersList = currentcharacters + + $('#rvc_character_select') + .find('option') + .remove() + .end() + .append('') + .val('none') + + for(const charName of charactersList) { + $("#rvc_character_select").append(new Option(charName,charName)); + } + + console.debug(DEBUG_PREFIX,"Updated character list to:", charactersList); + } +} \ No newline at end of file diff --git a/public/scripts/extensions/tts/coqui_api_models_settings.json b/public/scripts/extensions/tts/coqui_api_models_settings.json index 381ac9e32..e2e9f5841 100644 --- a/public/scripts/extensions/tts/coqui_api_models_settings.json +++ b/public/scripts/extensions/tts/coqui_api_models_settings.json @@ -27,11 +27,11 @@ "glow-tts": { "id": "tts_models/en/ljspeech/glow-tts" }, - "vits": { - "id": "tts_models/en/ljspeech/vits" - }, "speedy-speech": { "id": "tts_models/en/ljspeech/speedy-speech" + }, + "vits": { + "id": "tts_models/en/ljspeech/vits" } }, "vctk": {