Merge pull request #946 from Tony-sama/staging
RVC extension new UI and per character settings
This commit is contained in:
commit
1fa281b03c
|
@ -1,65 +1,39 @@
|
|||
/*
|
||||
TODO:
|
||||
- Allow to upload RVC model to extras server ?
|
||||
- Settings per characters ?
|
||||
- load RVC models list from extras
|
||||
- Settings per characters
|
||||
*/
|
||||
|
||||
import { saveSettingsDebounced } from "../../../script.js";
|
||||
import { getContext, getApiUrl, extension_settings, doExtrasFetch } from "../../extensions.js";
|
||||
import { getContext, getApiUrl, extension_settings, doExtrasFetch, ModuleWorkerWrapper } from "../../extensions.js";
|
||||
export { MODULE_NAME, rvcVoiceConversion};
|
||||
|
||||
// Name of this extension module; exported so other modules can refer to it.
const MODULE_NAME = 'RVC';
|
||||
// Prefix added to this module's console messages and toast titles.
const DEBUG_PREFIX = "<RVC module> "
|
||||
// Delay in milliseconds between two module worker runs (passed to setInterval).
const UPDATE_INTERVAL = 1000
|
||||
|
||||
// Send an audio file to RVC to convert voice
|
||||
async function rvcVoiceConversion(response, character) {
|
||||
let apiResult
|
||||
let charactersList = [] // Updated with module worker
|
||||
let rvcModelsList = [] // Initialized only once
|
||||
|
||||
// Check voice map
|
||||
if (extension_settings.rvc.voiceMap[character] === undefined) {
|
||||
toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", DEBUG_PREFIX+'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
console.error("No RVC model assign in voice map for current character "+character);
|
||||
return response;
|
||||
function updateVoiceMapText(){
|
||||
let voiceMapText = ""
|
||||
for(let i in extension_settings.rvc.voiceMap) {
|
||||
const voice_settings = extension_settings.rvc.voiceMap[i];
|
||||
voiceMapText += i + ":"
|
||||
+ voice_settings["modelName"] + "("
|
||||
+ voice_settings["pitchExtraction"] + ","
|
||||
+ voice_settings["pitchOffset"] + ","
|
||||
+ voice_settings["indexRate"] + ","
|
||||
+ voice_settings["filterRadius"] + ","
|
||||
+ voice_settings["rmsMixRate"] + ","
|
||||
+ voice_settings["protect"]
|
||||
+ "),\n"
|
||||
}
|
||||
|
||||
// Load model if different from currently loaded
|
||||
//if (currentModel === null | currentModel != extension_settings.rvc.voiceMap[character])
|
||||
// await rvcLoadModel(extension_settings.rvc.voiceMap[character]);
|
||||
extension_settings.rvc.voiceMapText = voiceMapText;
|
||||
$('#rvc_voice_map').val(voiceMapText);
|
||||
|
||||
const audioData = await response.blob()
|
||||
if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) {
|
||||
throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}`
|
||||
}
|
||||
console.log("Audio type received:",audioData.type)
|
||||
|
||||
console.log("Sending tts audio data to RVC on extras server")
|
||||
|
||||
var requestData = new FormData();
|
||||
requestData.append('AudioFile', audioData, 'record');
|
||||
requestData.append("json", JSON.stringify({
|
||||
"modelName": extension_settings.rvc.voiceMap[character],
|
||||
"pitchOffset": extension_settings.rvc.pitchOffset,
|
||||
"pitchExtraction": extension_settings.rvc.pitchExtraction,
|
||||
"indexRate": extension_settings.rvc.indexRate,
|
||||
"filterRadius": extension_settings.rvc.filterRadius,
|
||||
//"rmsMixRate": extension_settings.rvc.rmsMixRate,
|
||||
"protect": extension_settings.rvc.protect
|
||||
}));
|
||||
|
||||
const url = new URL(getApiUrl());
|
||||
url.pathname = '/api/voice-conversion/rvc/process-audio';
|
||||
|
||||
apiResult = await doExtrasFetch(url, {
|
||||
method: 'POST',
|
||||
body: requestData,
|
||||
});
|
||||
|
||||
if (!apiResult.ok) {
|
||||
toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
|
||||
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
|
||||
}
|
||||
|
||||
return apiResult;
|
||||
console.debug(DEBUG_PREFIX,"Updated voice map debug text to\n",voiceMapText)
|
||||
}
|
||||
|
||||
//#############################//
|
||||
|
@ -73,7 +47,7 @@ const defaultSettings = {
|
|||
pitchExtraction:"dio",
|
||||
indexRate:0.88,
|
||||
filterRadius:3,
|
||||
//rmsMixRate:1,
|
||||
rmsMixRate:1,
|
||||
protect:0.33,
|
||||
voicMapText: "",
|
||||
voiceMap: {}
|
||||
|
@ -86,9 +60,6 @@ function loadSettings() {
|
|||
$('#rvc_enabled').prop('checked',extension_settings.rvc.enabled);
|
||||
$('#rvc_model').val(extension_settings.rvc.model);
|
||||
|
||||
$('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset);
|
||||
$('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset);
|
||||
|
||||
$('#rvc_pitch_extraction').val(extension_settings.rvc.pitchExtraction);
|
||||
$('#rvc_pitch_extractiont_value').text(extension_settings.rvc.pitchExtraction);
|
||||
|
||||
|
@ -97,43 +68,18 @@ function loadSettings() {
|
|||
|
||||
$('#rvc_filter_radius').val(extension_settings.rvc.filterRadius);
|
||||
$("#rvc_filter_radius_value").text(extension_settings.rvc.filterRadius);
|
||||
|
||||
$('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset);
|
||||
$('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset);
|
||||
|
||||
$('#rvc_rms_mix_rate').val(extension_settings.rvc.rmsMixRate);
|
||||
$("#rvc_rms_mix_rate_value").text(extension_settings.rvc.rmsMixRate);
|
||||
|
||||
//$('#rvc_mix_rate').val(extension_settings.rvc.rmsMixRate);
|
||||
$('#rvc_protect').val(extension_settings.rvc.protect);
|
||||
$("#rvc_protect_value").text(extension_settings.rvc.protect);
|
||||
|
||||
$('#rvc_voice_map').val(extension_settings.rvc.voiceMapText);
|
||||
}
|
||||
|
||||
/**
 * "Apply" handler for the free-text voice map.
 *
 * Reads the comma separated `charName:rvcModel` entries from the
 * #rvc_voice_map textarea and rebuilds `extension_settings.rvc.voiceMap`
 * from them. Entries without a ':' are reported in #rvc_status, the console
 * and a toast; the settings are only persisted when every entry is valid.
 */
async function onApplyClick() {
    const toastOptions = { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true };
    const entries = $('#rvc_voice_map').val()
        .split(",")
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');

    // The map is reset up front, so valid entries are kept even when a later one is rejected.
    extension_settings.rvc.voiceMap = {};
    let hasInvalidEntry = false;

    for (const entry of entries) {
        if (!entry.includes(":")) {
            const statusElement = $("#rvc_status");
            statusElement.text("Voice map is invalid, check console for errors");
            statusElement.css("color", "red");
            console.error(DEBUG_PREFIX,"Wrong syntax for message mapping, no ':' found in:", entry);
            toastr.error("no ':' found in: '"+entry+"'", DEBUG_PREFIX+' RVC Voice map error', toastOptions);
            hasInvalidEntry = true;
            continue;
        }

        const parts = entry.split(":");
        extension_settings.rvc.voiceMap[parts[0].trim()] = parts[1].trim();
        console.debug(DEBUG_PREFIX+"Added mapping", parts[0],"=>", extension_settings.rvc.voiceMap[parts[0]]);
    }

    if (hasInvalidEntry) {
        return;
    }

    const statusElement = $("#rvc_status");
    statusElement.text("Successfully applied settings");
    statusElement.css("color", "green");
    console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.rvc.voiceMap);
    toastr.info("New map:\n"+JSON.stringify(extension_settings.rvc.voiceMap).substring(0,200)+"...", DEBUG_PREFIX+"Updated message mapping", toastOptions);
    extension_settings.rvc.voiceMapText = $('#rvc_voice_map').val();
    saveSettingsDebounced();
}
|
||||
|
||||
async function onEnabledClick() {
|
||||
|
@ -164,12 +110,69 @@ async function onPitchOffsetChange() {
|
|||
saveSettingsDebounced()
|
||||
}
|
||||
|
||||
/**
 * Input handler of the #rvc_rms_mix_rate slider: stores the numeric value in
 * the extension settings, mirrors it in the slider's label and schedules a save.
 */
async function onRmsMixRateChange() {
    const mixRate = Number($('#rvc_rms_mix_rate').val());
    extension_settings.rvc.rmsMixRate = mixRate;
    $("#rvc_rms_mix_rate_value").text(mixRate);
    saveSettingsDebounced();
}
|
||||
|
||||
/**
 * Input handler of the #rvc_protect slider: stores the numeric value in the
 * extension settings, mirrors it in the slider's label and schedules a save.
 */
async function onProtectChange() {
    const protectAmount = Number($('#rvc_protect').val());
    extension_settings.rvc.protect = protectAmount;
    $("#rvc_protect_value").text(protectAmount);
    saveSettingsDebounced();
}
|
||||
|
||||
/**
 * "Apply" handler: saves the values currently shown in the RVC controls as
 * the voice settings of the character selected in #rvc_character_select.
 *
 * Nothing is stored (and an error toast is shown) when no character or no
 * model is selected. Numeric controls are stored as numbers, like the
 * per-slider change handlers do, instead of the raw strings returned by
 * jQuery's `.val()`.
 */
async function onApplyClick() {
    const toastOptions = { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true };
    const character = $("#rvc_character_select").val();
    const modelName = $("#rvc_model_select").val();

    // `.val()` yields null for a <select> without a selected option, so treat
    // any empty value like the "none" placeholder instead of creating a "null" entry.
    if (!character || character === "none") {
        toastr.error("Character not selected.", DEBUG_PREFIX+" voice mapping apply", toastOptions);
        return;
    }

    if (!modelName || modelName === "none") {
        toastr.error("Model not selected.", DEBUG_PREFIX+" voice mapping apply", toastOptions);
        return;
    }

    extension_settings.rvc.voiceMap[character] = {
        "modelName": modelName,
        "pitchExtraction": $("#rvc_pitch_extraction").val(),
        "indexRate": Number($("#rvc_index_rate").val()),
        "filterRadius": Number($("#rvc_filter_radius").val()),
        "pitchOffset": Number($("#rvc_pitch_offset").val()),
        "rmsMixRate": Number($("#rvc_rms_mix_rate").val()),
        "protect": Number($("#rvc_protect").val())
    };

    // Refresh the read-only debug textarea so it reflects the new entry.
    updateVoiceMapText();

    console.debug(DEBUG_PREFIX,"Updated settings of ",character,":",extension_settings.rvc.voiceMap[character]);
    saveSettingsDebounced();
}
|
||||
|
||||
/**
 * "Delete" handler: removes the voice settings of the character selected in
 * #rvc_character_select, refreshes the debug voice map text and schedules a
 * save. Shows an error toast when the "none" placeholder is selected.
 */
async function onDeleteClick() {
    const selectedCharacter = $("#rvc_character_select").val();

    if (selectedCharacter !== "none") {
        delete extension_settings.rvc.voiceMap[selectedCharacter];
        console.debug(DEBUG_PREFIX,"Deleted settings of ",selectedCharacter);
        updateVoiceMapText();
        saveSettingsDebounced();
        return;
    }

    toastr.error(
        "Character not selected.",
        DEBUG_PREFIX+" voice mapping delete",
        { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }
    );
}
|
||||
|
||||
$(document).ready(function () {
|
||||
function addExtensionControls() {
|
||||
const settingsHtml = `
|
||||
|
@ -185,8 +188,19 @@ $(document).ready(function () {
|
|||
<input type="checkbox" id="rvc_enabled" name="rvc_enabled">
|
||||
<small>Enabled</small>
|
||||
</label>
|
||||
<label>Voice Map (debug infos)</label>
|
||||
<textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4"
|
||||
placeholder="Voice map will appear here for debug purpose"></textarea>
|
||||
</div>
|
||||
<div>
|
||||
<label for="rvc_character_select">Character:</label>
|
||||
<select id="rvc_character_select">
|
||||
<!-- Populated by JS -->
|
||||
</select>
|
||||
<label for="rvc_model_select">Voice:</label>
|
||||
<select id="rvc_model_select">
|
||||
<!-- Populated by JS -->
|
||||
</select>
|
||||
<span>Select Pitch Extraction</span> </br>
|
||||
<select id="rvc_pitch_extraction">
|
||||
<option value="dio">dio</option>
|
||||
|
@ -195,8 +209,6 @@ $(document).ready(function () {
|
|||
<option value="torchcrepe">torchcrepe</option>
|
||||
<option value="rmvpe">rmvpe</option>
|
||||
</select>
|
||||
</div>
|
||||
<div>
|
||||
<label for="rvc_index_rate">
|
||||
Index rate for feature retrieval (<span id="rvc_index_rate_value"></span>)
|
||||
</label>
|
||||
|
@ -208,15 +220,17 @@ $(document).ready(function () {
|
|||
<label for="rvc_pitch_offset">Pitch offset (<span id="rvc_pitch_offset_value"></span>)</label>
|
||||
<input id="rvc_pitch_offset" type="range" min="-100" max="100" step="1" value="0" />
|
||||
|
||||
<label for="rvc_rms_mix_rate">Mix rate (<span id="rvc_rms_mix_rate_value"></span>)</label>
|
||||
<input id="rvc_rms_mix_rate" type="range" min="0" max="1" step="0.01" value="1" />
|
||||
|
||||
<label for="rvc_protect">Protect amount (<span id="rvc_protect_value"></span>)</label>
|
||||
<input id="rvc_protect" type="range" min="0" max="1" step="0.01" value="0.33" />
|
||||
<label>Voice Map</label>
|
||||
<textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4"
|
||||
placeholder="Enter comma separated map of charName:rvcModel. Example: \nAqua:Bella,\nYou:Josh,"></textarea>
|
||||
|
||||
<div id="rvc_status">
|
||||
</div>
|
||||
<div class="rvc_buttons">
|
||||
<input id="rvc_apply" class="menu_button" type="submit" value="Apply" />
|
||||
<input id="rvc_delete" class="menu_button" type="submit" value="Delete" />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
@ -225,14 +239,148 @@ $(document).ready(function () {
|
|||
`;
|
||||
$('#extensions_settings').append(settingsHtml);
|
||||
$("#rvc_enabled").on("click", onEnabledClick);
|
||||
$("#rvc_voice_map").attr("disabled","disabled");;
|
||||
$('#rvc_pitch_extraction').on('change', onPitchExtractionChange);
|
||||
$('#rvc_index_rate').on('input', onIndexRateChange);
|
||||
$('#rvc_filter_radius').on('input', onFilterRadiusChange);
|
||||
$('#rvc_pitch_offset').on('input', onPitchOffsetChange);
|
||||
$('#rvc_rms_mix_rate').on('input', onRmsMixRateChange);
|
||||
$('#rvc_protect').on('input', onProtectChange);
|
||||
$("#rvc_apply").on("click", onApplyClick);
|
||||
$("#rvc_delete").on("click", onDeleteClick);
|
||||
|
||||
}
|
||||
addExtensionControls(); // No init dependencies
|
||||
loadSettings(); // Depends on Extension Controls
|
||||
|
||||
const wrapper = new ModuleWorkerWrapper(moduleWorker);
|
||||
setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL);
|
||||
moduleWorker();
|
||||
})
|
||||
|
||||
//#############################//
|
||||
// API Calls //
|
||||
//#############################//
|
||||
|
||||
/**
 * Request the list of RVC models from the extras server
 * (POST /api/voice-conversion/rvc/get-models-list).
 *
 * @param {*} [model_id] - Unused; kept so existing call sites stay valid.
 * @returns {Promise<Response>} The successful response, un-parsed. The module
 *     worker reads the `models_list` field of its JSON body.
 * @throws {Error} `HTTP <status>: <body>` when the server answers with a
 *     non-OK status (an error toast is shown first).
 */
async function get_models_list(model_id) {
    const url = new URL(getApiUrl());
    url.pathname = '/api/voice-conversion/rvc/get-models-list';

    const apiResult = await doExtrasFetch(url, {
        method: 'POST'
    });

    if (!apiResult.ok) {
        // Title names the request that actually failed (it used to say "Check model state").
        toastr.error(apiResult.statusText, DEBUG_PREFIX+' Get models list request failed');
        throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
    }

    return apiResult;
}
|
||||
|
||||
/**
 * Send TTS audio to the RVC endpoint of the extras server to convert the voice.
 *
 * @param {Response} response - TTS response whose body is the audio to convert.
 * @param {string} character - Character name used as key in the RVC voice map.
 * @returns {Promise<Response>} `response` itself when the character has no
 *     voice map entry, otherwise the extras server response.
 * @throws {Error} When the audio MIME type is not supported, or when the
 *     extras server answers with a non-OK status.
 */
async function rvcVoiceConversion(response, character) {
    const voiceSettings = extension_settings.rvc.voiceMap[character];

    // A character without voice map entry is not an error: hand the audio back untouched.
    if (voiceSettings === undefined) {
        console.info(DEBUG_PREFIX,"No RVC model assign in voice map for current character "+character);
        return response;
    }

    const audioData = await response.blob();
    const supportedTypes = ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm'];
    // Compare the bare MIME type only, so parameters such as ";codecs=opus" do not cause a rejection.
    const mimeType = audioData.type.split(';')[0].trim().toLowerCase();
    // The previous `!audioData.type in [...]` parsed as `(!audioData.type) in [...]`
    // and was always false, so this validation never ran.
    if (!supportedTypes.includes(mimeType)) {
        throw new Error(`TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}`);
    }
    console.log("Audio type received:",audioData.type);

    console.log("Sending tts audio data to RVC on extras server");

    const requestData = new FormData();
    requestData.append('AudioFile', audioData, 'record');
    requestData.append("json", JSON.stringify({
        "modelName": voiceSettings["modelName"],
        "pitchExtraction": voiceSettings["pitchExtraction"],
        "pitchOffset": voiceSettings["pitchOffset"],
        "indexRate": voiceSettings["indexRate"],
        "filterRadius": voiceSettings["filterRadius"],
        "rmsMixRate": voiceSettings["rmsMixRate"],
        "protect": voiceSettings["protect"]
    }));

    const url = new URL(getApiUrl());
    url.pathname = '/api/voice-conversion/rvc/process-audio';

    const apiResult = await doExtrasFetch(url, {
        method: 'POST',
        body: requestData,
    });

    if (!apiResult.ok) {
        toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
    }

    return apiResult;
}
|
||||
|
||||
//#############################//
|
||||
// Module Worker //
|
||||
//#############################//
|
||||
|
||||
/**
 * Periodic worker of the module.
 *
 * Refreshes the character dropdown on every run. While the cached model list
 * is empty it also requests the list from the extras server and rebuilds the
 * #rvc_model_select dropdown from it.
 */
async function moduleWorker() {
    updateCharactersList();

    // Models are only requested while none have been cached yet.
    if (rvcModelsList.length != 0) {
        return;
    }

    const apiResponse = await get_models_list();
    const payload = await apiResponse.json();
    rvcModelsList = payload["models_list"];

    // Start over from the placeholder entry, then add one option per model.
    const modelSelect = $('#rvc_model_select');
    modelSelect.find('option').remove();
    modelSelect.append('<option value="none">Select Voice</option>').val('none');

    for (const modelName of rvcModelsList) {
        $("#rvc_model_select").append(new Option(modelName,modelName));
    }

    console.debug(DEBUG_PREFIX,"Updated model list to:", rvcModelsList);
}
|
||||
|
||||
/**
 * Keep the #rvc_character_select dropdown in sync with the context's characters.
 *
 * Collects the distinct character names in first-seen order and, only when
 * they differ from the cached list, stores them and rebuilds the dropdown
 * (placeholder entry first, selection reset to "none").
 */
function updateCharactersList() {
    const uniqueNames = [...new Set(Array.from(getContext().characters, (entry) => entry.name))];

    // Nothing changed since the last run: leave the dropdown untouched.
    if (JSON.stringify(charactersList) === JSON.stringify(uniqueNames)) {
        return;
    }

    charactersList = uniqueNames;

    const characterSelect = $('#rvc_character_select');
    characterSelect.find('option').remove();
    characterSelect.append('<option value="none">Select Character</option>').val('none');

    for (const charName of charactersList) {
        $("#rvc_character_select").append(new Option(charName,charName));
    }

    console.debug(DEBUG_PREFIX,"Updated character list to:", charactersList);
}
|
|
@ -27,11 +27,11 @@
|
|||
"glow-tts": {
|
||||
"id": "tts_models/en/ljspeech/glow-tts"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
},
|
||||
"speedy-speech": {
|
||||
"id": "tts_models/en/ljspeech/speedy-speech"
|
||||
},
|
||||
"vits": {
|
||||
"id": "tts_models/en/ljspeech/vits"
|
||||
}
|
||||
},
|
||||
"vctk": {
|
||||
|
|
Loading…
Reference in New Issue