mirror of
				https://github.com/SillyTavern/SillyTavern.git
				synced 2025-06-05 21:59:27 +02:00 
			
		
		
		
	Merge pull request #946 from Tony-sama/staging
RVC extension new UI and per character settings
This commit is contained in:
		| @@ -1,65 +1,39 @@ | ||||
| /* | ||||
| TODO: | ||||
|  - Allow to upload RVC model to extras server ? | ||||
|  - Settings per characters ? | ||||
|  - load RVC models list from extras | ||||
|  - Settings per characters | ||||
| */ | ||||
|  | ||||
| import { saveSettingsDebounced } from "../../../script.js"; | ||||
| import { getContext, getApiUrl, extension_settings, doExtrasFetch } from "../../extensions.js"; | ||||
| import { getContext, getApiUrl, extension_settings, doExtrasFetch, ModuleWorkerWrapper } from "../../extensions.js"; | ||||
| export { MODULE_NAME,  rvcVoiceConversion}; | ||||
|  | ||||
| const MODULE_NAME = 'RVC'; | ||||
| const DEBUG_PREFIX = "<RVC module> " | ||||
| const UPDATE_INTERVAL = 1000 | ||||
|  | ||||
| // Send an audio file to RVC to convert voice | ||||
| async function rvcVoiceConversion(response, character) { | ||||
|     let apiResult | ||||
| let charactersList = [] // Updated with module worker | ||||
| let rvcModelsList = [] // Initialized only once | ||||
|  | ||||
|     // Check voice map | ||||
|     if (extension_settings.rvc.voiceMap[character] === undefined) { | ||||
|         toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", DEBUG_PREFIX+'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         console.error("No RVC model assign in voice map for current character "+character); | ||||
|         return response; | ||||
| function updateVoiceMapText(){ | ||||
|     let voiceMapText = "" | ||||
|     for(let i in extension_settings.rvc.voiceMap) { | ||||
|         const voice_settings = extension_settings.rvc.voiceMap[i]; | ||||
|         voiceMapText += i + ":" | ||||
|         + voice_settings["modelName"] + "(" | ||||
|         + voice_settings["pitchExtraction"] + "," | ||||
|         + voice_settings["pitchOffset"] + "," | ||||
|         + voice_settings["indexRate"] + "," | ||||
|         + voice_settings["filterRadius"] + "," | ||||
|         + voice_settings["rmsMixRate"] + "," | ||||
|         + voice_settings["protect"] | ||||
|         + "),\n" | ||||
|     } | ||||
|  | ||||
|     // Load model if different from currently loaded | ||||
|     //if (currentModel === null | currentModel != extension_settings.rvc.voiceMap[character]) | ||||
|     //    await rvcLoadModel(extension_settings.rvc.voiceMap[character]); | ||||
|     extension_settings.rvc.voiceMapText = voiceMapText; | ||||
|     $('#rvc_voice_map').val(voiceMapText); | ||||
|  | ||||
|     const audioData = await response.blob() | ||||
|     if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { | ||||
|         throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` | ||||
|     } | ||||
|     console.log("Audio type received:",audioData.type) | ||||
|  | ||||
|     console.log("Sending tts audio data to RVC on extras server") | ||||
|  | ||||
|     var requestData = new FormData(); | ||||
|     requestData.append('AudioFile', audioData, 'record'); | ||||
|     requestData.append("json", JSON.stringify({ | ||||
|         "modelName": extension_settings.rvc.voiceMap[character], | ||||
|         "pitchOffset": extension_settings.rvc.pitchOffset, | ||||
|         "pitchExtraction": extension_settings.rvc.pitchExtraction, | ||||
|         "indexRate": extension_settings.rvc.indexRate, | ||||
|         "filterRadius": extension_settings.rvc.filterRadius, | ||||
|         //"rmsMixRate": extension_settings.rvc.rmsMixRate, | ||||
|         "protect": extension_settings.rvc.protect | ||||
|     })); | ||||
|      | ||||
|     const url = new URL(getApiUrl()); | ||||
|     url.pathname = '/api/voice-conversion/rvc/process-audio'; | ||||
|  | ||||
|     apiResult = await doExtrasFetch(url, { | ||||
|         method: 'POST', | ||||
|         body: requestData, | ||||
|     }); | ||||
|  | ||||
|     if (!apiResult.ok) { | ||||
|         toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); | ||||
|     } | ||||
|  | ||||
|     return apiResult; | ||||
|     console.debug(DEBUG_PREFIX,"Updated voice map debug text to\n",voiceMapText) | ||||
| } | ||||
|  | ||||
| //#############################// | ||||
| @@ -73,7 +47,7 @@ const defaultSettings = { | ||||
|     pitchExtraction:"dio", | ||||
|     indexRate:0.88, | ||||
|     filterRadius:3, | ||||
|     //rmsMixRate:1, | ||||
|     rmsMixRate:1, | ||||
|     protect:0.33, | ||||
|     voicMapText: "", | ||||
|     voiceMap: {} | ||||
| @@ -86,9 +60,6 @@ function loadSettings() { | ||||
|     $('#rvc_enabled').prop('checked',extension_settings.rvc.enabled); | ||||
|     $('#rvc_model').val(extension_settings.rvc.model); | ||||
|  | ||||
|     $('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset); | ||||
|     $('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset); | ||||
|  | ||||
|     $('#rvc_pitch_extraction').val(extension_settings.rvc.pitchExtraction); | ||||
|     $('#rvc_pitch_extractiont_value').text(extension_settings.rvc.pitchExtraction); | ||||
|  | ||||
| @@ -97,43 +68,18 @@ function loadSettings() { | ||||
|  | ||||
|     $('#rvc_filter_radius').val(extension_settings.rvc.filterRadius); | ||||
|     $("#rvc_filter_radius_value").text(extension_settings.rvc.filterRadius); | ||||
|      | ||||
|     $('#rvc_pitch_offset').val(extension_settings.rvc.pitchOffset); | ||||
|     $('#rvc_pitch_offset_value').text(extension_settings.rvc.pitchOffset); | ||||
|  | ||||
|     $('#rvc_rms_mix_rate').val(extension_settings.rvc.rmsMixRate); | ||||
|     $("#rvc_rms_mix_rate_value").text(extension_settings.rvc.rmsMixRate); | ||||
|  | ||||
|     //$('#rvc_mix_rate').val(extension_settings.rvc.rmsMixRate); | ||||
|     $('#rvc_protect').val(extension_settings.rvc.protect); | ||||
|     $("#rvc_protect_value").text(extension_settings.rvc.protect); | ||||
|  | ||||
|     $('#rvc_voice_map').val(extension_settings.rvc.voiceMapText); | ||||
| } | ||||
|  | ||||
| async function onApplyClick() { | ||||
|     let error = false; | ||||
|     let array = $('#rvc_voice_map').val().split(","); | ||||
|     array = array.map(element => {return element.trim();}); | ||||
|     array = array.filter((str) => str !== ''); | ||||
|     extension_settings.rvc.voiceMap = {}; | ||||
|     for (const text of array) { | ||||
|         if (text.includes(":")) { | ||||
|             const pair = text.split(":") | ||||
|             extension_settings.rvc.voiceMap[pair[0].trim()] = pair[1].trim() | ||||
|             console.debug(DEBUG_PREFIX+"Added mapping", pair[0],"=>", extension_settings.rvc.voiceMap[pair[0]]); | ||||
|         } | ||||
|         else { | ||||
|             $("#rvc_status").text("Voice map is invalid, check console for errors"); | ||||
|             $("#rvc_status").css("color", "red"); | ||||
|             console.error(DEBUG_PREFIX,"Wrong syntax for message mapping, no ':' found in:", text); | ||||
|             toastr.error("no ':' found in: '"+text+"'", DEBUG_PREFIX+' RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|             error = true; | ||||
|         } | ||||
|     } | ||||
|      | ||||
|     if (!error) { | ||||
|         $("#rvc_status").text("Successfully applied settings"); | ||||
|         $("#rvc_status").css("color", "green"); | ||||
|         console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.rvc.voiceMap); | ||||
|         toastr.info("New map:\n"+JSON.stringify(extension_settings.rvc.voiceMap).substring(0,200)+"...", DEBUG_PREFIX+"Updated message mapping", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         extension_settings.rvc.voiceMapText = $('#rvc_voice_map').val(); | ||||
|         saveSettingsDebounced(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| async function onEnabledClick() { | ||||
| @@ -164,12 +110,69 @@ async function onPitchOffsetChange() { | ||||
|     saveSettingsDebounced() | ||||
| } | ||||
|  | ||||
| async function onRmsMixRateChange() { | ||||
|     extension_settings.rvc.rmsMixRate = Number($('#rvc_rms_mix_rate').val()); | ||||
|     $("#rvc_rms_mix_rate_value").text(extension_settings.rvc.rmsMixRate) | ||||
|     saveSettingsDebounced() | ||||
| } | ||||
|  | ||||
| async function onProtectChange() { | ||||
|     extension_settings.rvc.protect = Number($('#rvc_protect').val()); | ||||
|     $("#rvc_protect_value").text(extension_settings.rvc.protect) | ||||
|     saveSettingsDebounced() | ||||
| } | ||||
|  | ||||
| async function onApplyClick() { | ||||
|     let error = false; | ||||
|     const character = $("#rvc_character_select").val(); | ||||
|     const model_name = $("#rvc_model_select").val(); | ||||
|     const pitchExtraction = $("#rvc_pitch_extraction").val(); | ||||
|     const indexRate = $("#rvc_index_rate").val(); | ||||
|     const filterRadius = $("#rvc_filter_radius").val(); | ||||
|     const pitchOffset = $("#rvc_pitch_offset").val(); | ||||
|     const rmsMixRate = $("#rvc_rms_mix_rate").val(); | ||||
|     const protect = $("#rvc_protect").val(); | ||||
|  | ||||
|     if (character === "none") { | ||||
|         toastr.error("Character not selected.", DEBUG_PREFIX+" voice mapping apply", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     if (model_name == "none") { | ||||
|         toastr.error("Model not selected.", DEBUG_PREFIX+" voice mapping apply", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     extension_settings.rvc.voiceMap[character] = { | ||||
|         "modelName": model_name, | ||||
|         "pitchExtraction": pitchExtraction, | ||||
|         "indexRate": indexRate, | ||||
|         "filterRadius": filterRadius, | ||||
|         "pitchOffset": pitchOffset, | ||||
|         "rmsMixRate": rmsMixRate, | ||||
|         "protect": protect | ||||
|     } | ||||
|  | ||||
|     updateVoiceMapText(); | ||||
|  | ||||
|     console.debug(DEBUG_PREFIX,"Updated settings of ",character,":",extension_settings.rvc.voiceMap[character]) | ||||
|     saveSettingsDebounced(); | ||||
| } | ||||
|  | ||||
| async function onDeleteClick() { | ||||
|     const character = $("#rvc_character_select").val(); | ||||
|  | ||||
|     if (character === "none") { | ||||
|         toastr.error("Character not selected.", DEBUG_PREFIX+" voice mapping delete", { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     delete extension_settings.rvc.voiceMap[character]; | ||||
|     console.debug(DEBUG_PREFIX,"Deleted settings of ",character); | ||||
|     updateVoiceMapText(); | ||||
|     saveSettingsDebounced(); | ||||
| } | ||||
|  | ||||
| $(document).ready(function () { | ||||
|     function addExtensionControls() { | ||||
|         const settingsHtml = ` | ||||
| @@ -185,8 +188,19 @@ $(document).ready(function () { | ||||
|                             <input type="checkbox" id="rvc_enabled" name="rvc_enabled"> | ||||
|                             <small>Enabled</small> | ||||
|                         </label> | ||||
|                         <label>Voice Map (debug infos)</label> | ||||
|                         <textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4" | ||||
|                             placeholder="Voice map will appear here for debug purpose"></textarea> | ||||
|                     </div> | ||||
|                     <div> | ||||
|                         <label for="rvc_character_select">Character:</label> | ||||
|                         <select id="rvc_character_select"> | ||||
|                             <!-- Populated by JS --> | ||||
|                         </select> | ||||
|                         <label for="rvc_model_select">Voice:</label> | ||||
|                         <select id="rvc_model_select"> | ||||
|                             <!-- Populated by JS --> | ||||
|                         </select> | ||||
|                         <span>Select Pitch Extraction</span> </br> | ||||
|                         <select id="rvc_pitch_extraction"> | ||||
|                             <option value="dio">dio</option> | ||||
| @@ -195,8 +209,6 @@ $(document).ready(function () { | ||||
|                             <option value="torchcrepe">torchcrepe</option> | ||||
|                             <option value="rmvpe">rmvpe</option> | ||||
|                         </select> | ||||
|                     </div> | ||||
|                     <div> | ||||
|                         <label for="rvc_index_rate"> | ||||
|                             Index rate for feature retrieval (<span id="rvc_index_rate_value"></span>) | ||||
|                         </label> | ||||
| @@ -208,15 +220,17 @@ $(document).ready(function () { | ||||
|                         <label for="rvc_pitch_offset">Pitch offset (<span id="rvc_pitch_offset_value"></span>)</label> | ||||
|                         <input id="rvc_pitch_offset" type="range" min="-100" max="100" step="1" value="0" /> | ||||
|  | ||||
|                         <label for="rvc_rms_mix_rate">Mix rate (<span id="rvc_rms_mix_rate_value"></span>)</label> | ||||
|                         <input id="rvc_rms_mix_rate" type="range" min="0" max="1" step="0.01" value="1" /> | ||||
|  | ||||
|                         <label for="rvc_protect">Protect amount (<span id="rvc_protect_value"></span>)</label> | ||||
|                         <input id="rvc_protect" type="range" min="0" max="1" step="0.01" value="0.33" /> | ||||
|                         <label>Voice Map</label> | ||||
|                         <textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4" | ||||
|                             placeholder="Enter comma separated map of charName:rvcModel. Example: \nAqua:Bella,\nYou:Josh,"></textarea> | ||||
|                          | ||||
|                         <div id="rvc_status"> | ||||
|                         </div> | ||||
|                         <div class="rvc_buttons"> | ||||
|                             <input id="rvc_apply" class="menu_button" type="submit" value="Apply" /> | ||||
|                             <input id="rvc_delete" class="menu_button" type="submit" value="Delete" /> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </div> | ||||
| @@ -225,14 +239,148 @@ $(document).ready(function () { | ||||
|         `; | ||||
|         $('#extensions_settings').append(settingsHtml); | ||||
|         $("#rvc_enabled").on("click", onEnabledClick); | ||||
|         $("#rvc_voice_map").attr("disabled","disabled");; | ||||
|         $('#rvc_pitch_extraction').on('change', onPitchExtractionChange); | ||||
|         $('#rvc_index_rate').on('input', onIndexRateChange); | ||||
|         $('#rvc_filter_radius').on('input', onFilterRadiusChange); | ||||
|         $('#rvc_pitch_offset').on('input', onPitchOffsetChange); | ||||
|         $('#rvc_rms_mix_rate').on('input', onRmsMixRateChange); | ||||
|         $('#rvc_protect').on('input', onProtectChange); | ||||
|         $("#rvc_apply").on("click", onApplyClick); | ||||
|         $("#rvc_delete").on("click", onDeleteClick); | ||||
|          | ||||
|     } | ||||
|     addExtensionControls(); // No init dependencies | ||||
|     loadSettings(); // Depends on Extension Controls | ||||
|  | ||||
|     const wrapper = new ModuleWorkerWrapper(moduleWorker); | ||||
|     setInterval(wrapper.update.bind(wrapper), UPDATE_INTERVAL); | ||||
|     moduleWorker(); | ||||
| }) | ||||
|  | ||||
| //#############################// | ||||
| //  API Calls                  // | ||||
| //#############################// | ||||
|  | ||||
| /* | ||||
|     Check model installation state, return one of ["installed", "corrupted", "absent"] | ||||
| */ | ||||
| async function get_models_list(model_id) { | ||||
|     const url = new URL(getApiUrl()); | ||||
|     url.pathname = '/api/voice-conversion/rvc/get-models-list'; | ||||
|  | ||||
|     const apiResult = await doExtrasFetch(url, { | ||||
|         method: 'POST' | ||||
|     }); | ||||
|  | ||||
|     if (!apiResult.ok) { | ||||
|         toastr.error(apiResult.statusText, DEBUG_PREFIX+' Check model state request failed'); | ||||
|         throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); | ||||
|     } | ||||
|  | ||||
|     return apiResult | ||||
| } | ||||
|  | ||||
| /* | ||||
|     Send an audio file to RVC to convert voice | ||||
| */ | ||||
| async function rvcVoiceConversion(response, character) { | ||||
|     let apiResult | ||||
|  | ||||
|     // Check voice map | ||||
|     if (extension_settings.rvc.voiceMap[character] === undefined) { | ||||
|         //toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", DEBUG_PREFIX+'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         console.info(DEBUG_PREFIX,"No RVC model assign in voice map for current character "+character); | ||||
|         return response; | ||||
|     } | ||||
|  | ||||
|     const audioData = await response.blob() | ||||
|     if (!audioData.type in ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm']) { | ||||
|         throw `TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}` | ||||
|     } | ||||
|     console.log("Audio type received:",audioData.type) | ||||
|  | ||||
|     const voice_settings = extension_settings.rvc.voiceMap[character]; | ||||
|  | ||||
|     console.log("Sending tts audio data to RVC on extras server") | ||||
|  | ||||
|     var requestData = new FormData(); | ||||
|     requestData.append('AudioFile', audioData, 'record'); | ||||
|     requestData.append("json", JSON.stringify({ | ||||
|         "modelName": voice_settings["modelName"], | ||||
|         "pitchExtraction": voice_settings["pitchExtraction"], | ||||
|         "pitchOffset": voice_settings["pitchOffset"], | ||||
|         "indexRate": voice_settings["indexRate"], | ||||
|         "filterRadius": voice_settings["filterRadius"], | ||||
|         "rmsMixRate": voice_settings["rmsMixRate"], | ||||
|         "protect": voice_settings["protect"] | ||||
|     })); | ||||
|      | ||||
|     const url = new URL(getApiUrl()); | ||||
|     url.pathname = '/api/voice-conversion/rvc/process-audio'; | ||||
|  | ||||
|     apiResult = await doExtrasFetch(url, { | ||||
|         method: 'POST', | ||||
|         body: requestData, | ||||
|     }); | ||||
|  | ||||
|     if (!apiResult.ok) { | ||||
|         toastr.error(apiResult.statusText, DEBUG_PREFIX+' RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true }); | ||||
|         throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`); | ||||
|     } | ||||
|  | ||||
|     return apiResult; | ||||
| } | ||||
|  | ||||
| //#############################// | ||||
| //  Module Worker              // | ||||
| //#############################// | ||||
|  | ||||
| async function moduleWorker() { | ||||
|     updateCharactersList(); | ||||
|  | ||||
|     if (rvcModelsList.length == 0) { | ||||
|         let result = await get_models_list(); | ||||
|         result = await result.json(); | ||||
|         rvcModelsList = result["models_list"] | ||||
|  | ||||
|         $('#rvc_model_select') | ||||
|             .find('option') | ||||
|             .remove() | ||||
|             .end() | ||||
|             .append('<option value="none">Select Voice</option>') | ||||
|             .val('none') | ||||
|              | ||||
|         for(const modelName of rvcModelsList) { | ||||
|             $("#rvc_model_select").append(new Option(modelName,modelName)); | ||||
|         } | ||||
|  | ||||
|         console.debug(DEBUG_PREFIX,"Updated model list to:", rvcModelsList); | ||||
|     } | ||||
| } | ||||
|  | ||||
| function updateCharactersList() { | ||||
|     let currentcharacters = new Set(); | ||||
|     for (const i of getContext().characters) { | ||||
|         currentcharacters.add(i.name); | ||||
|     } | ||||
|  | ||||
|     currentcharacters = Array.from(currentcharacters) | ||||
|      | ||||
|     if (JSON.stringify(charactersList) !== JSON.stringify(currentcharacters)) { | ||||
|         charactersList = currentcharacters | ||||
|  | ||||
|         $('#rvc_character_select') | ||||
|             .find('option') | ||||
|             .remove() | ||||
|             .end() | ||||
|             .append('<option value="none">Select Character</option>') | ||||
|             .val('none') | ||||
|              | ||||
|         for(const charName of charactersList) { | ||||
|             $("#rvc_character_select").append(new Option(charName,charName)); | ||||
|         } | ||||
|  | ||||
|         console.debug(DEBUG_PREFIX,"Updated character list to:", charactersList); | ||||
|     } | ||||
| } | ||||
| @@ -27,11 +27,11 @@ | ||||
|             "glow-tts": { | ||||
|                 "id": "tts_models/en/ljspeech/glow-tts" | ||||
|             }, | ||||
|             "vits": { | ||||
|                 "id": "tts_models/en/ljspeech/vits" | ||||
|             }, | ||||
|             "speedy-speech": { | ||||
|                 "id": "tts_models/en/ljspeech/speedy-speech" | ||||
|             }, | ||||
|             "vits": { | ||||
|                 "id": "tts_models/en/ljspeech/vits" | ||||
|             } | ||||
|         }, | ||||
|         "vctk": { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user