mirror of
				https://github.com/SillyTavern/SillyTavern.git
				synced 2025-06-05 21:59:27 +02:00 
			
		
		
		
	Add extension for RVC postprocessing of TTS audio using ST extras
This commit is contained in:
		
							
								
								
									
										262
									
								
								public/scripts/extensions/rvc/index.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										262
									
								
								public/scripts/extensions/rvc/index.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,262 @@
 | 
			
		||||
/*
 | 
			
		||||
TODO:
 | 
			
		||||
 - try pseudo streaming audio by just sending chunk every X seconds and asking VOSK if it is full text.
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
import { saveSettingsDebounced } from "../../../script.js";
 | 
			
		||||
import { getContext, getApiUrl, modules, extension_settings, ModuleWorkerWrapper, doExtrasFetch } from "../../extensions.js";
 | 
			
		||||
export { MODULE_NAME,  rvcVoiceConversion};
 | 
			
		||||
 | 
			
		||||
/** Identifier of this extension; exported from the module. */
const MODULE_NAME = "RVC";

/** Prefix put in front of this module's console messages. */
const DEBUG_PREFIX = '<RVC module> ';
 | 
			
		||||
 | 
			
		||||
//let currentModel = null
 | 
			
		||||
 | 
			
		||||
/*/ Load character model if needed
 | 
			
		||||
async function rvcLoadModel(model) {
 | 
			
		||||
    const url = new URL(getApiUrl());
 | 
			
		||||
    url.pathname = '/api/voice-conversion/rvc/load-model';
 | 
			
		||||
 | 
			
		||||
    const apiResult = await doExtrasFetch(url, {
 | 
			
		||||
        method: 'POST',
 | 
			
		||||
        headers: {
 | 
			
		||||
            'Content-Type': 'application/json',
 | 
			
		||||
            'Bypass-Tunnel-Reminder': 'bypass',
 | 
			
		||||
        },
 | 
			
		||||
        body: JSON.stringify({ "model_name": model }),
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    if (!apiResult.ok) {
 | 
			
		||||
        toastr.error("May be a wrong model name in RVC voice map, please check console for details", 'RVC Voice model load Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
 | 
			
		||||
        throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const result = await apiResult.json();
 | 
			
		||||
    console.log("Loaded RVC model:", result.model_loaded);
 | 
			
		||||
    currentModel = model
 | 
			
		||||
}
 | 
			
		||||
*/
 | 
			
		||||
 | 
			
		||||
/**
 * Send TTS audio to the RVC endpoint of the extras server for voice conversion.
 *
 * The model name is looked up in `extension_settings.rvc.voiceMap` under
 * `character`; the other conversion parameters are read from
 * `extension_settings.rvc`.
 *
 * @param {Response} response - Result of the TTS request (presumably a fetch
 *   Response); only `response.blob()` is used, so its body is consumed here.
 * @param {string} character - Character name used as key into the RVC voice map.
 * @returns {Promise<Response>} The successful response of the
 *   `/api/voice-conversion/rvc/process-audio` request.
 * @throws {Error} If no model is mapped to `character`, if the audio MIME type
 *   is not one of the supported types, or if the server replies with a
 *   non-OK status.
 */
async function rvcVoiceConversion(response, character) {
    const supportedAudioTypes = ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm'];

    // Check voice map
    const modelName = extension_settings.rvc.voiceMap[character];
    if (modelName === undefined) {
        toastr.error("No model is assigned to character '" + character + "', check RVC voice map in the extension menu.", 'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        throw new Error("No RVC model assign in voice map for current character " + character);
    }

    // Load model if different from currently loaded
    //if (currentModel === null | currentModel != extension_settings.rvc.voiceMap[character])
    //    await rvcLoadModel(extension_settings.rvc.voiceMap[character]);

    const audioData = await response.blob();

    // Compare only the MIME essence so that parameters such as ";codecs=..."
    // do not cause a supported format to be rejected.
    const mimeType = audioData.type.split(';')[0].trim().toLowerCase();
    if (!supportedAudioTypes.includes(mimeType)) {
        throw new Error(`TTS received HTTP response with invalid data format. Expecting audio/mpeg, got ${audioData.type}`);
    }
    console.log("Sending tts audio data to RVC on extras server");

    const requestData = new FormData();
    requestData.append('AudioFile', audioData, 'record.wav');
    requestData.append("json", JSON.stringify({
        "modelName": modelName,
        "pitchOffset": extension_settings.rvc.pitchOffset,
        "pitchExtraction": extension_settings.rvc.pitchExtraction,
        "indexRate": extension_settings.rvc.indexRate,
        "filterRadius": extension_settings.rvc.filterRadius,
        //"rmsMixRate": extension_settings.rvc.rmsMixRate,
        "protect": extension_settings.rvc.protect
    }));

    const url = new URL(getApiUrl());
    url.pathname = '/api/voice-conversion/rvc/process-audio';

    const apiResult = await doExtrasFetch(url, {
        method: 'POST',
        body: requestData,
    });

    if (!apiResult.ok) {
        toastr.error(apiResult.statusText, 'RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
    }

    return apiResult;
}
 | 
			
		||||
 | 
			
		||||
//#############################//
 | 
			
		||||
//  Extension UI and Settings  //
 | 
			
		||||
//#############################//
 | 
			
		||||
 | 
			
		||||
/**
 * Default values that loadSettings() copies into `extension_settings.rvc`.
 *
 * `voiceMapText` is the raw text of the voice-map textarea and `voiceMap` the
 * object parsed from it when the Apply button is clicked.
 */
const defaultSettings = {
    enabled: false,
    model: "",
    pitchOffset: 0,
    pitchExtraction: "dio",
    indexRate: 0.88,
    filterRadius: 3,
    //rmsMixRate:1,
    protect: 0.33,
    // Was misspelled "voicMapText"; every reader/writer in this file uses "voiceMapText".
    voiceMapText: "",
    voiceMap: {},
};
 | 
			
		||||
 | 
			
		||||
/**
 * Initialise `extension_settings.rvc` from `defaultSettings` and push the
 * current values into the settings controls.
 *
 * Any key that is missing from the stored settings is filled from the
 * defaults; values that are already present are left untouched. The settings
 * object itself is created when it does not exist yet.
 */
function loadSettings() {
    // Guard against a missing settings object so the lookups below cannot throw.
    if (extension_settings.rvc === undefined || extension_settings.rvc === null) {
        extension_settings.rvc = {};
    }
    const settings = extension_settings.rvc;

    // Per-key fill: settings stored without some keys still receive defaults for them.
    for (const [key, value] of Object.entries(defaultSettings)) {
        if (settings[key] === undefined) {
            settings[key] = value;
        }
    }

    $('#rvc_enabled').prop('checked', settings.enabled);
    // NOTE(review): the settings markup in this file defines no #rvc_model element — confirm whether this is still needed.
    $('#rvc_model').val(settings.model);

    $('#rvc_pitch_offset').val(settings.pitchOffset);
    $('#rvc_pitch_offset_value').text(settings.pitchOffset);

    $('#rvc_pitch_extraction').val(settings.pitchExtraction);
    // Selector was misspelled "#rvc_pitch_extractiont_value".
    $('#rvc_pitch_extraction_value').text(settings.pitchExtraction);

    $('#rvc_index_rate').val(settings.indexRate);
    $('#rvc_index_rate_value').text(settings.indexRate);

    $('#rvc_filter_radius').val(settings.filterRadius);
    $("#rvc_filter_radius_value").text(settings.filterRadius);

    //$('#rvc_mix_rate').val(extension_settings.rvc.rmsMixRate);
    $('#rvc_protect').val(settings.protect);
    $("#rvc_protect_value").text(settings.protect);

    $('#rvc_voice_map').val(settings.voiceMapText);
}
 | 
			
		||||
 | 
			
		||||
/**
 * Click handler for the Apply button: parse the voice-map textarea and store
 * the result in the RVC settings.
 *
 * The textarea holds comma separated `name=model` entries. Each entry is split
 * at its first "=", and both sides are trimmed. The new map is committed (and
 * the settings saved) only when every entry is valid; otherwise the previous
 * `voiceMap` is kept and an error is shown in `#rvc_status`.
 */
async function onApplyClick() {
    const rawText = $('#rvc_voice_map').val();
    const entries = rawText
        .split(",")
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');

    // Build into a local object so an invalid entry cannot leave a half-applied map behind.
    const voiceMap = {};
    let error = false;

    for (const text of entries) {
        const separatorIndex = text.indexOf("=");
        if (separatorIndex === -1) {
            $("#rvc_status").text("Voice map is invalid, check console for errors");
            $("#rvc_status").css("color", "red");
            console.error(DEBUG_PREFIX+"Wrong syntax for message mapping, no '=' found in:", text);
            error = true;
            continue;
        }

        const characterName = text.slice(0, separatorIndex).trim();
        const modelName = text.slice(separatorIndex + 1).trim();
        voiceMap[characterName] = modelName;
        console.debug(DEBUG_PREFIX+"Added mapping", characterName, "=>", modelName);
    }

    if (error) {
        return;
    }

    extension_settings.rvc.voiceMap = voiceMap;
    $("#rvc_status").text("Successfully applied settings");
    $("#rvc_status").css("color", "green");
    console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.rvc.voiceMap);
    extension_settings.rvc.voiceMapText = rawText;
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Click handler for the "Enabled" checkbox: copies its checked state into
 * `extension_settings.rvc.enabled` and calls saveSettingsDebounced().
 */
async function onEnabledClick() {
    const isChecked = $("#rvc_enabled").is(":checked");
    extension_settings.rvc.enabled = isChecked;
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Change handler for the pitch-extraction select: stores the selected value
 * in `extension_settings.rvc.pitchExtraction` and calls saveSettingsDebounced().
 */
async function onPitchExtractionChange() {
    const selectedMethod = $("#rvc_pitch_extraction").val();
    extension_settings.rvc.pitchExtraction = selectedMethod;
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Input handler for the index-rate slider: stores the slider value as a
 * number, mirrors it into the `#rvc_index_rate_value` label and calls
 * saveSettingsDebounced().
 */
async function onIndexRateChange() {
    const indexRate = Number($("#rvc_index_rate").val());
    extension_settings.rvc.indexRate = indexRate;
    $('#rvc_index_rate_value').text(indexRate);
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Input handler for the filter-radius slider: stores the slider value as a
 * number, mirrors it into the `#rvc_filter_radius_value` label and calls
 * saveSettingsDebounced().
 */
async function onFilterRadiusChange() {
    const filterRadius = Number($("#rvc_filter_radius").val());
    extension_settings.rvc.filterRadius = filterRadius;
    $('#rvc_filter_radius_value').text(filterRadius);
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Input handler for the pitch-offset slider: stores the slider value as a
 * number, mirrors it into the `#rvc_pitch_offset_value` label and calls
 * saveSettingsDebounced().
 */
async function onPitchOffsetChange() {
    const pitchOffset = Number($("#rvc_pitch_offset").val());
    extension_settings.rvc.pitchOffset = pitchOffset;
    $('#rvc_pitch_offset_value').text(pitchOffset);
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
/**
 * Input handler for the protect slider: stores the slider value as a number,
 * mirrors it into the `#rvc_protect_value` label and calls
 * saveSettingsDebounced().
 */
async function onProtectChange() {
    const protect = Number($("#rvc_protect").val());
    extension_settings.rvc.protect = protect;
    $('#rvc_protect_value').text(protect);
    saveSettingsDebounced();
}
 | 
			
		||||
 | 
			
		||||
// Once the DOM is ready: inject the RVC settings drawer into the extensions
// panel, bind its controls to their handlers, then fill the controls from the
// stored settings.
$(document).ready(function () {
    /**
     * Append the RVC settings markup to `#extensions_settings` and attach the
     * event handlers for every control it contains.
     */
    function addExtensionControls() {
        // The voice-map placeholder documents the "name=model" syntax that onApplyClick parses.
        const settingsHtml = `
        <div id="rvc_settings">
            <div class="inline-drawer">
                <div class="inline-drawer-toggle inline-drawer-header">
                    <b>RVC</b>
                    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
                </div>
                <div class="inline-drawer-content">
                    <div>
                        <label class="checkbox_label" for="rvc_enabled">
                            <input type="checkbox" id="rvc_enabled" name="rvc_enabled">
                            <small>Enabled</small>
                        </label>
                    </div>
                    <div>
                        <span>Select Pitch Extraction</span> <br>
                        <select id="rvc_pitch_extraction">
                            <option value="dio">dio</option>
                            <option value="pm">pm</option>
                            <option value="harvest">harvest</option>
                            <option value="torchcrepe">torchcrepe</option>
                            <option value="rmvpe">rmvpe</option>
                        </select>
                    </div>
                    <div>
                        <label for="rvc_index_rate">
                            Index rate for feature retrieval (<span id="rvc_index_rate_value"></span>)
                        </label>
                        <input id="rvc_index_rate" type="range" min="0" max="1" step="0.01" value="0.5" />

                        <label for="rvc_filter_radius">Filter radius (<span id="rvc_filter_radius_value"></span>)</label>
                        <input id="rvc_filter_radius" type="range" min="0" max="7" step="1" value="3" />

                        <label for="rvc_pitch_offset">Pitch offset (<span id="rvc_pitch_offset_value"></span>)</label>
                        <input id="rvc_pitch_offset" type="range" min="-100" max="100" step="1" value="0" />

                        <label for="rvc_protect">Protect amount (<span id="rvc_protect_value"></span>)</label>
                        <input id="rvc_protect" type="range" min="0" max="1" step="0.01" value="0.33" />
                        <label>Voice Map</label>
                        <textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4"
                            placeholder="Enter comma separated map of charName=rvcModel. Example: \nAqua=Bella,\nYou=Josh,"></textarea>
                        <div id="rvc_status">
                        </div>
                        <div class="rvc_buttons">
                            <input id="rvc_apply" class="menu_button" type="submit" value="Apply" />
                        </div>
                    </div>
                </div>
            </div>
        </div>
        `;
        $('#extensions_settings').append(settingsHtml);
        $("#rvc_enabled").on("click", onEnabledClick);
        $('#rvc_pitch_extraction').on('change', onPitchExtractionChange);
        $('#rvc_index_rate').on('input', onIndexRateChange);
        $('#rvc_filter_radius').on('input', onFilterRadiusChange);
        $('#rvc_pitch_offset').on('input', onPitchOffsetChange);
        $('#rvc_protect').on('input', onProtectChange);
        $("#rvc_apply").on("click", onApplyClick);
    }

    addExtensionControls(); // No init dependencies
    loadSettings(); // Depends on Extension Controls
});
 | 
			
		||||
							
								
								
									
										11
									
								
								public/scripts/extensions/rvc/manifest.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								public/scripts/extensions/rvc/manifest.json
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,11 @@
 | 
			
		||||
{
 | 
			
		||||
    "display_name": "RVC",
 | 
			
		||||
    "loading_order": 13,
 | 
			
		||||
    "requires": ["rvc"],
 | 
			
		||||
    "optional": [],
 | 
			
		||||
    "js": "index.js",
 | 
			
		||||
    "css": "style.css",
 | 
			
		||||
    "author": "Keij#6799",
 | 
			
		||||
    "version": "0.1.0",
 | 
			
		||||
    "homePage": "https://github.com/SillyTavern/SillyTavern"
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										3
									
								
								public/scripts/extensions/rvc/style.css
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								public/scripts/extensions/rvc/style.css
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
/* NOTE(review): the RVC settings markup added in this commit creates no element
   with class "speech-toggle"; presumably this rule was carried over from another
   extension. Confirm it is needed before keeping it. */
.speech-toggle { display: flex; }
 | 
			
		||||
@@ -8,6 +8,7 @@ import { CoquiTtsProvider } from './coquitts.js'
 | 
			
		||||
import { SystemTtsProvider } from './system.js'
 | 
			
		||||
import { NovelTtsProvider } from './novel.js'
 | 
			
		||||
import { power_user } from '../../power-user.js'
 | 
			
		||||
import { rvcVoiceConversion } from "../rvc/index.js"
 | 
			
		||||
 | 
			
		||||
const UPDATE_INTERVAL = 1000
 | 
			
		||||
 | 
			
		||||
@@ -399,8 +400,13 @@ function saveLastValues() {
 | 
			
		||||
    )
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Generate speech for `text`, optionally pass it through RVC voice
 * conversion, then queue the audio and mark the TTS job as complete.
 *
 * @param {string} text - Text handed to `ttsProvider.generateTts`.
 * @param {*} voiceId - Voice identifier handed to `ttsProvider.generateTts`.
 * @param {string} char - Character name forwarded to `rvcVoiceConversion`,
 *   which uses it to look up the RVC model.
 */
async function tts(text, voiceId, char) {
    let response = await ttsProvider.generateTts(text, voiceId)

    // RVC injection. The settings object is checked first so TTS keeps
    // working when the RVC extension has not initialised its settings.
    const rvcSettings = extension_settings.rvc
    if (rvcSettings && rvcSettings.enabled) {
        response = await rvcVoiceConversion(response, char)
    }

    addAudioJob(response)
    completeTtsJob()
}
 | 
			
		||||
@@ -450,7 +456,7 @@ async function processTtsQueue() {
 | 
			
		||||
            toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`)
 | 
			
		||||
            throw `Unable to attain voiceId for ${char}`
 | 
			
		||||
        }
 | 
			
		||||
        tts(text, voiceId)
 | 
			
		||||
        tts(text, voiceId, char)
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
        console.error(error)
 | 
			
		||||
        currentTtsJob = null
 | 
			
		||||
@@ -567,6 +573,7 @@ function onEnableClick() {
 | 
			
		||||
    saveSettingsDebounced()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
function onAutoGenerationClick() {
 | 
			
		||||
    extension_settings.tts.auto_generation = $('#tts_auto_generation').prop('checked');
 | 
			
		||||
    saveSettingsDebounced()
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user