Add extension for RVC postprocessing of TTS audio using ST extras

This commit is contained in:
Tony Ribeiro
2023-08-09 03:30:26 +02:00
parent 9a7654598e
commit 8bea721721
4 changed files with 286 additions and 3 deletions

View File

@@ -0,0 +1,262 @@
/*
TODO:
- try pseudo streaming audio by just sending chunk every X seconds and asking VOSK if it is full text.
*/
import { saveSettingsDebounced } from "../../../script.js";
import { getContext, getApiUrl, modules, extension_settings, ModuleWorkerWrapper, doExtrasFetch } from "../../extensions.js";
export { MODULE_NAME, rvcVoiceConversion};
// Name of this extension module; exported alongside rvcVoiceConversion.
const MODULE_NAME = 'RVC';
// Prefix prepended to this module's console.debug / console.error messages.
const DEBUG_PREFIX = "<RVC module> "
//let currentModel = null
/*/ Load character model if needed
async function rvcLoadModel(model) {
const url = new URL(getApiUrl());
url.pathname = '/api/voice-conversion/rvc/load-model';
const apiResult = await doExtrasFetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Bypass-Tunnel-Reminder': 'bypass',
},
body: JSON.stringify({ "model_name": model }),
});
if (!apiResult.ok) {
toastr.error("May be a wrong model name in RVC voice map, please check console for details", 'RVC Voice model load Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
}
const result = await apiResult.json();
console.log("Loaded RVC model:", result.model_loaded);
currentModel = model
}
*/
// MIME types accepted as TTS output before the audio is forwarded to RVC.
const RVC_SUPPORTED_AUDIO_TYPES = ['audio/mpeg', 'audio/wav', 'audio/x-wav', 'audio/wave', 'audio/webm'];

/**
 * Send TTS audio to the extras server so RVC can convert the voice.
 *
 * @param {Response} response - TTS provider response; only its blob() method is used here.
 * @param {string} character - Character name used as the key into extension_settings.rvc.voiceMap.
 * @returns {Promise<Response>} The response returned by the RVC process-audio endpoint.
 * @throws {Error} When no model is mapped to the character, when the TTS payload is not a
 *                 supported audio type, or when the endpoint answers with a non-OK status.
 */
async function rvcVoiceConversion(response, character) {
    const modelName = extension_settings.rvc.voiceMap[character];

    // Check voice map
    if (modelName === undefined) {
        toastr.error("No model is assigned to character '"+character+"', check RVC voice map in the extension menu.", 'RVC Voice map error', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        throw new Error("No RVC model assigned in voice map for current character "+character);
    }

    const audioData = await response.blob();

    // Compare only the MIME essence so that parameters such as "; codecs=..." do not
    // cause an otherwise valid audio payload to be rejected.
    const mimeType = audioData.type.split(';')[0].trim().toLowerCase();
    if (!RVC_SUPPORTED_AUDIO_TYPES.includes(mimeType)) {
        throw new Error(`TTS received HTTP response with invalid data format. Expecting one of ${RVC_SUPPORTED_AUDIO_TYPES.join(', ')}, got ${audioData.type}`);
    }

    console.log("Sending tts audio data to RVC on extras server");

    // The endpoint receives the audio file plus a JSON string holding the conversion parameters.
    const requestData = new FormData();
    requestData.append('AudioFile', audioData, 'record.wav');
    requestData.append("json", JSON.stringify({
        "modelName": modelName,
        "pitchOffset": extension_settings.rvc.pitchOffset,
        "pitchExtraction": extension_settings.rvc.pitchExtraction,
        "indexRate": extension_settings.rvc.indexRate,
        "filterRadius": extension_settings.rvc.filterRadius,
        //"rmsMixRate": extension_settings.rvc.rmsMixRate,
        "protect": extension_settings.rvc.protect
    }));

    const url = new URL(getApiUrl());
    url.pathname = '/api/voice-conversion/rvc/process-audio';

    const apiResult = await doExtrasFetch(url, {
        method: 'POST',
        body: requestData,
    });

    if (!apiResult.ok) {
        toastr.error(apiResult.statusText, 'RVC Voice Conversion Failed', { timeOut: 10000, extendedTimeOut: 20000, preventDuplicates: true });
        throw new Error(`HTTP ${apiResult.status}: ${await apiResult.text()}`);
    }

    return apiResult;
}
//#############################//
// Extension UI and Settings //
//#############################//
// Initial values copied into extension_settings.rvc by loadSettings().
const defaultSettings = {
    enabled: false,
    model: "",
    pitchOffset: 0,
    pitchExtraction: "dio",
    indexRate: 0.88,
    filterRadius: 3,
    //rmsMixRate:1,
    protect: 0.33,
    // Raw text of the voice map textarea; key spelled to match what
    // loadSettings() reads and onApplyClick() writes.
    voiceMapText: "",
    // Parsed character name -> RVC model name mapping.
    voiceMap: {},
};
/**
 * Make sure extension_settings.rvc exists and holds a value for every default
 * setting, then copy the stored values into the settings UI controls.
 */
function loadSettings() {
    // The settings container may not have been created yet; without this guard
    // the property accesses below would throw.
    if (extension_settings.rvc === undefined || extension_settings.rvc === null) {
        extension_settings.rvc = {};
    }

    const settings = extension_settings.rvc;

    // Fill in only the keys that are missing so stored values are never overwritten.
    for (const [key, value] of Object.entries(defaultSettings)) {
        if (settings[key] === undefined) {
            settings[key] = value;
        }
    }

    $('#rvc_enabled').prop('checked', settings.enabled);
    $('#rvc_model').val(settings.model);
    $('#rvc_pitch_offset').val(settings.pitchOffset);
    $('#rvc_pitch_offset_value').text(settings.pitchOffset);
    $('#rvc_pitch_extraction').val(settings.pitchExtraction);
    $('#rvc_pitch_extraction_value').text(settings.pitchExtraction);
    $('#rvc_index_rate').val(settings.indexRate);
    $('#rvc_index_rate_value').text(settings.indexRate);
    $('#rvc_filter_radius').val(settings.filterRadius);
    $("#rvc_filter_radius_value").text(settings.filterRadius);
    //$('#rvc_mix_rate').val(extension_settings.rvc.rmsMixRate);
    $('#rvc_protect').val(settings.protect);
    $("#rvc_protect_value").text(settings.protect);
    $('#rvc_voice_map').val(settings.voiceMapText);
}
/**
 * Parse the voice map textarea ("name=model" entries separated by commas) and,
 * when every entry is valid, store the resulting map and the raw text in
 * extension_settings.rvc and save the settings.
 *
 * The map is built locally and committed only on success, so a syntax error
 * leaves the previously applied map untouched.
 */
async function onApplyClick() {
    const rawText = $('#rvc_voice_map').val();
    const entries = rawText
        .split(",")
        .map((entry) => entry.trim())
        .filter((entry) => entry !== '');

    const voiceMap = {};
    let error = false;

    for (const entry of entries) {
        // Split on the first '=' only, so a model name may itself contain '='.
        const separatorIndex = entry.indexOf("=");
        const characterName = separatorIndex === -1 ? "" : entry.slice(0, separatorIndex).trim();
        const modelName = separatorIndex === -1 ? "" : entry.slice(separatorIndex + 1).trim();

        if (characterName === "" || modelName === "") {
            console.error(DEBUG_PREFIX+"Wrong syntax for voice mapping, expected 'character=model' in:", entry);
            error = true;
            continue;
        }

        voiceMap[characterName] = modelName;
        console.debug(DEBUG_PREFIX+"Added mapping", characterName, "=>", modelName);
    }

    if (error) {
        $("#rvc_status").text("Voice map is invalid, check console for errors");
        $("#rvc_status").css("color", "red");
        return;
    }

    extension_settings.rvc.voiceMap = voiceMap;
    extension_settings.rvc.voiceMapText = rawText;
    $("#rvc_status").text("Successfully applied settings");
    $("#rvc_status").css("color", "green");
    console.debug(DEBUG_PREFIX+"Updated message mapping", extension_settings.rvc.voiceMap);
    saveSettingsDebounced();
}
/** Store the state of the "Enabled" checkbox in the RVC settings and save them. */
async function onEnabledClick() {
    const isChecked = $('#rvc_enabled').is(':checked');
    extension_settings.rvc.enabled = isChecked;
    saveSettingsDebounced();
}
/** Store the selected pitch extraction method in the RVC settings and save them. */
async function onPitchExtractionChange() {
    const selectedMethod = $('#rvc_pitch_extraction').val();
    extension_settings.rvc.pitchExtraction = selectedMethod;
    saveSettingsDebounced();
}
/** Store the index rate slider value as a number, update its label and save the settings. */
async function onIndexRateChange() {
    const sliderValue = $('#rvc_index_rate').val();
    extension_settings.rvc.indexRate = Number(sliderValue);

    const label = $("#rvc_index_rate_value");
    label.text(extension_settings.rvc.indexRate);

    saveSettingsDebounced();
}
/** Store the filter radius slider value as a number, update its label and save the settings. */
async function onFilterRadiusChange() {
    const sliderValue = $('#rvc_filter_radius').val();
    extension_settings.rvc.filterRadius = Number(sliderValue);

    const label = $("#rvc_filter_radius_value");
    label.text(extension_settings.rvc.filterRadius);

    saveSettingsDebounced();
}
/** Store the pitch offset slider value as a number, update its label and save the settings. */
async function onPitchOffsetChange() {
    const sliderValue = $('#rvc_pitch_offset').val();
    extension_settings.rvc.pitchOffset = Number(sliderValue);

    const label = $("#rvc_pitch_offset_value");
    label.text(extension_settings.rvc.pitchOffset);

    saveSettingsDebounced();
}
/** Store the protect slider value as a number, update its label and save the settings. */
async function onProtectChange() {
    const sliderValue = $('#rvc_protect').val();
    extension_settings.rvc.protect = Number(sliderValue);

    const label = $("#rvc_protect_value");
    label.text(extension_settings.rvc.protect);

    saveSettingsDebounced();
}
// Build the RVC settings panel once the document is ready, wire its controls
// to their handlers, then populate the controls from the stored settings.
$(document).ready(function () {
    /**
     * Append the RVC settings drawer to #extensions_settings and attach the
     * event handlers of every control it contains.
     */
    function addExtensionControls() {
        // The voice map placeholder uses '=' because that is the separator
        // onApplyClick() requires between a character name and its model.
        const settingsHtml = `
        <div id="rvc_settings">
            <div class="inline-drawer">
                <div class="inline-drawer-toggle inline-drawer-header">
                    <b>RVC</b>
                    <div class="inline-drawer-icon fa-solid fa-circle-chevron-down down"></div>
                </div>
                <div class="inline-drawer-content">
                    <div>
                        <label class="checkbox_label" for="rvc_enabled">
                            <input type="checkbox" id="rvc_enabled" name="rvc_enabled">
                            <small>Enabled</small>
                        </label>
                    </div>
                    <div>
                        <span>Select Pitch Extraction</span> <br>
                        <select id="rvc_pitch_extraction">
                            <option value="dio">dio</option>
                            <option value="pm">pm</option>
                            <option value="harvest">harvest</option>
                            <option value="torchcrepe">torchcrepe</option>
                            <option value="rmvpe">rmvpe</option>
                        </select>
                    </div>
                    <div>
                        <label for="rvc_index_rate">
                            Index rate for feature retrieval (<span id="rvc_index_rate_value"></span>)
                        </label>
                        <input id="rvc_index_rate" type="range" min="0" max="1" step="0.01" value="0.5" />
                        <label for="rvc_filter_radius">Filter radius (<span id="rvc_filter_radius_value"></span>)</label>
                        <input id="rvc_filter_radius" type="range" min="0" max="7" step="1" value="3" />
                        <label for="rvc_pitch_offset">Pitch offset (<span id="rvc_pitch_offset_value"></span>)</label>
                        <input id="rvc_pitch_offset" type="range" min="-100" max="100" step="1" value="0" />
                        <label for="rvc_protect">Protect amount (<span id="rvc_protect_value"></span>)</label>
                        <input id="rvc_protect" type="range" min="0" max="1" step="0.01" value="0.33" />
                        <label>Voice Map</label>
                        <textarea id="rvc_voice_map" type="text" class="text_pole textarea_compact" rows="4"
                            placeholder="Enter comma separated map of charName=rvcModel. Example: \nAqua=Bella,\nYou=Josh,"></textarea>
                        <div id="rvc_status">
                        </div>
                        <div class="rvc_buttons">
                            <input id="rvc_apply" class="menu_button" type="submit" value="Apply" />
                        </div>
                    </div>
                </div>
            </div>
        </div>
        `;
        $('#extensions_settings').append(settingsHtml);
        $("#rvc_enabled").on("click", onEnabledClick);
        $('#rvc_pitch_extraction').on('change', onPitchExtractionChange);
        $('#rvc_index_rate').on('input', onIndexRateChange);
        $('#rvc_filter_radius').on('input', onFilterRadiusChange);
        $('#rvc_pitch_offset').on('input', onPitchOffsetChange);
        $('#rvc_protect').on('input', onProtectChange);
        $("#rvc_apply").on("click", onApplyClick);
    }

    addExtensionControls(); // No init dependencies
    loadSettings(); // Depends on Extension Controls
});

View File

@@ -0,0 +1,11 @@
{
"display_name": "RVC",
"loading_order": 13,
"requires": ["rvc"],
"optional": [],
"js": "index.js",
"css": "style.css",
"author": "Keij#6799",
"version": "0.1.0",
"homePage": "https://github.com/SillyTavern/SillyTavern"
}

View File

@@ -0,0 +1,3 @@
/* Lays out elements carrying the speech-toggle class as flex containers.
   NOTE(review): the RVC settings markup built in index.js does not appear to
   create any element with this class; confirm whether this rule is needed. */
.speech-toggle {
display: flex;
}

View File

@@ -8,6 +8,7 @@ import { CoquiTtsProvider } from './coquitts.js'
import { SystemTtsProvider } from './system.js' import { SystemTtsProvider } from './system.js'
import { NovelTtsProvider } from './novel.js' import { NovelTtsProvider } from './novel.js'
import { power_user } from '../../power-user.js' import { power_user } from '../../power-user.js'
import { rvcVoiceConversion } from "../rvc/index.js"
const UPDATE_INTERVAL = 1000 const UPDATE_INTERVAL = 1000
@@ -399,8 +400,13 @@ function saveLastValues() {
) )
} }
async function tts(text, voiceId) { async function tts(text, voiceId, char) {
const response = await ttsProvider.generateTts(text, voiceId) let response = await ttsProvider.generateTts(text, voiceId)
// RVC injection
if (extension_settings.rvc.enabled)
response = await rvcVoiceConversion(response, char)
addAudioJob(response) addAudioJob(response)
completeTtsJob() completeTtsJob()
} }
@@ -450,7 +456,7 @@ async function processTtsQueue() {
toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`) toastr.error(`Specified voice for ${char} was not found. Check the TTS extension settings.`)
throw `Unable to attain voiceId for ${char}` throw `Unable to attain voiceId for ${char}`
} }
tts(text, voiceId) tts(text, voiceId, char)
} catch (error) { } catch (error) {
console.error(error) console.error(error)
currentTtsJob = null currentTtsJob = null
@@ -567,6 +573,7 @@ function onEnableClick() {
saveSettingsDebounced() saveSettingsDebounced()
} }
function onAutoGenerationClick() { function onAutoGenerationClick() {
extension_settings.tts.auto_generation = $('#tts_auto_generation').prop('checked'); extension_settings.tts.auto_generation = $('#tts_auto_generation').prop('checked');
saveSettingsDebounced() saveSettingsDebounced()