From 6f35a3bf37c85ae16af47ee24f871b0ba28d3353 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 27 Oct 2022 14:22:03 +0300 Subject: [PATCH] renderer_vulkan: Add single-thread record ability to the scheduler * Async is pretty nice but games that do a lot of flushes might have worse performance due to thread synchronization overhead * I haven't noticed any cases of this yet but it doesn't hurt making this a UI option --- src/citra_qt/configuration/config.cpp | 2 ++ .../configuration/configure_graphics.cpp | 2 ++ .../configuration/configure_graphics.ui | 10 ++++++++ src/core/settings.cpp | 1 + src/core/settings.h | 1 + .../renderer_vulkan/vk_scheduler.cpp | 25 +++++++++++++------ src/video_core/renderer_vulkan/vk_scheduler.h | 9 +++++-- 7 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index b031ef03b..5163a16b0 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -485,6 +485,7 @@ void Config::ReadRendererValues() { ReadSetting(QStringLiteral("graphics_api"), static_cast(Settings::GraphicsAPI::OpenGL)) .toUInt()); Settings::values.physical_device = ReadSetting(QStringLiteral("physical_device"), 0).toUInt(); + Settings::values.async_command_recording = ReadSetting(QStringLiteral("async_command_recording"), true).toBool(); Settings::values.use_hw_renderer = ReadSetting(QStringLiteral("use_hw_renderer"), true).toBool(); Settings::values.use_hw_shader = ReadSetting(QStringLiteral("use_hw_shader"), true).toBool(); @@ -1004,6 +1005,7 @@ void Config::SaveRendererValues() { WriteSetting(QStringLiteral("graphics_api"), static_cast(Settings::values.graphics_api), static_cast(Settings::GraphicsAPI::OpenGL)); WriteSetting(QStringLiteral("physical_device"), Settings::values.physical_device, 0); + WriteSetting(QStringLiteral("async_command_recording"), Settings::values.async_command_recording, true); WriteSetting(QStringLiteral("use_hw_renderer"), 
Settings::values.use_hw_renderer, true); WriteSetting(QStringLiteral("use_hw_shader"), Settings::values.use_hw_shader, true); #ifdef __APPLE__ diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 48abc4a15..00b4ffae2 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -83,6 +83,7 @@ void ConfigureGraphics::SetConfiguration() { ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new); ui->graphics_api_combo->setCurrentIndex(static_cast(Settings::values.graphics_api)); ui->physical_device_combo->setCurrentIndex(static_cast(Settings::values.physical_device)); + ui->toggle_async_recording->setChecked(Settings::values.async_command_recording); } void ConfigureGraphics::ApplyConfiguration() { @@ -96,6 +97,7 @@ void ConfigureGraphics::ApplyConfiguration() { Settings::values.graphics_api = static_cast(ui->graphics_api_combo->currentIndex()); Settings::values.physical_device = static_cast(ui->physical_device_combo->currentIndex()); + Settings::values.async_command_recording = ui->toggle_async_recording->isChecked(); } void ConfigureGraphics::RetranslateUI() { diff --git a/src/citra_qt/configuration/configure_graphics.ui b/src/citra_qt/configuration/configure_graphics.ui index 0dda1f100..81fe84d5e 100644 --- a/src/citra_qt/configuration/configure_graphics.ui +++ b/src/citra_qt/configuration/configure_graphics.ui @@ -171,6 +171,16 @@ Advanced + + + + <html><head/><body><p>Offloads command buffer recording and fragment shader generation to a worker thread. Can improve performance especially on weaker systems. Disable if you notice better performance. 
If unsure, leave it enabled.</p></body></html> + + + Async Command Recording + + + diff --git a/src/core/settings.cpp b/src/core/settings.cpp index a14ec5bdd..2f247305d 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -90,6 +90,7 @@ void LogSettings() { LogSetting("Core_UseCpuJit", values.use_cpu_jit); LogSetting("Core_CPUClockPercentage", values.cpu_clock_percentage); LogSetting("Renderer_GraphicsAPI", GetAPIName(values.graphics_api)); + LogSetting("Renderer_AsyncRecording", values.async_command_recording); LogSetting("Renderer_UseHwRenderer", values.use_hw_renderer); LogSetting("Renderer_UseHwShader", values.use_hw_shader); LogSetting("Renderer_SeparableShader", values.separable_shader); diff --git a/src/core/settings.h b/src/core/settings.h index 93a31575e..933d0cac1 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -169,6 +169,7 @@ struct Values { u16 physical_device; bool renderer_debug; bool dump_command_buffers; + bool async_command_recording; bool use_hw_renderer; bool use_hw_shader; bool separable_shader; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index ee58e46f9..d40c4c48e 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -4,7 +4,7 @@ #include #include #include "common/microprofile.h" -#include "common/thread.h" +#include "core/settings.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" @@ -16,7 +16,7 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co while (command != nullptr) { auto next = command->GetNext(); command->Execute(render_cmdbuf, upload_cmdbuf); - command->~Command(); + std::destroy_at(command); command = next; } submit = false; @@ -26,10 +26,13 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co } 
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer) - : instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore} { - AcquireNewChunk(); + : instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore}, + use_worker_thread{Settings::values.async_command_recording} { AllocateWorkerCommandBuffers(); - worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); + if (use_worker_thread) { + AcquireNewChunk(); + worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); + } } Scheduler::~Scheduler() = default; @@ -47,6 +50,10 @@ void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); void Scheduler::WaitWorker() { + if (!use_worker_thread) { + return; + } + MICROPROFILE_SCOPE(Vulkan_WaitForWorker); DispatchWork(); @@ -162,8 +169,12 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa } }); - chunk->MarkSubmit(); - DispatchWork(); + if (!use_worker_thread) { + AllocateWorkerCommandBuffers(); + } else { + chunk->MarkSubmit(); + DispatchWork(); + } } void Scheduler::AcquireNewChunk() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 090c33068..3b2f9acfa 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -52,6 +52,11 @@ public: /// Records the command to the current chunk. 
template void Record(T&& command) { + if (!use_worker_thread) { + command(render_cmdbuf, upload_cmdbuf); + return; + } + if (chunk->Record(command)) { return; } @@ -144,7 +149,7 @@ private: return false; } Command* const current_last = last; - last = new (data.data() + command_offset) FuncType(std::move(command)); + last = std::construct_at(reinterpret_cast(data.data() + command_offset), std::move(command)); if (current_last) { current_last->SetNext(last); @@ -202,7 +207,7 @@ private: std::condition_variable_any work_cv; std::condition_variable wait_cv; std::jthread worker_thread; - std::jthread prsent_thread; + bool use_worker_thread; }; } // namespace Vulkan