renderer_vulkan: Submit present frames from the EmuThread

* This commit also reworks scheduler synchronization somewhat to be more reliable. Flush is given an atomic_bool to signal when the submit is done
This commit is contained in:
GPUCode
2023-03-04 22:22:12 +02:00
parent 32cb44d2b9
commit ad4339464a
9 changed files with 70 additions and 73 deletions

View File

@ -217,9 +217,8 @@ void RendererVulkan::RenderToMailbox(const Layout::FramebufferLayout& layout,
DrawScreens(frame, layout, flipped); DrawScreens(frame, layout, flipped);
scheduler.Flush(frame->render_ready); scheduler.Flush(frame->render_ready, nullptr, &frame->is_submitted);
scheduler.Record([&mailbox, frame](vk::CommandBuffer) { mailbox->Present(frame); }); mailbox->Present(frame);
scheduler.DispatchWork();
} }
void RendererVulkan::BeginRendering(Frame* frame) { void RendererVulkan::BeginRendering(Frame* frame) {

View File

@ -188,7 +188,8 @@ void DescriptorManager::BuildLayouts() {
pipeline_layout = device.createPipelineLayout(layout_info); pipeline_layout = device.createPipelineLayout(layout_info);
} }
std::vector<vk::DescriptorSet> DescriptorManager::AllocateSets(vk::DescriptorSetLayout layout, u32 num_sets) { std::vector<vk::DescriptorSet> DescriptorManager::AllocateSets(vk::DescriptorSetLayout layout,
u32 num_sets) {
static std::array<vk::DescriptorSetLayout, MAX_BATCH_SIZE> layouts; static std::array<vk::DescriptorSetLayout, MAX_BATCH_SIZE> layouts;
layouts.fill(layout); layouts.fill(layout);

View File

@ -44,12 +44,12 @@ void RenderpassCache::ClearFramebuffers() {
} }
void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_stencil, void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_stencil,
vk::Rect2D render_area, bool do_clear, vk::ClearValue clear) { vk::Rect2D render_area, bool do_clear, vk::ClearValue clear) {
return BeginRendering(Framebuffer{color, depth_stencil, render_area}, do_clear, clear); return BeginRendering(Framebuffer{color, depth_stencil, render_area}, do_clear, clear);
} }
void RenderpassCache::BeginRendering(const Framebuffer& framebuffer, bool do_clear, void RenderpassCache::BeginRendering(const Framebuffer& framebuffer, bool do_clear,
vk::ClearValue clear) { vk::ClearValue clear) {
RenderingInfo new_info = { RenderingInfo new_info = {
.color{ .color{
.aspect = vk::ImageAspectFlagBits::eColor, .aspect = vk::ImageAspectFlagBits::eColor,

View File

@ -55,9 +55,9 @@ public:
/// Begins a new renderpass only when no other renderpass is currently active /// Begins a new renderpass only when no other renderpass is currently active
void BeginRendering(const Framebuffer& framebuffer, bool do_clear = false, void BeginRendering(const Framebuffer& framebuffer, bool do_clear = false,
vk::ClearValue clear = {}); vk::ClearValue clear = {});
void BeginRendering(Surface* const color, Surface* const depth_stencil, vk::Rect2D render_area, void BeginRendering(Surface* const color, Surface* const depth_stencil, vk::Rect2D render_area,
bool do_clear = false, vk::ClearValue clear = {}); bool do_clear = false, vk::ClearValue clear = {});
/// Exits from any currently active renderpass instance /// Exits from any currently active renderpass instance
void EndRendering(); void EndRendering();

View File

@ -42,27 +42,16 @@ Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache
Scheduler::~Scheduler() = default; Scheduler::~Scheduler() = default;
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
SubmitExecution(signal, wait);
}
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
const u64 presubmit_tick = CurrentTick(); const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal, wait); std::atomic_bool submit_done{false};
WaitWorker();
Wait(presubmit_tick);
}
void Scheduler::WaitWorker() { Flush(signal, wait, &submit_done);
if (!use_worker_thread) { if (use_worker_thread) {
return; MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
submit_done.wait(false);
} }
Wait(presubmit_tick);
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
std::unique_lock lock{work_mutex};
wait_cv.wait(lock, [this] { return work_queue.empty(); });
} }
void Scheduler::DispatchWork() { void Scheduler::DispatchWork() {
@ -116,56 +105,60 @@ void Scheduler::AllocateWorkerCommandBuffers() {
current_cmdbuf.begin(begin_info); current_cmdbuf.begin(begin_info);
} }
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait, std::atomic_bool* submit_done) {
const vk::Semaphore handle = master_semaphore.Handle(); const vk::Semaphore handle = master_semaphore.Handle();
const u64 signal_value = master_semaphore.NextTick(); const u64 signal_value = master_semaphore.NextTick();
state = StateFlags::AllDirty; state = StateFlags::AllDirty;
renderpass_cache.EndRendering(); renderpass_cache.EndRendering();
Record( Record([signal, wait, handle, signal_value, submit_done, this](vk::CommandBuffer cmdbuf) {
[signal_semaphore, wait_semaphore, handle, signal_value, this](vk::CommandBuffer cmdbuf) { MICROPROFILE_SCOPE(Vulkan_Submit);
MICROPROFILE_SCOPE(Vulkan_Submit); cmdbuf.end();
cmdbuf.end();
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const u32 num_signal_semaphores = signal ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)}; const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{handle, signal_semaphore}; const std::array signal_semaphores{handle, signal};
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; const u32 num_wait_semaphores = wait ? 2U : 1U;
const std::array wait_values{signal_value - 1, u64(1)}; const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{handle, wait_semaphore}; const std::array wait_semaphores{handle, wait};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = { static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::PipelineStageFlagBits::eColorAttachmentOutput,
}; };
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = num_wait_semaphores, .waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(), .pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores, .signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(), .pSignalSemaphoreValues = signal_values.data(),
}; };
const vk::SubmitInfo submit_info = { const vk::SubmitInfo submit_info = {
.pNext = &timeline_si, .pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores, .waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(), .pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(), .pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 1u, .commandBufferCount = 1u,
.pCommandBuffers = &cmdbuf, .pCommandBuffers = &cmdbuf,
.signalSemaphoreCount = num_signal_semaphores, .signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(), .pSignalSemaphores = signal_semaphores.data(),
}; };
try { try {
std::scoped_lock lock{queue_mutex}; std::scoped_lock lock{queue_mutex};
instance.GetGraphicsQueue().submit(submit_info); instance.GetGraphicsQueue().submit(submit_info);
} catch (vk::DeviceLostError& err) { } catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE(); UNREACHABLE();
} }
});
if (submit_done) {
*submit_done = true;
submit_done->notify_one();
}
});
if (!use_worker_thread) { if (!use_worker_thread) {
AllocateWorkerCommandBuffers(); AllocateWorkerCommandBuffers();

View File

@ -3,6 +3,7 @@
#pragma once #pragma once
#include <atomic>
#include <condition_variable> #include <condition_variable>
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>
@ -38,15 +39,12 @@ public:
~Scheduler(); ~Scheduler();
/// Sends the current execution context to the GPU. /// Sends the current execution context to the GPU.
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr,
std::atomic_bool* submit_done = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete. /// Sends the current execution context to the GPU and waits for it to complete.
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread. /// Sends currently recorded work to the worker thread.
void DispatchWork(); void DispatchWork();
@ -199,8 +197,6 @@ private:
void AllocateWorkerCommandBuffers(); void AllocateWorkerCommandBuffers();
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
void AcquireNewChunk(); void AcquireNewChunk();
private: private:

View File

@ -41,6 +41,7 @@ PresentMailbox::PresentMailbox(const Instance& instance_, Swapchain& swapchain_,
for (u32 i = 0; i < SWAP_CHAIN_SIZE; i++) { for (u32 i = 0; i < SWAP_CHAIN_SIZE; i++) {
Frame& frame = swap_chain[i]; Frame& frame = swap_chain[i];
frame.index = i;
frame.cmdbuf = command_buffers[i]; frame.cmdbuf = command_buffers[i];
frame.render_ready = device.createSemaphore({}); frame.render_ready = device.createSemaphore({});
frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}); frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled});
@ -162,6 +163,7 @@ Frame* PresentMailbox::GetRenderFrame() {
} }
device.resetFences(frame->present_done); device.resetFences(frame->present_done);
frame->is_submitted = false;
return frame; return frame;
} }
@ -326,6 +328,9 @@ void PresentMailbox::CopyToSwapchain(Frame* frame) {
.pSignalSemaphores = &present_ready, .pSignalSemaphores = &present_ready,
}; };
// Ensure we won't wait on a semaphore that has no way of being signaled
frame->is_submitted.wait(false);
try { try {
std::scoped_lock lock{scheduler.QueueMutex(), frame->fence_mutex}; std::scoped_lock lock{scheduler.QueueMutex(), frame->fence_mutex};
graphics_queue.submit(submit_info, frame->present_done); graphics_queue.submit(submit_info, frame->present_done);

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <atomic>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include <queue> #include <queue>
@ -21,6 +22,7 @@ class RenderpassCache;
struct Frame { struct Frame {
u32 width{}; u32 width{};
u32 height{}; u32 height{};
u32 index{};
VmaAllocation allocation{}; VmaAllocation allocation{};
vk::Framebuffer framebuffer{}; vk::Framebuffer framebuffer{};
vk::Image image{}; vk::Image image{};
@ -29,6 +31,7 @@ struct Frame {
vk::Fence present_done{}; vk::Fence present_done{};
std::mutex fence_mutex{}; std::mutex fence_mutex{};
vk::CommandBuffer cmdbuf{}; vk::CommandBuffer cmdbuf{};
std::atomic_bool is_submitted{false};
}; };
class PresentMailbox final { class PresentMailbox final {

View File

@ -446,7 +446,7 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface,
}; };
renderpass_cache.BeginRendering(color_surface, depth_surface, render_area, true, renderpass_cache.BeginRendering(color_surface, depth_surface, render_area, true,
MakeClearValue(clear.value)); MakeClearValue(clear.value));
renderpass_cache.EndRendering(); renderpass_cache.EndRendering();
scheduler.Record([params, access_flag, pipeline_flags](vk::CommandBuffer cmdbuf) { scheduler.Record([params, access_flag, pipeline_flags](vk::CommandBuffer cmdbuf) {
@ -1074,7 +1074,7 @@ vk::PipelineStageFlags Surface::PipelineStageFlags() const noexcept {
return vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eFragmentShader | return vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eFragmentShader |
(alloc.is_framebuffer ? attachment_flags : vk::PipelineStageFlagBits::eNone) | (alloc.is_framebuffer ? attachment_flags : vk::PipelineStageFlagBits::eNone) |
(alloc.is_storage ? vk::PipelineStageFlagBits::eComputeShader (alloc.is_storage ? vk::PipelineStageFlagBits::eComputeShader
: vk::PipelineStageFlagBits::eNone); : vk::PipelineStageFlagBits::eNone);
} }
vk::ImageView Surface::DepthView() noexcept { vk::ImageView Surface::DepthView() noexcept {