renderer_vulkan: Improve task scheduler synchronization

* Use multiple semaphores for swapchain sync and improve the Submit API
This commit is contained in:
GPUCode
2022-09-24 14:26:59 +03:00
parent 3f9e5a2b42
commit 65400936c7
8 changed files with 69 additions and 63 deletions

View File

@ -1004,7 +1004,10 @@ void RendererVulkan::SwapBuffers() {
swapchain.Create(layout.width, layout.height, false); swapchain.Create(layout.width, layout.height, false);
} }
swapchain.AcquireNextImage(); // Calling Submit will change the slot so get the required semaphores now
const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore();
const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
swapchain.AcquireNextImage(image_acquired);
const vk::Viewport viewport = { const vk::Viewport viewport = {
.x = 0.0f, .x = 0.0f,
@ -1032,11 +1035,11 @@ void RendererVulkan::SwapBuffers() {
DrawScreens(layout, false); DrawScreens(layout, false);
// Flush all buffers to make the data visible to the GPU before submitting // Flush all buffers to make the data visible to the GPU before submitting
vertex_buffer.Flush();
rasterizer->FlushBuffers(); rasterizer->FlushBuffers();
vertex_buffer.Flush();
scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore()); scheduler.Submit(SubmitMode::SwapchainSynced);
swapchain.Present(); swapchain.Present(present_ready);
// Inform texture runtime about the switch // Inform texture runtime about the switch
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex()); runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());

View File

@ -50,7 +50,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
// TODO: GPU select dialog // TODO: GPU select dialog
auto physical_devices = instance.enumeratePhysicalDevices(); auto physical_devices = instance.enumeratePhysicalDevices();
physical_device = physical_devices[0]; physical_device = physical_devices[1];
device_properties = physical_device.getProperties(); device_properties = physical_device.getProperties();
CreateDevice(); CreateDevice();

View File

@ -172,8 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
} }
RasterizerVulkan::~RasterizerVulkan() { RasterizerVulkan::~RasterizerVulkan() {
// Submit any remaining work scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
scheduler.Submit(true, false);
VmaAllocator allocator = instance.GetAllocator(); VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice(); vk::Device device = instance.GetDevice();

View File

@ -21,8 +21,6 @@ Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache
Swapchain::~Swapchain() { Swapchain::~Swapchain() {
vk::Device device = instance.GetDevice(); vk::Device device = instance.GetDevice();
device.destroySemaphore(render_finished);
device.destroySemaphore(image_available);
device.destroySwapchainKHR(swapchain); device.destroySwapchainKHR(swapchain);
for (auto& image : swapchain_images) { for (auto& image : swapchain_images) {
@ -72,15 +70,6 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
device.destroySwapchainKHR(old_swapchain); device.destroySwapchainKHR(old_swapchain);
} }
// Create sync objects if not already created
if (!image_available) {
image_available = device.createSemaphore({});
}
if (!render_finished) {
render_finished = device.createSemaphore({});
}
vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass(); vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
auto images = device.getSwapchainImagesKHR(swapchain); auto images = device.getSwapchainImagesKHR(swapchain);
@ -132,10 +121,10 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
// Wait for maximum of 1 second // Wait for maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000; constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage() { void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
vk::Device device = instance.GetDevice(); vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE, vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
&current_image); VK_NULL_HANDLE, &current_image);
switch (result) { switch (result) {
case vk::Result::eSuccess: case vk::Result::eSuccess:
break; break;
@ -151,10 +140,10 @@ void Swapchain::AcquireNextImage() {
} }
} }
void Swapchain::Present() { void Swapchain::Present(vk::Semaphore wait_for_present) {
const vk::PresentInfoKHR present_info = { const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1, .waitSemaphoreCount = 1,
.pWaitSemaphores = &render_finished, .pWaitSemaphores = &wait_for_present,
.swapchainCount = 1, .swapchainCount = 1,
.pSwapchains = &swapchain, .pSwapchains = &swapchain,
.pImageIndices = &current_image .pImageIndices = &current_image

View File

@ -11,7 +11,6 @@
namespace Vulkan { namespace Vulkan {
class Instance; class Instance;
class TaskScheduler;
class RenderpassCache; class RenderpassCache;
class Swapchain { class Swapchain {
@ -23,10 +22,10 @@ public:
void Create(u32 width, u32 height, bool vsync_enabled); void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquires the next image in the swapchain. /// Acquires the next image in the swapchain.
void AcquireNextImage(); void AcquireNextImage(vk::Semaphore signal_acquired);
/// Presents the current image and moves to the next one /// Presents the current image and moves to the next one
void Present(); void Present(vk::Semaphore wait_for_present);
/// Returns current swapchain state /// Returns current swapchain state
vk::Extent2D GetExtent() const { vk::Extent2D GetExtent() const {
@ -53,16 +52,6 @@ public:
return swapchain; return swapchain;
} }
/// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
vk::Semaphore GetAvailableSemaphore() const {
return image_available;
}
/// Returns the semaphore that will signal when the current image will be presented
vk::Semaphore GetPresentSemaphore() const {
return render_finished;
}
/// Returns true when the swapchain should be recreated /// Returns true when the swapchain should be recreated
bool NeedsRecreation() const { bool NeedsRecreation() const {
return is_suboptimal || is_outdated; return is_suboptimal || is_outdated;
@ -92,8 +81,6 @@ private:
// Swapchain state // Swapchain state
std::vector<Image> swapchain_images; std::vector<Image> swapchain_images;
vk::Semaphore image_available{};
vk::Semaphore render_finished{};
u32 current_image = 0; u32 current_image = 0;
u32 current_frame = 0; u32 current_frame = 0;
bool vsync_enabled = false; bool vsync_enabled = false;

View File

@ -56,6 +56,8 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
const auto command_buffers = device.allocateCommandBuffers(buffer_info); const auto command_buffers = device.allocateCommandBuffers(buffer_info);
for (std::size_t i = 0; i < commands.size(); i++) { for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = ExecutionSlot{ commands[i] = ExecutionSlot{
.image_acquired = device.createSemaphore({}),
.present_ready = device.createSemaphore({}),
.fence = device.createFence({}), .fence = device.createFence({}),
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info), .descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
.render_command_buffer = command_buffers[2 * i], .render_command_buffer = command_buffers[2 * i],
@ -83,6 +85,8 @@ TaskScheduler::~TaskScheduler() {
for (const auto& command : commands) { for (const auto& command : commands) {
device.destroyFence(command.fence); device.destroyFence(command.fence);
device.destroySemaphore(command.image_acquired);
device.destroySemaphore(command.present_ready);
device.destroyDescriptorPool(command.descriptor_pool); device.destroyDescriptorPool(command.descriptor_pool);
} }
@ -134,8 +138,7 @@ void TaskScheduler::WaitFence(u32 counter) {
UNREACHABLE(); UNREACHABLE();
} }
void TaskScheduler::Submit(bool wait_completion, bool begin_next, void TaskScheduler::Submit(SubmitMode mode) {
vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
const auto& command = commands[current_command]; const auto& command = commands[current_command];
command.render_command_buffer.end(); command.render_command_buffer.end();
if (command.use_upload_buffer) { if (command.use_upload_buffer) {
@ -151,14 +154,15 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
command_buffers[command_buffer_count++] = command.render_command_buffer; command_buffers[command_buffer_count++] = command.render_command_buffer;
const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
if (instance.IsTimelineSemaphoreSupported()) { if (instance.IsTimelineSemaphoreSupported()) {
const u32 signal_semaphore_count = signal_semaphore ? 2u : 1u; const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array signal_values{command.fence_counter, 0ul};
const std::array signal_semaphores{timeline, signal_semaphore};
const u32 wait_semaphore_count = wait_semaphore ? 2u : 1u;
const std::array wait_values{command.fence_counter - 1, 1ul}; const std::array wait_values{command.fence_counter - 1, 1ul};
const std::array wait_semaphores{timeline, wait_semaphore}; const std::array wait_semaphores{timeline, command.image_acquired};
const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array signal_values{command.fence_counter, 0ul};
const std::array signal_semaphores{timeline, command.present_ready};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = wait_semaphore_count, .waitSemaphoreValueCount = wait_semaphore_count,
@ -187,19 +191,19 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
queue.submit(submit_info); queue.submit(submit_info);
} else { } else {
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u; const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u; const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
const vk::PipelineStageFlags wait_stage_masks = const vk::PipelineStageFlags wait_stage_masks =
vk::PipelineStageFlagBits::eColorAttachmentOutput; vk::PipelineStageFlagBits::eColorAttachmentOutput;
const vk::SubmitInfo submit_info = { const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count, .waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &wait_semaphore, .pWaitSemaphores = &command.image_acquired,
.pWaitDstStageMask = &wait_stage_masks, .pWaitDstStageMask = &wait_stage_masks,
.commandBufferCount = command_buffer_count, .commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(), .pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count, .signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &signal_semaphore, .pSignalSemaphores = &command.present_ready,
}; };
vk::Queue queue = instance.GetGraphicsQueue(); vk::Queue queue = instance.GetGraphicsQueue();
@ -207,16 +211,25 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
} }
// Block host until the GPU catches up // Block host until the GPU catches up
if (wait_completion) { if (True(mode & SubmitMode::Flush)) {
Synchronize(current_command); Synchronize(current_command);
} }
// Switch to next cmdbuffer. // Switch to next cmdbuffer.
if (begin_next) { if (False(mode & SubmitMode::Shutdown)) {
SwitchSlot(); SwitchSlot();
} }
} }
/// Returns the last completed fence counter.
/// When the instance reports timeline semaphore support the value is read from the
/// timeline semaphore; otherwise the cached completed_fence_counter is returned.
u64 TaskScheduler::GetFenceCounter() const {
    const vk::Device device = instance.GetDevice();
    return instance.IsTimelineSemaphoreSupported() ? device.getSemaphoreCounterValue(timeline)
                                                   : completed_fence_counter;
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() { vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& command = commands[current_command]; auto& command = commands[current_command];
if (!command.use_upload_buffer) { if (!command.use_upload_buffer) {

View File

@ -8,6 +8,7 @@
#include <array> #include <array>
#include <functional> #include <functional>
#include "common/common_types.h" #include "common/common_types.h"
#include "common/common_funcs.h"
#include "video_core/renderer_vulkan/vk_common.h" #include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan { namespace Vulkan {
@ -17,6 +18,14 @@ constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
class Buffer; class Buffer;
class Instance; class Instance;
/// Bit flags selecting how TaskScheduler::Submit behaves; each enumerator occupies its own bit.
enum class SubmitMode : u8 {
    /// Synchronizes command buffer execution with the swapchain
    SwapchainSynced = 0x01,
    /// Causes a GPU command flush, useful for texture downloads
    Flush = 0x02,
    /// Submits all current commands without starting a new command buffer
    Shutdown = 0x04,
};
DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
class TaskScheduler { class TaskScheduler {
public: public:
TaskScheduler(const Instance& instance); TaskScheduler(const Instance& instance);
@ -29,9 +38,10 @@ public:
void WaitFence(u32 counter); void WaitFence(u32 counter);
/// Submits the current command to the graphics queue /// Submits the current command to the graphics queue
void Submit(bool wait_completion = false, bool begin_next = true, void Submit(SubmitMode mode);
vk::Semaphore wait = VK_NULL_HANDLE,
vk::Semaphore signal = VK_NULL_HANDLE); /// Returns the last completed fence counter
u64 GetFenceCounter() const;
/// Returns the command buffer used for early upload operations. /// Returns the command buffer used for early upload operations.
vk::CommandBuffer GetUploadCommandBuffer(); vk::CommandBuffer GetUploadCommandBuffer();
@ -51,9 +61,12 @@ public:
return current_command; return current_command;
} }
/// Returns the last completed fence counter vk::Semaphore GetImageAcquiredSemaphore() const {
u64 GetFenceCounter() const { return commands[current_command].image_acquired;
return completed_fence_counter; }
vk::Semaphore GetPresentReadySemaphore() const {
return commands[current_command].present_ready;
} }
private: private:
@ -68,15 +81,17 @@ private:
struct ExecutionSlot { struct ExecutionSlot {
bool use_upload_buffer = false; bool use_upload_buffer = false;
u64 fence_counter = 0; u64 fence_counter = 0;
vk::Fence fence{}; vk::Semaphore image_acquired;
vk::Semaphore present_ready;
vk::Fence fence;
vk::DescriptorPool descriptor_pool; vk::DescriptorPool descriptor_pool;
vk::CommandBuffer render_command_buffer{}; vk::CommandBuffer render_command_buffer;
vk::CommandBuffer upload_command_buffer{}; vk::CommandBuffer upload_command_buffer;
}; };
vk::CommandPool command_pool{}; vk::CommandPool command_pool{};
vk::Semaphore timeline{}; vk::Semaphore timeline{};
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands; std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
u32 current_command = 0; u32 current_command = 0;
}; };

View File

@ -593,7 +593,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data()); staging.buffer, region_count, copy_regions.data());
scheduler.Submit(true); scheduler.Submit(SubmitMode::Flush);
} }
// Lock this data until the next scheduler switch // Lock this data until the next scheduler switch