renderer_vulkan: Improve task scheduler synchronization

* Use multiple semaphores for swapchain sync and improve the Submit API
2022-09-24 14:26:59 +03:00
parent 3f9e5a2b42
commit 65400936c7
8 changed files with 69 additions and 63 deletions
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@ -1004,7 +1004,10 @@ void RendererVulkan::SwapBuffers() {
        swapchain.Create(layout.width, layout.height, false);
    }
-    swapchain.AcquireNextImage();
+    // Calling Submit will change the slot so get the required semaphores now
    const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore();
    const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
    swapchain.AcquireNextImage(image_acquired);
    const vk::Viewport viewport = {
        .x = 0.0f,
@ -1032,11 +1035,11 @@ void RendererVulkan::SwapBuffers() {
    DrawScreens(layout, false);
    // Flush all buffers to make the data visible to the GPU before submitting
    vertex_buffer.Flush();
    rasterizer->FlushBuffers();
    vertex_buffer.Flush();
-    scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore());
+    scheduler.Submit(SubmitMode::SwapchainSynced);
-    swapchain.Present();
+    swapchain.Present(present_ready);
    // Inform texture runtime about the switch
    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
--- a/src/video_core/renderer_vulkan/vk_instance.cpp
+++ b/src/video_core/renderer_vulkan/vk_instance.cpp
@ -50,7 +50,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
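    // TODO: GPU select dialog. Until then the device index used below is hard-coded and is not checked against physical_devices.size()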
    auto physical_devices = instance.enumeratePhysicalDevices();
-    physical_device = physical_devices[0];
+    physical_device = physical_devices[1];
    device_properties = physical_device.getProperties();
    CreateDevice();
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@ -172,8 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
 }
 RasterizerVulkan::~RasterizerVulkan() {
-    // Submit any remaining work
+    scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
    scheduler.Submit(true, false);
    VmaAllocator allocator = instance.GetAllocator();
    vk::Device device = instance.GetDevice();
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@ -21,8 +21,6 @@ Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache
 Swapchain::~Swapchain() {
    vk::Device device = instance.GetDevice();
    device.destroySemaphore(render_finished);
    device.destroySemaphore(image_available);
    device.destroySwapchainKHR(swapchain);
    for (auto& image : swapchain_images) {
@ -72,15 +70,6 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
        device.destroySwapchainKHR(old_swapchain);
    }
    // Create sync objects if not already created
    if (!image_available) {
        image_available = device.createSemaphore({});
    }
    if (!render_finished) {
        render_finished = device.createSemaphore({});
    }
    vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
    auto images = device.getSwapchainImagesKHR(swapchain);
@ -132,10 +121,10 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
 // Wait for a maximum of 1 second
 constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
-void Swapchain::AcquireNextImage() {
+void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
    vk::Device device = instance.GetDevice();
-    vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE,
+    vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
-                                                   &current_image);
+                                                   VK_NULL_HANDLE, &current_image);
    switch (result) {
    case vk::Result::eSuccess:
        break;
@ -151,10 +140,10 @@ void Swapchain::AcquireNextImage() {
    }
 }
-void Swapchain::Present() {
+void Swapchain::Present(vk::Semaphore wait_for_present) {
    const vk::PresentInfoKHR present_info = {
        .waitSemaphoreCount = 1,
-        .pWaitSemaphores = &render_finished,
+        .pWaitSemaphores = &wait_for_present,
        .swapchainCount = 1,
        .pSwapchains = &swapchain,
        .pImageIndices = &current_image
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@ -11,7 +11,6 @@
 namespace Vulkan {
 class Instance;
 class TaskScheduler;
 class RenderpassCache;
 class Swapchain {
@ -23,10 +22,10 @@ public:
    void Create(u32 width, u32 height, bool vsync_enabled);
    /// Acquires the next image in the swapchain.
-    void AcquireNextImage();
+    void AcquireNextImage(vk::Semaphore signal_acquired);
    /// Presents the current image and moves to the next one
-    void Present();
+    void Present(vk::Semaphore wait_for_present);
    /// Returns current swapchain state
    vk::Extent2D GetExtent() const {
@ -53,16 +52,6 @@ public:
        return swapchain;
    }
    /// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
    vk::Semaphore GetAvailableSemaphore() const {
        return image_available;
    }
    /// Returns the semaphore that will signal when the current image will be presented
    vk::Semaphore GetPresentSemaphore() const {
        return render_finished;
    }
    /// Returns true when the swapchain should be recreated
    bool NeedsRecreation() const {
        return is_suboptimal || is_outdated;
@ -92,8 +81,6 @@ private:
    // Swapchain state
    std::vector<Image> swapchain_images;
    vk::Semaphore image_available{};
    vk::Semaphore render_finished{};
    u32 current_image = 0;
    u32 current_frame = 0;
    bool vsync_enabled = false;
--- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp
@ -56,6 +56,8 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
    const auto command_buffers = device.allocateCommandBuffers(buffer_info);
    for (std::size_t i = 0; i < commands.size(); i++) {
        commands[i] = ExecutionSlot{
            .image_acquired = device.createSemaphore({}),
            .present_ready = device.createSemaphore({}),
            .fence = device.createFence({}),
            .descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
            .render_command_buffer = command_buffers[2 * i],
@ -83,6 +85,8 @@ TaskScheduler::~TaskScheduler() {
    for (const auto& command : commands) {
        device.destroyFence(command.fence);
        device.destroySemaphore(command.image_acquired);
        device.destroySemaphore(command.present_ready);
        device.destroyDescriptorPool(command.descriptor_pool);
    }
@ -134,8 +138,7 @@ void TaskScheduler::WaitFence(u32 counter) {
    UNREACHABLE();
 }
-void TaskScheduler::Submit(bool wait_completion, bool begin_next,
+void TaskScheduler::Submit(SubmitMode mode) {
                           vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
    const auto& command = commands[current_command];
    command.render_command_buffer.end();
    if (command.use_upload_buffer) {
@ -151,14 +154,15 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
    command_buffers[command_buffer_count++] = command.render_command_buffer;
    const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
    if (instance.IsTimelineSemaphoreSupported()) {
-        const u32 signal_semaphore_count = signal_semaphore ? 2u : 1u;
+        const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
        const std::array signal_values{command.fence_counter, 0ul};
        const std::array signal_semaphores{timeline, signal_semaphore};
        const u32 wait_semaphore_count = wait_semaphore ? 2u : 1u;
        const std::array wait_values{command.fence_counter - 1, 1ul};
-        const std::array wait_semaphores{timeline, wait_semaphore};
+        const std::array wait_semaphores{timeline, command.image_acquired};
        const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
        const std::array signal_values{command.fence_counter, 0ul};
        const std::array signal_semaphores{timeline, command.present_ready};
        const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
            .waitSemaphoreValueCount = wait_semaphore_count,
@ -187,19 +191,19 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
        queue.submit(submit_info);
    } else {
-        const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
+        const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
-        const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
+        const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
        const vk::PipelineStageFlags wait_stage_masks =
                vk::PipelineStageFlagBits::eColorAttachmentOutput;
        const vk::SubmitInfo submit_info = {
            .waitSemaphoreCount = wait_semaphore_count,
-            .pWaitSemaphores = &wait_semaphore,
+            .pWaitSemaphores = &command.image_acquired,
            .pWaitDstStageMask = &wait_stage_masks,
            .commandBufferCount = command_buffer_count,
            .pCommandBuffers = command_buffers.data(),
            .signalSemaphoreCount = signal_semaphore_count,
-            .pSignalSemaphores = &signal_semaphore,
+            .pSignalSemaphores = &command.present_ready,
        };
        vk::Queue queue = instance.GetGraphicsQueue();
@ -207,16 +211,25 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
    }
    // Block host until the GPU catches up
-    if (wait_completion) {
+    if (True(mode & SubmitMode::Flush)) {
        Synchronize(current_command);
    }
    // Switch to next cmdbuffer.
-    if (begin_next) {
+    if (False(mode & SubmitMode::Shutdown)) {
        SwitchSlot();
    }
 }
 /// Returns the last completed fence counter.
 /// When the instance reports timeline semaphore support, the value is read from the
 /// timeline semaphore; otherwise completed_fence_counter is returned.
 u64 TaskScheduler::GetFenceCounter() const {
    vk::Device device = instance.GetDevice();
    if (instance.IsTimelineSemaphoreSupported()) {
        // Query the current counter value of the timeline semaphore from the device.
        // NOTE(review): Submit uses the KHR-suffixed timeline submit info struct; confirm the
        // non-KHR getSemaphoreCounterValue entry point is available on the targeted devices.
        return device.getSemaphoreCounterValue(timeline);
    }
    // Timeline semaphores unavailable: fall back to completed_fence_counter
    return completed_fence_counter;
 }
 vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
    auto& command = commands[current_command];
    if (!command.use_upload_buffer) {
--- a/src/video_core/renderer_vulkan/vk_task_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h
@ -8,6 +8,7 @@
 #include <array>
 #include <functional>
 #include "common/common_types.h"
 #include "common/common_funcs.h"
 #include "video_core/renderer_vulkan/vk_common.h"
 namespace Vulkan {
@ -17,6 +18,14 @@ constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
 class Buffer;
 class Instance;
 /// Flags that select how TaskScheduler::Submit handles the current command slot.
 /// Each value is a single bit, so flags can be combined (e.g. Flush | Shutdown).
 enum class SubmitMode : u8 {
    // Submit waits on the slot's image_acquired semaphore and signals its present_ready semaphore
    SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
    // Submit calls Synchronize on the submitted slot before returning
    Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads
    // Submit skips the SwitchSlot call that would otherwise follow the submission
    Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer
 };
 DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
 class TaskScheduler {
 public:
    TaskScheduler(const Instance& instance);
@ -29,9 +38,10 @@ public:
    void WaitFence(u32 counter);
    /// Submits the current command to the graphics queue
-    void Submit(bool wait_completion = false, bool begin_next = true,
+    void Submit(SubmitMode mode);
-                vk::Semaphore wait = VK_NULL_HANDLE,
+
-                vk::Semaphore signal = VK_NULL_HANDLE);
+    /// Returns the last completed fence counter
    u64 GetFenceCounter() const;
    /// Returns the command buffer used for early upload operations.
    vk::CommandBuffer GetUploadCommandBuffer();
@ -51,9 +61,12 @@ public:
        return current_command;
    }
-    /// Returns the last completed fence counter
+    vk::Semaphore GetImageAcquiredSemaphore() const {
-    u64 GetFenceCounter() const {
+        return commands[current_command].image_acquired;
-        return completed_fence_counter;
+    }
    /// Returns the present_ready semaphore that belongs to the current execution slot.
    /// The result is looked up through current_command, so a later call returns a different
    /// semaphore once the scheduler has switched slots.
    vk::Semaphore GetPresentReadySemaphore() const {
        return commands[current_command].present_ready;
    }
 private:
@ -68,15 +81,17 @@ private:
    struct ExecutionSlot {
        bool use_upload_buffer = false;
        u64 fence_counter = 0;
-        vk::Fence fence{};
+        vk::Semaphore image_acquired;
        vk::Semaphore present_ready;
        vk::Fence fence;
        vk::DescriptorPool descriptor_pool;
-        vk::CommandBuffer render_command_buffer{};
+        vk::CommandBuffer render_command_buffer;
-        vk::CommandBuffer upload_command_buffer{};
+        vk::CommandBuffer upload_command_buffer;
    };
    vk::CommandPool command_pool{};
    vk::Semaphore timeline{};
-    std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands;
+    std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
    u32 current_command = 0;
 };
--- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
@ -593,7 +593,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
        command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
                                         staging.buffer, region_count, copy_regions.data());
-        scheduler.Submit(true);
+        scheduler.Submit(SubmitMode::Flush);
    }
    // Lock this data until the next scheduler switch