renderer_vulkan: Improve task scheduler synchronization

* Use multiple semaphores for swapchain sync and improve the Submit API
This commit is contained in:
GPUCode
2022-09-24 14:26:59 +03:00
parent 634e6427a8
commit 66158841cb
8 changed files with 69 additions and 63 deletions

View File

@ -1004,7 +1004,10 @@ void RendererVulkan::SwapBuffers() {
swapchain.Create(layout.width, layout.height, false);
}
swapchain.AcquireNextImage();
// Calling Submit will change the slot so get the required semaphores now
const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore();
const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
swapchain.AcquireNextImage(image_acquired);
const vk::Viewport viewport = {
.x = 0.0f,
@ -1032,11 +1035,11 @@ void RendererVulkan::SwapBuffers() {
DrawScreens(layout, false);
// Flush all buffers to make the data visible to the GPU before submitting
vertex_buffer.Flush();
rasterizer->FlushBuffers();
vertex_buffer.Flush();
scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore());
swapchain.Present();
scheduler.Submit(SubmitMode::SwapchainSynced);
swapchain.Present(present_ready);
// Inform texture runtime about the switch
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());

View File

@ -50,7 +50,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
// TODO: GPU select dialog
auto physical_devices = instance.enumeratePhysicalDevices();
physical_device = physical_devices[0];
physical_device = physical_devices[1];
device_properties = physical_device.getProperties();
CreateDevice();

View File

@ -172,8 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
}
RasterizerVulkan::~RasterizerVulkan() {
// Submit any remaining work
scheduler.Submit(true, false);
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
VmaAllocator allocator = instance.GetAllocator();
vk::Device device = instance.GetDevice();

View File

@ -21,8 +21,6 @@ Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache
Swapchain::~Swapchain() {
vk::Device device = instance.GetDevice();
device.destroySemaphore(render_finished);
device.destroySemaphore(image_available);
device.destroySwapchainKHR(swapchain);
for (auto& image : swapchain_images) {
@ -72,15 +70,6 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
device.destroySwapchainKHR(old_swapchain);
}
// Create sync objects if not already created
if (!image_available) {
image_available = device.createSemaphore({});
}
if (!render_finished) {
render_finished = device.createSemaphore({});
}
vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
auto images = device.getSwapchainImagesKHR(swapchain);
@ -132,10 +121,10 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
// Wait for maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage() {
void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE,
&current_image);
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
VK_NULL_HANDLE, &current_image);
switch (result) {
case vk::Result::eSuccess:
break;
@ -151,10 +140,10 @@ void Swapchain::AcquireNextImage() {
}
}
void Swapchain::Present() {
void Swapchain::Present(vk::Semaphore wait_for_present) {
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &render_finished,
.pWaitSemaphores = &wait_for_present,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image

View File

@ -11,7 +11,6 @@
namespace Vulkan {
class Instance;
class TaskScheduler;
class RenderpassCache;
class Swapchain {
@ -23,10 +22,10 @@ public:
void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquires the next image in the swapchain.
void AcquireNextImage();
void AcquireNextImage(vk::Semaphore signal_acquired);
/// Presents the current image and moves to the next one
void Present();
void Present(vk::Semaphore wait_for_present);
/// Returns current swapchain state
vk::Extent2D GetExtent() const {
@ -53,16 +52,6 @@ public:
return swapchain;
}
/// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
vk::Semaphore GetAvailableSemaphore() const {
return image_available;
}
/// Returns the semaphore that will signal when the current image will be presented
vk::Semaphore GetPresentSemaphore() const {
return render_finished;
}
/// Returns true when the swapchain should be recreated
bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
@ -92,8 +81,6 @@ private:
// Swapchain state
std::vector<Image> swapchain_images;
vk::Semaphore image_available{};
vk::Semaphore render_finished{};
u32 current_image = 0;
u32 current_frame = 0;
bool vsync_enabled = false;

View File

@ -56,6 +56,8 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = ExecutionSlot{
.image_acquired = device.createSemaphore({}),
.present_ready = device.createSemaphore({}),
.fence = device.createFence({}),
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
.render_command_buffer = command_buffers[2 * i],
@ -83,6 +85,8 @@ TaskScheduler::~TaskScheduler() {
for (const auto& command : commands) {
device.destroyFence(command.fence);
device.destroySemaphore(command.image_acquired);
device.destroySemaphore(command.present_ready);
device.destroyDescriptorPool(command.descriptor_pool);
}
@ -134,8 +138,7 @@ void TaskScheduler::WaitFence(u32 counter) {
UNREACHABLE();
}
void TaskScheduler::Submit(bool wait_completion, bool begin_next,
vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
void TaskScheduler::Submit(SubmitMode mode) {
const auto& command = commands[current_command];
command.render_command_buffer.end();
if (command.use_upload_buffer) {
@ -151,14 +154,15 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
command_buffers[command_buffer_count++] = command.render_command_buffer;
const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
if (instance.IsTimelineSemaphoreSupported()) {
const u32 signal_semaphore_count = signal_semaphore ? 2u : 1u;
const std::array signal_values{command.fence_counter, 0ul};
const std::array signal_semaphores{timeline, signal_semaphore};
const u32 wait_semaphore_count = wait_semaphore ? 2u : 1u;
const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array wait_values{command.fence_counter - 1, 1ul};
const std::array wait_semaphores{timeline, wait_semaphore};
const std::array wait_semaphores{timeline, command.image_acquired};
const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array signal_values{command.fence_counter, 0ul};
const std::array signal_semaphores{timeline, command.present_ready};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = wait_semaphore_count,
@ -187,19 +191,19 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
queue.submit(submit_info);
} else {
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
const vk::PipelineStageFlags wait_stage_masks =
vk::PipelineStageFlagBits::eColorAttachmentOutput;
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &wait_semaphore,
.pWaitSemaphores = &command.image_acquired,
.pWaitDstStageMask = &wait_stage_masks,
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &signal_semaphore,
.pSignalSemaphores = &command.present_ready,
};
vk::Queue queue = instance.GetGraphicsQueue();
@ -207,16 +211,25 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
}
// Block host until the GPU catches up
if (wait_completion) {
if (True(mode & SubmitMode::Flush)) {
Synchronize(current_command);
}
// Switch to next cmdbuffer.
if (begin_next) {
if (False(mode & SubmitMode::Shutdown)) {
SwitchSlot();
}
}
/// Returns the last completed fence counter.
/// When the instance reports timeline semaphore support, the value is read from the
/// timeline semaphore's counter; otherwise the cached completed_fence_counter is returned.
u64 TaskScheduler::GetFenceCounter() const {
    const vk::Device logical_device = instance.GetDevice();
    const bool has_timeline = instance.IsTimelineSemaphoreSupported();

    return has_timeline ? logical_device.getSemaphoreCounterValue(timeline)
                        : completed_fence_counter;
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& command = commands[current_command];
if (!command.use_upload_buffer) {

View File

@ -8,6 +8,7 @@
#include <array>
#include <functional>
#include "common/common_types.h"
#include "common/common_funcs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
@ -17,6 +18,14 @@ constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
class Buffer;
class Instance;
/// Bit flags accepted by TaskScheduler::Submit; values may be combined with the
/// flag operators declared below.
enum class SubmitMode : u8 {
    /// Synchronizes command buffer execution with the swapchain
    SwapchainSynced = 0x01,
    /// Causes a GPU command flush, useful for texture downloads
    Flush = 0x02,
    /// Submits all current commands without starting a new command buffer
    Shutdown = 0x04,
};

DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
class TaskScheduler {
public:
TaskScheduler(const Instance& instance);
@ -29,9 +38,10 @@ public:
void WaitFence(u32 counter);
/// Submits the current command to the graphics queue
void Submit(bool wait_completion = false, bool begin_next = true,
vk::Semaphore wait = VK_NULL_HANDLE,
vk::Semaphore signal = VK_NULL_HANDLE);
void Submit(SubmitMode mode);
/// Returns the last completed fence counter
u64 GetFenceCounter() const;
/// Returns the command buffer used for early upload operations.
vk::CommandBuffer GetUploadCommandBuffer();
@ -51,9 +61,12 @@ public:
return current_command;
}
/// Returns the last completed fence counter
u64 GetFenceCounter() const {
return completed_fence_counter;
/// Returns the image_acquired semaphore of the execution slot selected by current_command
vk::Semaphore GetImageAcquiredSemaphore() const {
    const auto& active_slot = commands[current_command];
    return active_slot.image_acquired;
}
/// Returns the present_ready semaphore of the execution slot selected by current_command
vk::Semaphore GetPresentReadySemaphore() const {
    const auto& active_slot = commands[current_command];
    return active_slot.present_ready;
}
private:
@ -68,15 +81,17 @@ private:
struct ExecutionSlot {
bool use_upload_buffer = false;
u64 fence_counter = 0;
vk::Fence fence{};
vk::Semaphore image_acquired;
vk::Semaphore present_ready;
vk::Fence fence;
vk::DescriptorPool descriptor_pool;
vk::CommandBuffer render_command_buffer{};
vk::CommandBuffer upload_command_buffer{};
vk::CommandBuffer render_command_buffer;
vk::CommandBuffer upload_command_buffer;
};
vk::CommandPool command_pool{};
vk::Semaphore timeline{};
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands;
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
u32 current_command = 0;
};

View File

@ -593,7 +593,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, region_count, copy_regions.data());
scheduler.Submit(true);
scheduler.Submit(SubmitMode::Flush);
}
// Lock this data until the next scheduler switch