renderer_vulkan: Improve task scheduler synchronization
* Use multiple semaphores for swapchain sync and improve the Submit API
This commit is contained in:
@ -1004,7 +1004,10 @@ void RendererVulkan::SwapBuffers() {
|
||||
swapchain.Create(layout.width, layout.height, false);
|
||||
}
|
||||
|
||||
swapchain.AcquireNextImage();
|
||||
// Calling Submit will change the slot so get the required semaphores now
|
||||
const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore();
|
||||
const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
|
||||
swapchain.AcquireNextImage(image_acquired);
|
||||
|
||||
const vk::Viewport viewport = {
|
||||
.x = 0.0f,
|
||||
@ -1032,11 +1035,11 @@ void RendererVulkan::SwapBuffers() {
|
||||
DrawScreens(layout, false);
|
||||
|
||||
// Flush all buffers to make the data visible to the GPU before submitting
|
||||
vertex_buffer.Flush();
|
||||
rasterizer->FlushBuffers();
|
||||
vertex_buffer.Flush();
|
||||
|
||||
scheduler.Submit(false, true, swapchain.GetAvailableSemaphore(), swapchain.GetPresentSemaphore());
|
||||
swapchain.Present();
|
||||
scheduler.Submit(SubmitMode::SwapchainSynced);
|
||||
swapchain.Present(present_ready);
|
||||
|
||||
// Inform texture runtime about the switch
|
||||
runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
|
||||
|
@ -50,7 +50,7 @@ Instance::Instance(Frontend::EmuWindow& window) {
|
||||
|
||||
// TODO: GPU select dialog
|
||||
auto physical_devices = instance.enumeratePhysicalDevices();
|
||||
physical_device = physical_devices[0];
|
||||
physical_device = physical_devices[1];
|
||||
device_properties = physical_device.getProperties();
|
||||
|
||||
CreateDevice();
|
||||
|
@ -172,8 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
}
|
||||
|
||||
RasterizerVulkan::~RasterizerVulkan() {
|
||||
// Submit any remaining work
|
||||
scheduler.Submit(true, false);
|
||||
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
|
||||
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
vk::Device device = instance.GetDevice();
|
||||
|
@ -21,8 +21,6 @@ Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache
|
||||
|
||||
Swapchain::~Swapchain() {
|
||||
vk::Device device = instance.GetDevice();
|
||||
device.destroySemaphore(render_finished);
|
||||
device.destroySemaphore(image_available);
|
||||
device.destroySwapchainKHR(swapchain);
|
||||
|
||||
for (auto& image : swapchain_images) {
|
||||
@ -72,15 +70,6 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
|
||||
device.destroySwapchainKHR(old_swapchain);
|
||||
}
|
||||
|
||||
// Create sync objects if not already created
|
||||
if (!image_available) {
|
||||
image_available = device.createSemaphore({});
|
||||
}
|
||||
|
||||
if (!render_finished) {
|
||||
render_finished = device.createSemaphore({});
|
||||
}
|
||||
|
||||
vk::RenderPass present_renderpass = renderpass_cache.GetPresentRenderpass();
|
||||
auto images = device.getSwapchainImagesKHR(swapchain);
|
||||
|
||||
@ -132,10 +121,10 @@ void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
|
||||
// Wait for maximum of 1 second
|
||||
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
|
||||
|
||||
void Swapchain::AcquireNextImage() {
|
||||
void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
|
||||
vk::Device device = instance.GetDevice();
|
||||
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, image_available, VK_NULL_HANDLE,
|
||||
&current_image);
|
||||
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
|
||||
VK_NULL_HANDLE, &current_image);
|
||||
switch (result) {
|
||||
case vk::Result::eSuccess:
|
||||
break;
|
||||
@ -151,10 +140,10 @@ void Swapchain::AcquireNextImage() {
|
||||
}
|
||||
}
|
||||
|
||||
void Swapchain::Present() {
|
||||
void Swapchain::Present(vk::Semaphore wait_for_present) {
|
||||
const vk::PresentInfoKHR present_info = {
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &render_finished,
|
||||
.pWaitSemaphores = &wait_for_present,
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &current_image
|
||||
|
@ -11,7 +11,6 @@
|
||||
namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
class RenderpassCache;
|
||||
|
||||
class Swapchain {
|
||||
@ -23,10 +22,10 @@ public:
|
||||
void Create(u32 width, u32 height, bool vsync_enabled);
|
||||
|
||||
/// Acquires the next image in the swapchain.
|
||||
void AcquireNextImage();
|
||||
void AcquireNextImage(vk::Semaphore signal_acquired);
|
||||
|
||||
/// Presents the current image and move to the next one
|
||||
void Present();
|
||||
void Present(vk::Semaphore wait_for_present);
|
||||
|
||||
/// Returns current swapchain state
|
||||
vk::Extent2D GetExtent() const {
|
||||
@ -53,16 +52,6 @@ public:
|
||||
return swapchain;
|
||||
}
|
||||
|
||||
/// Returns the semaphore that will be signaled when vkAcquireNextImageKHR completes
|
||||
vk::Semaphore GetAvailableSemaphore() const {
|
||||
return image_available;
|
||||
}
|
||||
|
||||
/// Returns the semaphore that will signal when the current image will be presented
|
||||
vk::Semaphore GetPresentSemaphore() const {
|
||||
return render_finished;
|
||||
}
|
||||
|
||||
/// Returns true when the swapchain should be recreated
|
||||
bool NeedsRecreation() const {
|
||||
return is_suboptimal || is_outdated;
|
||||
@ -92,8 +81,6 @@ private:
|
||||
|
||||
// Swapchain state
|
||||
std::vector<Image> swapchain_images;
|
||||
vk::Semaphore image_available{};
|
||||
vk::Semaphore render_finished{};
|
||||
u32 current_image = 0;
|
||||
u32 current_frame = 0;
|
||||
bool vsync_enabled = false;
|
||||
|
@ -56,6 +56,8 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} {
|
||||
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
|
||||
for (std::size_t i = 0; i < commands.size(); i++) {
|
||||
commands[i] = ExecutionSlot{
|
||||
.image_acquired = device.createSemaphore({}),
|
||||
.present_ready = device.createSemaphore({}),
|
||||
.fence = device.createFence({}),
|
||||
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
|
||||
.render_command_buffer = command_buffers[2 * i],
|
||||
@ -83,6 +85,8 @@ TaskScheduler::~TaskScheduler() {
|
||||
|
||||
for (const auto& command : commands) {
|
||||
device.destroyFence(command.fence);
|
||||
device.destroySemaphore(command.image_acquired);
|
||||
device.destroySemaphore(command.present_ready);
|
||||
device.destroyDescriptorPool(command.descriptor_pool);
|
||||
}
|
||||
|
||||
@ -134,8 +138,7 @@ void TaskScheduler::WaitFence(u32 counter) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void TaskScheduler::Submit(bool wait_completion, bool begin_next,
|
||||
vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore) {
|
||||
void TaskScheduler::Submit(SubmitMode mode) {
|
||||
const auto& command = commands[current_command];
|
||||
command.render_command_buffer.end();
|
||||
if (command.use_upload_buffer) {
|
||||
@ -151,14 +154,15 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
|
||||
|
||||
command_buffers[command_buffer_count++] = command.render_command_buffer;
|
||||
|
||||
const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
|
||||
if (instance.IsTimelineSemaphoreSupported()) {
|
||||
const u32 signal_semaphore_count = signal_semaphore ? 2u : 1u;
|
||||
const std::array signal_values{command.fence_counter, 0ul};
|
||||
const std::array signal_semaphores{timeline, signal_semaphore};
|
||||
|
||||
const u32 wait_semaphore_count = wait_semaphore ? 2u : 1u;
|
||||
const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
|
||||
const std::array wait_values{command.fence_counter - 1, 1ul};
|
||||
const std::array wait_semaphores{timeline, wait_semaphore};
|
||||
const std::array wait_semaphores{timeline, command.image_acquired};
|
||||
|
||||
const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
|
||||
const std::array signal_values{command.fence_counter, 0ul};
|
||||
const std::array signal_semaphores{timeline, command.present_ready};
|
||||
|
||||
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
|
||||
.waitSemaphoreValueCount = wait_semaphore_count,
|
||||
@ -187,19 +191,19 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
|
||||
queue.submit(submit_info);
|
||||
|
||||
} else {
|
||||
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
|
||||
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
|
||||
const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
|
||||
const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
|
||||
const vk::PipelineStageFlags wait_stage_masks =
|
||||
vk::PipelineStageFlagBits::eColorAttachmentOutput;
|
||||
|
||||
const vk::SubmitInfo submit_info = {
|
||||
.waitSemaphoreCount = wait_semaphore_count,
|
||||
.pWaitSemaphores = &wait_semaphore,
|
||||
.pWaitSemaphores = &command.image_acquired,
|
||||
.pWaitDstStageMask = &wait_stage_masks,
|
||||
.commandBufferCount = command_buffer_count,
|
||||
.pCommandBuffers = command_buffers.data(),
|
||||
.signalSemaphoreCount = signal_semaphore_count,
|
||||
.pSignalSemaphores = &signal_semaphore,
|
||||
.pSignalSemaphores = &command.present_ready,
|
||||
};
|
||||
|
||||
vk::Queue queue = instance.GetGraphicsQueue();
|
||||
@ -207,16 +211,25 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next,
|
||||
}
|
||||
|
||||
// Block host until the GPU catches up
|
||||
if (wait_completion) {
|
||||
if (True(mode & SubmitMode::Flush)) {
|
||||
Synchronize(current_command);
|
||||
}
|
||||
|
||||
// Switch to next cmdbuffer.
|
||||
if (begin_next) {
|
||||
if (False(mode & SubmitMode::Shutdown)) {
|
||||
SwitchSlot();
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the last completed fence counter.
/// When the instance reports timeline semaphore support, the value is read from
/// the `timeline` semaphore; otherwise the host-side `completed_fence_counter`
/// is returned.
u64 TaskScheduler::GetFenceCounter() const {
|
||||
vk::Device device = instance.GetDevice();
|
||||
if (instance.IsTimelineSemaphoreSupported()) {
|
||||
// Query the current counter value of the timeline semaphore from the device
return device.getSemaphoreCounterValue(timeline);
|
||||
}
|
||||
|
||||
// Fallback path: use the counter tracked by the scheduler itself
return completed_fence_counter;
|
||||
}
|
||||
|
||||
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
|
||||
auto& command = commands[current_command];
|
||||
if (!command.use_upload_buffer) {
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include "common/common_types.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
@ -17,6 +18,14 @@ constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
|
||||
class Buffer;
|
||||
class Instance;
|
||||
|
||||
/// Bit flags selecting how TaskScheduler::Submit behaves. Each value occupies its
/// own bit so that modes can be combined (e.g. `SubmitMode::Flush | SubmitMode::Shutdown`).
///  - SwapchainSynced: Submit also waits on the slot's `image_acquired` semaphore and
///    signals its `present_ready` semaphore.
///  - Flush: Submit calls Synchronize on the current slot after submitting.
///  - Shutdown: Submit skips the switch to the next slot.
enum class SubmitMode : u8 {
|
||||
SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
|
||||
Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads
|
||||
Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer
|
||||
};
|
||||
|
||||
DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
|
||||
|
||||
class TaskScheduler {
|
||||
public:
|
||||
TaskScheduler(const Instance& instance);
|
||||
@ -29,9 +38,10 @@ public:
|
||||
void WaitFence(u32 counter);
|
||||
|
||||
/// Submits the current command to the graphics queue
|
||||
void Submit(bool wait_completion = false, bool begin_next = true,
|
||||
vk::Semaphore wait = VK_NULL_HANDLE,
|
||||
vk::Semaphore signal = VK_NULL_HANDLE);
|
||||
void Submit(SubmitMode mode);
|
||||
|
||||
/// Returns the last completed fence counter
|
||||
u64 GetFenceCounter() const;
|
||||
|
||||
/// Returns the command buffer used for early upload operations.
|
||||
vk::CommandBuffer GetUploadCommandBuffer();
|
||||
@ -51,9 +61,12 @@ public:
|
||||
return current_command;
|
||||
}
|
||||
|
||||
/// Returns the last completed fence counter
|
||||
u64 GetFenceCounter() const {
|
||||
return completed_fence_counter;
|
||||
/// Returns the `image_acquired` semaphore of the slot selected by `current_command`.
/// The result depends on the current slot, so fetch it before anything changes the slot.
vk::Semaphore GetImageAcquiredSemaphore() const {
|
||||
return commands[current_command].image_acquired;
|
||||
}
|
||||
|
||||
/// Returns the `present_ready` semaphore of the slot selected by `current_command`.
/// The result depends on the current slot, so fetch it before anything changes the slot.
vk::Semaphore GetPresentReadySemaphore() const {
|
||||
return commands[current_command].present_ready;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -68,15 +81,17 @@ private:
|
||||
struct ExecutionSlot {
|
||||
bool use_upload_buffer = false;
|
||||
u64 fence_counter = 0;
|
||||
vk::Fence fence{};
|
||||
vk::Semaphore image_acquired;
|
||||
vk::Semaphore present_ready;
|
||||
vk::Fence fence;
|
||||
vk::DescriptorPool descriptor_pool;
|
||||
vk::CommandBuffer render_command_buffer{};
|
||||
vk::CommandBuffer upload_command_buffer{};
|
||||
vk::CommandBuffer render_command_buffer;
|
||||
vk::CommandBuffer upload_command_buffer;
|
||||
};
|
||||
|
||||
vk::CommandPool command_pool{};
|
||||
vk::Semaphore timeline{};
|
||||
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands;
|
||||
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
|
||||
u32 current_command = 0;
|
||||
};
|
||||
|
||||
|
@ -593,7 +593,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
staging.buffer, region_count, copy_regions.data());
|
||||
|
||||
scheduler.Submit(true);
|
||||
scheduler.Submit(SubmitMode::Flush);
|
||||
}
|
||||
|
||||
// Lock this data until the next scheduler switch
|
||||
|
Reference in New Issue
Block a user