diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 6302ac709..638cd3668 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -113,7 +113,8 @@ vk::Format Instance::GetFormatAlternative(vk::Format format) const { bool Instance::CreateDevice() { auto feature_chain = physical_device.getFeatures2(); + vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>(); // Not having geometry shaders will cause issues with accelerated rendering. const vk::PhysicalDeviceFeatures available = feature_chain.get().features; @@ -146,9 +147,10 @@ bool Instance::CreateDevice() { return false; }; + AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); - AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); // Search queue families for graphics and present queues auto family_properties = physical_device.getQueueFamilyProperties(); @@ -220,6 +222,7 @@ bool Instance::CreateDevice() { } }, feature_chain.get(), + feature_chain.get() }; // Create logical device diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 07753ce62..9d9b84ecf 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -67,12 +67,18 @@ public: return present_queue; } - /// Feature support + /// Returns true when VK_KHR_timeline_semaphore is supported + bool IsTimelineSemaphoreSupported() const { + return timeline_semaphores; + } + + /// Returns true when VK_EXT_extended_dynamic_state is supported bool IsExtendedDynamicStateSupported() const { // TODO: Enable this when the pipeline builder is confirmed functional return false; 
} + /// Returns true when VK_KHR_push_descriptor is supported bool IsPushDescriptorsSupported() const { return push_descriptors; } @@ -115,6 +121,8 @@ private: vk::Queue graphics_queue; u32 present_queue_family_index = 0; u32 graphics_queue_family_index = 0; + + bool timeline_semaphores = false; bool extended_dynamic_state = false; bool push_descriptors = false; }; diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index ef5b91d67..1cd64b058 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -19,6 +19,19 @@ TaskScheduler::TaskScheduler(const Instance& instance) : instance{instance} { command_pool = device.createCommandPool(command_pool_info); + // If supported, prefer timeline semaphores over binary ones + if (instance.IsTimelineSemaphoreSupported()) { + const vk::StructureChain timeline_info = { + vk::SemaphoreCreateInfo{}, + vk::SemaphoreTypeCreateInfo{ + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0 + } + }; + + timeline = device.createSemaphore(timeline_info.get()); + } + constexpr std::array pool_sizes = { vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 1024}, vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 1024}, @@ -64,6 +77,10 @@ TaskScheduler::~TaskScheduler() { vk::Device device = instance.GetDevice(); device.waitIdle(); + if (timeline) { + device.destroySemaphore(timeline); + } + for (const auto& command : commands) { device.destroyFence(command.fence); device.destroyDescriptorPool(command.descriptor_pool); @@ -76,9 +93,27 @@ void TaskScheduler::Synchronize(u32 slot) { const auto& command = commands[slot]; vk::Device device = instance.GetDevice(); - if (command.fence_counter > completed_fence_counter) { - if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Waiting for fences failed!"); + u32
completed_counter = completed_fence_counter; + if (instance.IsTimelineSemaphoreSupported()) { + completed_counter = device.getSemaphoreCounterValue(timeline); + } + + if (command.fence_counter > completed_counter) { + if (instance.IsTimelineSemaphoreSupported()) { + const vk::SemaphoreWaitInfo wait_info = { + .semaphoreCount = 1, + .pSemaphores = &timeline, + .pValues = &command.fence_counter + }; + + if (device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter); + UNREACHABLE(); + } + + } else if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) { + LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter); + UNREACHABLE(); } completed_fence_counter = command.fence_counter; @@ -116,22 +151,60 @@ void TaskScheduler::Submit(bool wait_completion, bool begin_next, command_buffers[command_buffer_count++] = command.render_command_buffer; - const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u; - const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u; - const vk::PipelineStageFlags wait_stage_masks = - vk::PipelineStageFlagBits::eColorAttachmentOutput; - const vk::SubmitInfo submit_info = { - .waitSemaphoreCount = wait_semaphore_count, - .pWaitSemaphores = &wait_semaphore, - .pWaitDstStageMask = &wait_stage_masks, - .commandBufferCount = command_buffer_count, - .pCommandBuffers = command_buffers.data(), - .signalSemaphoreCount = signal_semaphore_count, - .pSignalSemaphores = &signal_semaphore, - }; + if (instance.IsTimelineSemaphoreSupported()) { + const u32 signal_semaphore_count = signal_semaphore ? 2u : 1u; + const std::array signal_values{command.fence_counter, 0ul}; + const std::array signal_semaphores{timeline, signal_semaphore}; - vk::Queue queue = instance.GetGraphicsQueue(); - queue.submit(submit_info, command.fence); + const u32 wait_semaphore_count = wait_semaphore ? 
2u : 1u; + const std::array wait_values{command.fence_counter - 1, 1ul}; + const std::array wait_semaphores{timeline, wait_semaphore}; + + const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { + .waitSemaphoreValueCount = wait_semaphore_count, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = signal_semaphore_count, + .pSignalSemaphoreValues = signal_values.data() + }; + + const std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = signal_semaphores.data(), + }; + + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info); + + } else { + const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u; + const u32 wait_semaphore_count = wait_semaphore ? 
1u : 0u; + const vk::PipelineStageFlags wait_stage_masks = + vk::PipelineStageFlagBits::eColorAttachmentOutput; + + const vk::SubmitInfo submit_info = { + .waitSemaphoreCount = wait_semaphore_count, + .pWaitSemaphores = &wait_semaphore, + .pWaitDstStageMask = &wait_stage_masks, + .commandBufferCount = command_buffer_count, + .pCommandBuffers = command_buffers.data(), + .signalSemaphoreCount = signal_semaphore_count, + .pSignalSemaphores = &signal_semaphore, + }; + + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info, command.fence); + } // Block host until the GPU catches up if (wait_completion) { diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index 40310399a..0e4a6550a 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -75,6 +75,7 @@ private: }; vk::CommandPool command_pool{}; + vk::Semaphore timeline{}; std::array commands; u32 current_command = 0; };