From 11728d6772ca6c7ed6b4c8b21220dd7838526ec9 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Sat, 22 Oct 2022 21:01:03 +0300 Subject: [PATCH] renderer_vulkan: Scheduler and presentation rewrite * This commit ports yuzu's async scheduler replacing our older and crummier version Commands are recorded by the scheduler and processed by a separate worker thread. * Queue submission is also moved to the worker thread which should alliviate slowdowns related to vkQueueSubmit stalls * Fragment shader compilation and queue submission are also moved to that thread to reduce stutters --- externals/sdl2/CMakeLists.txt | 2 +- src/video_core/CMakeLists.txt | 12 +- .../renderer_vulkan/renderer_vulkan.cpp | 230 ++++++----- .../renderer_vulkan/renderer_vulkan.h | 10 +- .../renderer_vulkan/vk_blit_helper.cpp | 35 +- .../renderer_vulkan/vk_blit_helper.h | 10 +- src/video_core/renderer_vulkan/vk_common.h | 2 - .../renderer_vulkan/vk_descriptor_manager.cpp | 109 +++--- .../renderer_vulkan/vk_descriptor_manager.h | 35 +- .../vk_format_reinterpreter.cpp | 36 +- .../renderer_vulkan/vk_format_reinterpreter.h | 16 +- .../renderer_vulkan/vk_instance.cpp | 5 +- .../renderer_vulkan/vk_master_semaphore.cpp | 25 ++ .../renderer_vulkan/vk_master_semaphore.h | 92 +++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 239 ++++++------ .../renderer_vulkan/vk_pipeline_cache.h | 18 +- .../renderer_vulkan/vk_rasterizer.cpp | 80 ++-- .../renderer_vulkan/vk_rasterizer.h | 10 +- .../renderer_vulkan/vk_renderpass_cache.cpp | 45 ++- .../renderer_vulkan/vk_renderpass_cache.h | 32 +- .../renderer_vulkan/vk_resource_pool.cpp | 148 ++++++++ .../renderer_vulkan/vk_resource_pool.h | 84 ++++ .../renderer_vulkan/vk_scheduler.cpp | 180 +++++++++ src/video_core/renderer_vulkan/vk_scheduler.h | 208 ++++++++++ .../{vk_shader.cpp => vk_shader_util.cpp} | 2 +- .../{vk_shader.h => vk_shader_util.h} | 0 .../renderer_vulkan/vk_stream_buffer.cpp | 132 ++++--- .../renderer_vulkan/vk_stream_buffer.h | 44 ++- 
.../renderer_vulkan/vk_swapchain.cpp | 211 +++++----- src/video_core/renderer_vulkan/vk_swapchain.h | 72 ++-- .../renderer_vulkan/vk_task_scheduler.cpp | 254 ------------- .../renderer_vulkan/vk_task_scheduler.h | 100 ----- .../renderer_vulkan/vk_texture_runtime.cpp | 359 +++++++++--------- .../renderer_vulkan/vk_texture_runtime.h | 37 +- 34 files changed, 1665 insertions(+), 1209 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_master_semaphore.cpp create mode 100644 src/video_core/renderer_vulkan/vk_master_semaphore.h create mode 100644 src/video_core/renderer_vulkan/vk_resource_pool.cpp create mode 100644 src/video_core/renderer_vulkan/vk_resource_pool.h create mode 100644 src/video_core/renderer_vulkan/vk_scheduler.cpp create mode 100644 src/video_core/renderer_vulkan/vk_scheduler.h rename src/video_core/renderer_vulkan/{vk_shader.cpp => vk_shader_util.cpp} (99%) rename src/video_core/renderer_vulkan/{vk_shader.h => vk_shader_util.h} (100%) delete mode 100644 src/video_core/renderer_vulkan/vk_task_scheduler.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_task_scheduler.h diff --git a/externals/sdl2/CMakeLists.txt b/externals/sdl2/CMakeLists.txt index d27fee18a..2d770865a 100644 --- a/externals/sdl2/CMakeLists.txt +++ b/externals/sdl2/CMakeLists.txt @@ -39,9 +39,9 @@ set(SDL_JOYSTICK ON CACHE BOOL "") set(SDL_HAPTIC OFF CACHE BOOL "") set(SDL_HIDAPI ON CACHE BOOL "") set(SDL_POWER OFF CACHE BOOL "") -set(SDL_THREADS ON CACHE BOOL "") set(SDL_TIMERS ON CACHE BOOL "") set(SDL_FILE ON CACHE BOOL "") +set(SDL_THREADS ON CACHE BOOL "") set(SDL_LOADSO ON CACHE BOOL "") set(SDL_CPUINFO OFF CACHE BOOL "") set(SDL_FILESYSTEM OFF CACHE BOOL "") diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index b6b36c005..b7f401423 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -93,8 +93,14 @@ add_library(video_core STATIC renderer_vulkan/vk_format_reinterpreter.cpp 
renderer_vulkan/vk_format_reinterpreter.h renderer_vulkan/vk_layout_tracker.h + renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h + renderer_vulkan/vk_scheduler.cpp + renderer_vulkan/vk_scheduler.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_instance.cpp renderer_vulkan/vk_instance.h renderer_vulkan/vk_pipeline_cache.cpp @@ -105,14 +111,12 @@ add_library(video_core STATIC renderer_vulkan/vk_renderpass_cache.h renderer_vulkan/vk_shader_gen.cpp renderer_vulkan/vk_shader_gen.h - renderer_vulkan/vk_shader.cpp - renderer_vulkan/vk_shader.h + renderer_vulkan/vk_shader_util.cpp + renderer_vulkan/vk_shader_util.h renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h - renderer_vulkan/vk_task_scheduler.cpp - renderer_vulkan/vk_task_scheduler.h renderer_vulkan/vk_texture_runtime.cpp renderer_vulkan/vk_texture_runtime.h shader/debug_data.h diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 9375c6acd..cdb86f0fe 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -16,9 +16,7 @@ #include "core/tracer/recorder.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" -#include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_shader.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/video_core.h" namespace Vulkan { @@ -154,14 +152,12 @@ struct ScreenRectVertex { constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192; RendererVulkan::RendererVulkan(Frontend::EmuWindow& window) - : RendererBase{window}, instance{window, 
Settings::values.physical_device}, scheduler{instance, - *this}, - renderpass_cache{instance, scheduler}, runtime{instance, scheduler, renderpass_cache}, - swapchain{instance, renderpass_cache}, vertex_buffer{instance, - scheduler, - VERTEX_BUFFER_SIZE, - vk::BufferUsageFlagBits::eVertexBuffer, - {}} { + : RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this}, + renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler}, + runtime{instance, scheduler, renderpass_cache, desc_manager}, + swapchain{instance, scheduler, renderpass_cache}, + vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, + rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} { auto& telemetry_session = Core::System::GetInstance().TelemetrySession(); constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; @@ -197,30 +193,23 @@ RendererVulkan::~RendererVulkan() { runtime.Recycle(tag, std::move(info.texture.alloc)); } - - rasterizer.reset(); } VideoCore::ResultStatus RendererVulkan::Init() { CompileShaders(); BuildLayouts(); BuildPipelines(); - - // Create the rasterizer - rasterizer = std::make_unique(render_window, instance, scheduler, runtime, - renderpass_cache); - return VideoCore::ResultStatus::Success; } VideoCore::RasterizerInterface* RendererVulkan::Rasterizer() { - return rasterizer.get(); + return &rasterizer; } void RendererVulkan::ShutDown() {} void RendererVulkan::Sync() { - rasterizer->SyncEntireState(); + rasterizer.SyncEntireState(); } void RendererVulkan::PrepareRendertarget() { @@ -236,24 +225,26 @@ void RendererVulkan::PrepareRendertarget() { LCD::Read(color_fill.raw, lcd_color_addr); if (color_fill.is_enabled) { - const vk::ClearColorValue clear_color = { - .float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f, - color_fill.color_b / 255.0f, 1.0f}}; - - const vk::ImageSubresourceRange range = { - 
.aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }; - - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); TextureInfo& texture = screen_infos[i].texture; - runtime.Transition(command_buffer, texture.alloc, vk::ImageLayout::eTransferDstOptimal, + runtime.Transition(texture.alloc, vk::ImageLayout::eTransferDstOptimal, 0, texture.alloc.levels); - command_buffer.clearColorImage( - texture.alloc.image, vk::ImageLayout::eTransferDstOptimal, clear_color, range); + + scheduler.Record([image = texture.alloc.image, + color_fill](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ClearColorValue clear_color = { + .float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f, + color_fill.color_b / 255.0f, 1.0f}}; + + const vk::ImageSubresourceRange range = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_color, range); + }); } else { TextureInfo& texture = screen_infos[i].texture; if (texture.width != framebuffer.width || texture.height != framebuffer.height || @@ -275,9 +266,7 @@ void RendererVulkan::PrepareRendertarget() { } void RendererVulkan::BeginRendering() { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, - present_pipelines[current_pipeline]); + vk::Device device = instance.GetDevice(); std::array present_textures; for (std::size_t i = 0; i < screen_infos.size(); i++) { @@ -290,29 +279,24 @@ void RendererVulkan::BeginRendering() { present_textures[3] = vk::DescriptorImageInfo{.sampler = present_samplers[current_sampler]}; - const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool = - scheduler.GetDescriptorPool(), - .descriptorSetCount = 1, - .pSetLayouts = 
&present_descriptor_layout}; - - vk::Device device = instance.GetDevice(); - vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0]; + vk::DescriptorSet set = desc_manager.AllocateSet(present_descriptor_layout); device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures[0]); - command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, - 1, &set, 0, nullptr); + scheduler.Record([this, set, pipeline_index = current_pipeline](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, + present_pipelines[pipeline_index]); - const vk::ClearValue clear_value = {.color = clear_color}; + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, set, {}); + }); - const vk::RenderPassBeginInfo begin_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), + const RenderpassState renderpass_info = { + .renderpass = renderpass_cache.GetPresentRenderpass(), .framebuffer = swapchain.GetFramebuffer(), - .renderArea = vk::Rect2D{.offset = {0, 0}, .extent = swapchain.GetExtent()}, - .clearValueCount = 1, - .pClearValues = &clear_value, + .render_area = vk::Rect2D{.offset = {0, 0}, .extent = swapchain.GetExtent()}, + .clear = vk::ClearValue{.color = clear_color} }; - renderpass_cache.EnterRenderpass(begin_info); + renderpass_cache.EnterRenderpass(renderpass_info); } void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, @@ -340,7 +324,7 @@ void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. 
ASSERT(pixel_stride % 4 == 0); - if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, + if (!rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { ASSERT(false); // Reset the screen info's display texture to its own permanent texture @@ -618,14 +602,16 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; draw_info.screen_id_l = screen_id; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.pushConstants(present_pipeline_layout, - vk::ShaderStageFlagBits::eFragment | - vk::ShaderStageFlagBits::eVertex, - 0, sizeof(draw_info), &draw_info); + scheduler.Record([this, offset = offset, + info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); - command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + }); } void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) { @@ -655,25 +641,16 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w}; draw_info.screen_id_l = screen_id; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.pushConstants(present_pipeline_layout, - vk::ShaderStageFlagBits::eFragment | - vk::ShaderStageFlagBits::eVertex, - 0, sizeof(draw_info), &draw_info); + scheduler.Record([this, offset = offset, + info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + 
render_cmdbuf.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); - const vk::ClearValue clear_value = {.color = clear_color}; - - const vk::RenderPassBeginInfo begin_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), - .framebuffer = swapchain.GetFramebuffer(), - .clearValueCount = 1, - .pClearValues = &clear_value, - }; - - command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); - - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); - command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + }); } void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x, @@ -704,14 +681,16 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i draw_info.screen_id_l = screen_id_l; draw_info.screen_id_r = screen_id_r; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.pushConstants(present_pipeline_layout, - vk::ShaderStageFlagBits::eFragment | - vk::ShaderStageFlagBits::eVertex, - 0, sizeof(draw_info), &draw_info); + scheduler.Record([this, offset = offset, + info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); - command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + }); } void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y, @@ -744,14 +723,16 @@ void 
RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl draw_info.screen_id_l = screen_id_l; draw_info.screen_id_r = screen_id_r; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.pushConstants(present_pipeline_layout, - vk::ShaderStageFlagBits::eFragment | - vk::ShaderStageFlagBits::eVertex, - 0, sizeof(draw_info), &draw_info); + scheduler.Record([this, offset = offset, + info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.pushConstants(present_pipeline_layout, + vk::ShaderStageFlagBits::eFragment | + vk::ShaderStageFlagBits::eVertex, + 0, sizeof(info), &info); - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); - command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); + }); } void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool flipped) { @@ -908,41 +889,53 @@ void RendererVulkan::SwapBuffers() { const auto& layout = render_window.GetFramebufferLayout(); PrepareRendertarget(); - // Create swapchain if needed - if (swapchain.NeedsRecreation()) { + const auto RecreateSwapchain = [&] { + scheduler.Finish(); + const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); swapchain.Create(layout.width, layout.height); + }; + + if (swapchain.NeedsRecreation()) { + RecreateSwapchain(); } - // Calling Submit will change the slot so get the required semaphores now - const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore(); - const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore(); - swapchain.AcquireNextImage(image_acquired); + do { + scheduler.WaitWorker(); + swapchain.AcquireNextImage(); + if (swapchain.NeedsRecreation()) { + RecreateSwapchain(); + } + } while (swapchain.NeedsRecreation()); - const vk::Viewport viewport = {.x = 0.0f, - .y 
= 0.0f, - .width = static_cast(layout.width), - .height = static_cast(layout.height), - .minDepth = 0.0f, - .maxDepth = 1.0f}; + scheduler.Record([layout](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::Viewport viewport = {.x = 0.0f, + .y = 0.0f, + .width = static_cast(layout.width), + .height = static_cast(layout.height), + .minDepth = 0.0f, + .maxDepth = 1.0f}; - const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {layout.width, layout.height}}; + const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {layout.width, layout.height}}; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.setViewport(0, viewport); - command_buffer.setScissor(0, scissor); + render_cmdbuf.setViewport(0, viewport); + render_cmdbuf.setScissor(0, scissor); + }); renderpass_cache.ExitRenderpass(); for (auto& info : screen_infos) { - auto alloc = info.display_texture ? info.display_texture : &info.texture.alloc; - runtime.Transition(command_buffer, *alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, + ImageAlloc* alloc = info.display_texture ? 
info.display_texture : &info.texture.alloc; + runtime.Transition(*alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0, alloc->levels); } DrawScreens(layout, false); - scheduler.Submit(SubmitMode::SwapchainSynced); - swapchain.Present(present_ready); + const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); + const VkSemaphore present_ready = swapchain.GetPresentReadySemaphore(); + scheduler.Flush(present_ready, image_acquired); + //scheduler.WaitWorker(); + swapchain.Present(); m_current_frame++; @@ -961,13 +954,8 @@ void RendererVulkan::SwapBuffers() { void RendererVulkan::FlushBuffers() { vertex_buffer.Flush(); - rasterizer->FlushBuffers(); + rasterizer.FlushBuffers(); runtime.FlushBuffers(); } -void RendererVulkan::OnSlotSwitch() { - renderpass_cache.OnSlotSwitch(); - rasterizer->pipeline_cache.MarkDirty(); -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index ca7e410a6..20877d686 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -11,9 +11,11 @@ #include "core/hw/gpu.h" #include "video_core/renderer_base.h" #include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_descriptor_manager.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/renderer_vulkan/vk_texture_runtime.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Layout { struct FramebufferLayout; @@ -73,7 +75,6 @@ public: void CleanupVideoDumping() override {} void Sync() override; void FlushBuffers(); - void OnSlotSwitch(); private: void ReloadSampler(); @@ -103,12 +104,13 @@ private: private: Instance instance; - TaskScheduler scheduler; + Scheduler scheduler; RenderpassCache renderpass_cache; + DescriptorManager desc_manager; 
TextureRuntime runtime; Swapchain swapchain; - std::unique_ptr rasterizer; StreamBuffer vertex_buffer; + RasterizerVulkan rasterizer; // Present pipelines (Normal, Anaglyph, Interlaced) vk::PipelineLayout present_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index 8c4a7991b..3cee30c52 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -5,14 +5,15 @@ #include "common/vector_math.h" #include "video_core/renderer_vulkan/vk_blit_helper.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_shader.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_descriptor_manager.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" namespace Vulkan { -BlitHelper::BlitHelper(const Instance& instance, TaskScheduler& scheduler) - : scheduler{scheduler}, device{instance.GetDevice()} { +BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager) + : scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} { constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require @@ -137,25 +138,19 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, vk::DescriptorImageInfo{.imageView = dest.GetImageView(), .imageLayout = vk::ImageLayout::eGeneral}}; - const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool = - scheduler.GetDescriptorPool(), - .descriptorSetCount = 1, - .pSetLayouts = &descriptor_layout}; + vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); + device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); - descriptor_set = 
device.allocateDescriptorSets(alloc_info)[0]; + scheduler.Record([this, set, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); + render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); - device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); + const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); + render_cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(Common::Vec2i), src_offset.AsArray()); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, - 1, &descriptor_set, 0, nullptr); - command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); - - const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); - command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, - sizeof(Common::Vec2i), src_offset.AsArray()); - - command_buffer.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); + render_cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h index 0aed257b9..ffc9d26f7 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.h +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -13,12 +13,14 @@ struct TextureBlit; namespace Vulkan { class Instance; -class TaskScheduler; +class DescriptorManager; +class Scheduler; class Surface; class BlitHelper { public: - BlitHelper(const Instance& instance, TaskScheduler& scheduler); + BlitHelper(const Instance& instance, Scheduler& scheduler, + DescriptorManager& desc_manager); ~BlitHelper(); /// Blits D24S8 
pixel data to the provided buffer @@ -26,12 +28,12 @@ public: const VideoCore::TextureBlit& blit); private: - TaskScheduler& scheduler; + Scheduler& scheduler; + DescriptorManager& desc_manager; vk::Device device; vk::Pipeline compute_pipeline; vk::PipelineLayout compute_pipeline_layout; vk::DescriptorSetLayout descriptor_layout; - vk::DescriptorSet descriptor_set; vk::DescriptorUpdateTemplate update_template; vk::ShaderModule compute_shader; }; diff --git a/src/video_core/renderer_vulkan/vk_common.h b/src/video_core/renderer_vulkan/vk_common.h index c3285198e..e87032e20 100644 --- a/src/video_core/renderer_vulkan/vk_common.h +++ b/src/video_core/renderer_vulkan/vk_common.h @@ -20,8 +20,6 @@ namespace Vulkan { -constexpr u32 SCHEDULER_COMMAND_COUNT = 4; - /// Return the image aspect associated on the provided format constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) { switch (format) { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp index 893536e99..6bc2cc1d5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp @@ -4,7 +4,8 @@ #include "video_core/renderer_vulkan/vk_descriptor_manager.h" #include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "vulkan/vulkan.hpp" namespace Vulkan { @@ -13,8 +14,6 @@ struct Bindings { u32 binding_count; }; -constexpr u32 DESCRIPTOR_BATCH_SIZE = 8; -constexpr u32 RASTERIZER_SET_COUNT = 4; constexpr static std::array RASTERIZER_SETS = { Bindings{// Utility set .bindings = {vk::DescriptorType::eUniformBuffer, vk::DescriptorType::eUniformBuffer, @@ -58,71 +57,56 @@ constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) { return flags; } -DescriptorManager::DescriptorManager(const Instance& instance, TaskScheduler& scheduler) - : 
instance{instance}, scheduler{scheduler} { - descriptor_dirty.fill(true); +DescriptorManager::DescriptorManager(const Instance& instance, Scheduler& scheduler) + : instance{instance}, scheduler{scheduler}, pool_provider{instance, scheduler.GetMasterSemaphore()} { BuildLayouts(); + descriptor_set_dirty.fill(true); + current_pool = pool_provider.Commit(); } DescriptorManager::~DescriptorManager() { vk::Device device = instance.GetDevice(); - device.destroyPipelineLayout(layout); + device.destroyPipelineLayout(pipeline_layout); - for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) { + for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) { device.destroyDescriptorSetLayout(descriptor_set_layouts[i]); device.destroyDescriptorUpdateTemplate(update_templates[i]); } } void DescriptorManager::SetBinding(u32 set, u32 binding, DescriptorData data) { - if (update_data[set][binding] != data) { - update_data[set][binding] = data; - descriptor_dirty[set] = true; + DescriptorData& current = update_data[set][binding]; + if (current != data) { + current = data; + descriptor_set_dirty[set] = true; } } void DescriptorManager::BindDescriptorSets() { - vk::Device device = instance.GetDevice(); - std::array layouts; - - for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { - if (descriptor_dirty[i] || !descriptor_sets[i]) { - auto& batch = descriptor_batch[i]; - if (batch.empty()) { - layouts.fill(descriptor_set_layouts[i]); - const vk::DescriptorSetAllocateInfo alloc_info = { - .descriptorPool = scheduler.GetDescriptorPool(), - .descriptorSetCount = DESCRIPTOR_BATCH_SIZE, - .pSetLayouts = layouts.data()}; - - try { - batch = device.allocateDescriptorSets(alloc_info); - } catch (vk::OutOfPoolMemoryError& err) { - LOG_CRITICAL(Render_Vulkan, "Run out of pool memory for layout {}: {}", i, - err.what()); - UNREACHABLE(); - } - } - - vk::DescriptorSet set = batch.back(); - device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]); - - descriptor_sets[i] = set; - 
descriptor_dirty[i] = false; - batch.pop_back(); - } + const bool is_dirty = scheduler.IsStateDirty(StateFlags::DescriptorSets); + if (is_dirty) { + descriptor_set_dirty.fill(true); } - // Bind the descriptor sets - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, - RASTERIZER_SET_COUNT, descriptor_sets.data(), 0, nullptr); -} + vk::Device device = instance.GetDevice(); + std::array bound_sets; + for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) { + if (descriptor_set_dirty[i]) { + vk::DescriptorSet set = AllocateSet(descriptor_set_layouts[i]); + device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]); + descriptor_sets[i] = set; + } -void DescriptorManager::MarkDirty() { - descriptor_dirty.fill(true); - for (auto& batch : descriptor_batch) { - batch.clear(); + bound_sets[i] = descriptor_sets[i]; + } + + scheduler.Record([this, bound_sets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, 0, bound_sets, {}); + }); + + descriptor_set_dirty.fill(false); + if (is_dirty) { + scheduler.MarkStateNonDirty(StateFlags::DescriptorSets); } } @@ -131,7 +115,7 @@ void DescriptorManager::BuildLayouts() { std::array update_entries; vk::Device device = instance.GetDevice(); - for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) { + for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) { const auto& set = RASTERIZER_SETS[i]; for (u32 j = 0; j < set.binding_count; j++) { vk::DescriptorType type = set.bindings[j]; @@ -151,8 +135,6 @@ void DescriptorManager::BuildLayouts() { const vk::DescriptorSetLayoutCreateInfo layout_info = {.bindingCount = set.binding_count, .pBindings = set_bindings.data()}; - - // Create descriptor set layout descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info); const vk::DescriptorUpdateTemplateCreateInfo template_info = { @@ -161,16 
+143,33 @@ void DescriptorManager::BuildLayouts() { .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, .descriptorSetLayout = descriptor_set_layouts[i]}; - // Create descriptor set update template update_templates[i] = device.createDescriptorUpdateTemplate(template_info); } - const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = RASTERIZER_SET_COUNT, + const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = MAX_DESCRIPTOR_SETS, .pSetLayouts = descriptor_set_layouts.data(), .pushConstantRangeCount = 0, .pPushConstantRanges = nullptr}; - layout = device.createPipelineLayout(layout_info); + pipeline_layout = device.createPipelineLayout(layout_info); +} + +vk::DescriptorSet DescriptorManager::AllocateSet(vk::DescriptorSetLayout layout) { + vk::Device device = instance.GetDevice(); + + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = current_pool, + .descriptorSetCount = 1, + .pSetLayouts = &layout}; + + try { + return device.allocateDescriptorSets(alloc_info)[0]; + } catch (vk::OutOfPoolMemoryError) { + pool_provider.RefreshTick(); + current_pool = pool_provider.Commit(); + } + + return AllocateSet(layout); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_descriptor_manager.h b/src/video_core/renderer_vulkan/vk_descriptor_manager.h index 1f4937127..1206bacca 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_manager.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_manager.h @@ -4,19 +4,19 @@ #pragma once -#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" namespace Vulkan { constexpr u32 MAX_DESCRIPTORS = 8; -constexpr u32 MAX_DESCRIPTOR_SETS = 6; +constexpr u32 MAX_DESCRIPTOR_SETS = 4; union DescriptorData { vk::DescriptorImageInfo image_info; vk::DescriptorBufferInfo buffer_info; vk::BufferView buffer_view; - bool operator!=(const DescriptorData& other) const { + [[nodiscard]] bool operator!=(const DescriptorData& 
other) const noexcept { return std::memcmp(this, &other, sizeof(DescriptorData)) != 0; } }; @@ -24,25 +24,25 @@ union DescriptorData { using DescriptorSetData = std::array; class Instance; -class TaskScheduler; +class Scheduler; class DescriptorManager { public: - DescriptorManager(const Instance& instance, TaskScheduler& scheduler); + DescriptorManager(const Instance& instance, Scheduler& scheduler); ~DescriptorManager(); + /// Allocates a descriptor set of the provided layout + vk::DescriptorSet AllocateSet(vk::DescriptorSetLayout layout); + /// Binds a resource to the provided binding void SetBinding(u32 set, u32 binding, DescriptorData data); /// Builds descriptor sets that reference the currently bound resources void BindDescriptorSets(); - /// Marks cached descriptor state dirty - void MarkDirty(); - /// Returns the rasterizer pipeline layout - vk::PipelineLayout GetPipelineLayout() const { - return layout; + [[nodiscard]] vk::PipelineLayout GetPipelineLayout() const noexcept { + return pipeline_layout; } private: @@ -51,18 +51,15 @@ private: private: const Instance& instance; - TaskScheduler& scheduler; - - // Cached layouts for the rasterizer pipelines - vk::PipelineLayout layout; + Scheduler& scheduler; + DescriptorPool pool_provider; + vk::PipelineLayout pipeline_layout; + vk::DescriptorPool current_pool; std::array descriptor_set_layouts; std::array update_templates; - - // Current data for the descriptor sets std::array update_data{}; - std::array descriptor_dirty{}; - std::array descriptor_sets; - std::array, MAX_DESCRIPTOR_SETS> descriptor_batch; + std::array descriptor_sets{}; + std::array descriptor_set_dirty{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp index 08127b933..e191857a4 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -3,14 +3,16 @@ // 
Refer to the license.txt file included. #include "video_core/renderer_vulkan/vk_format_reinterpreter.h" -#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_descriptor_manager.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" namespace Vulkan { -D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, - TextureRuntime& runtime) - : FormatReinterpreterBase{instance, scheduler, runtime}, device{instance.GetDevice()} { +D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, Scheduler& scheduler, + DescriptorManager& desc_manager, TextureRuntime& runtime) + : FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, device{instance.GetDevice()} { constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require @@ -135,25 +137,19 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf vk::DescriptorImageInfo{.imageView = dest.GetImageView(), .imageLayout = vk::ImageLayout::eGeneral}}; - const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool = - scheduler.GetDescriptorPool(), - .descriptorSetCount = 1, - .pSetLayouts = &descriptor_layout}; + vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); + device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); - descriptor_set = device.allocateDescriptorSets(alloc_info)[0]; + scheduler.Record([this, set, src_rect](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); + render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); - device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); + const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom); + 
render_cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, + sizeof(Common::Vec2i), src_offset.AsArray()); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, - 1, &descriptor_set, 0, nullptr); - command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); - - const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom); - command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, - sizeof(Common::Vec2i), src_offset.AsArray()); - - command_buffer.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); + render_cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.h b/src/video_core/renderer_vulkan/vk_format_reinterpreter.h index 0c0972565..5dc93cc4b 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.h +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.h @@ -11,14 +11,15 @@ namespace Vulkan { class Surface; class Instance; -class TaskScheduler; +class DescriptorManager; +class Scheduler; class TextureRuntime; class FormatReinterpreterBase { public: - FormatReinterpreterBase(const Instance& instance, TaskScheduler& scheduler, - TextureRuntime& runtime) - : instance{instance}, scheduler{scheduler}, runtime{runtime} {} + FormatReinterpreterBase(const Instance& instance, Scheduler& scheduler, + DescriptorManager& desc_manager, TextureRuntime& runtime) + : instance{instance}, scheduler{scheduler}, desc_manager{desc_manager}, runtime{runtime} {} virtual ~FormatReinterpreterBase() = default; virtual VideoCore::PixelFormat GetSourceFormat() const = 0; @@ -27,7 +28,8 @@ public: protected: const Instance& instance; - TaskScheduler& scheduler; + Scheduler& scheduler; + DescriptorManager& desc_manager; TextureRuntime& 
runtime; }; @@ -35,7 +37,8 @@ using ReinterpreterList = std::vector>; class D24S8toRGBA8 final : public FormatReinterpreterBase { public: - D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime); + D24S8toRGBA8(const Instance& instance, Scheduler& scheduler, + DescriptorManager& desc_manager, TextureRuntime& runtime); ~D24S8toRGBA8(); [[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override { @@ -50,7 +53,6 @@ private: vk::Pipeline compute_pipeline; vk::PipelineLayout compute_pipeline_layout; vk::DescriptorSetLayout descriptor_layout; - vk::DescriptorSet descriptor_set; vk::DescriptorUpdateTemplate update_template; vk::ShaderModule compute_shader; VideoCore::Rect2D temp_rect{0, 0, 0, 0}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 24db6dbaf..b9abce254 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -111,7 +111,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) { try { instance = vk::createInstance(instance_info); } catch (vk::LayerNotPresentError& err) { - LOG_CRITICAL(Render_Vulkan, "Validation requested but layer is not available!"); + LOG_CRITICAL(Render_Vulkan, "Validation requested but layer is not available {}", err.what()); UNREACHABLE(); } @@ -354,8 +354,7 @@ bool Instance::CreateDevice() { try { device = physical_device.createDevice(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { - LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available, " - "check extension log for details"); + LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what()); UNREACHABLE(); } diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp new file mode 100644 index 000000000..c67858e3a --- /dev/null +++ 
b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +MasterSemaphore::MasterSemaphore(const Instance& instance) : device{instance.GetDevice()} { + const vk::StructureChain semaphore_chain = { + vk::SemaphoreCreateInfo{}, + vk::SemaphoreTypeCreateInfoKHR{ + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0, + } + }; + + semaphore = device.createSemaphore(semaphore_chain.get()); +} + +MasterSemaphore::~MasterSemaphore() { + device.destroySemaphore(semaphore); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h new file mode 100644 index 000000000..619e15077 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; + +constexpr u64 WAIT_TIMEOUT = std::numeric_limits::max(); + +class MasterSemaphore { +public: + explicit MasterSemaphore(const Instance& instance); + ~MasterSemaphore(); + + /// Returns the current logical tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return current_tick.load(std::memory_order_acquire); + } + + /// Returns the last known GPU tick. + [[nodiscard]] u64 KnownGpuTick() const noexcept { + return gpu_tick.load(std::memory_order_acquire); + } + + /// Returns the timeline semaphore handle. + [[nodiscard]] vk::Semaphore Handle() const noexcept { + return semaphore; + } + + /// Returns true when a tick has been hit by the GPU. 
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return KnownGpuTick() >= tick; + } + + /// Advance to the logical tick and return the old one + [[nodiscard]] u64 NextTick() noexcept { + return current_tick.fetch_add(1, std::memory_order_release); + } + + /// Refresh the known GPU tick + void Refresh() { + u64 this_tick{}; + u64 counter{}; + do { + this_tick = gpu_tick.load(std::memory_order_acquire); + counter = device.getSemaphoreCounterValueKHR(semaphore); + if (counter < this_tick) { + return; + } + } while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release, + std::memory_order_relaxed)); + } + + /// Waits for a tick to be hit on the GPU + void Wait(u64 tick) { + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + // Update the GPU tick and try again + Refresh(); + if (IsFree(tick)) { + return; + } + + // If none of the above is hit, fallback to a regular wait + const vk::SemaphoreWaitInfoKHR wait_info = { + .semaphoreCount = 1, + .pSemaphores = &semaphore, + .pValues = &tick, + }; + + while (device.waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT) != vk::Result::eSuccess) {} + Refresh(); + } + +private: + vk::Device device; + vk::Semaphore semaphore; ///< Timeline semaphore. + std::atomic gpu_tick{0}; ///< Current known GPU tick. + std::atomic current_tick{1}; ///< Current logical tick. 
+}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 76672239a..fb4c781c4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -6,12 +6,14 @@ #include "common/common_paths.h" #include "common/file_util.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "core/settings.h" #include "video_core/renderer_vulkan/pica_to_vk.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_descriptor_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { @@ -64,11 +66,9 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { return vk::ShaderStageFlagBits::eVertex; } -PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler, - RenderpassCache& renderpass_cache) - : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{ - instance, - scheduler} { +PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} { trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex, instance.GetDevice(), ShaderOptimization::Debug); } @@ -158,36 +158,38 @@ void PipelineCache::SaveDiskCache() { void PipelineCache::BindPipeline(const PipelineInfo& info) { ApplyDynamic(info); - std::size_t shader_hash = 0; - for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { - shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); - } + scheduler.Record([this, 
info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + std::size_t shader_hash = 0; + for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { + shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); + } - const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() - ? offsetof(PipelineInfo, rasterization) - : offsetof(PipelineInfo, depth_stencil) + - offsetof(DepthStencilState, stencil_reference); + const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() + ? offsetof(PipelineInfo, rasterization) + : offsetof(PipelineInfo, depth_stencil) + + offsetof(DepthStencilState, stencil_reference); - u64 info_hash = Common::ComputeHash64(&info, info_hash_size); - u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); + u64 info_hash = Common::ComputeHash64(&info, info_hash_size); + u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); - auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{}); - if (new_pipeline) { - it->second = BuildPipeline(info); - } + auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{}); + if (new_pipeline) { + it->second = BuildPipeline(info); + } - if (it->second != current_pipeline) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second); + render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second); current_pipeline = it->second; - } + }); desc_manager.BindDescriptorSets(); } +MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { + MICROPROFILE_SCOPE(Vulkan_VS); + PicaVSConfig config{regs.vs, setup}; for (u32 i = 0; i < layout.attribute_count; i++) { const auto& attrib = layout.attributes[i]; @@ -198,38 +200,52 @@ bool 
PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex, instance.GetDevice(), ShaderOptimization::Debug); if (!handle) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); return false; } - current_shaders[ProgramType::VS] = handle; - shader_hashes[ProgramType::VS] = config.Hash(); + scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer, vk::CommandBuffer) { + current_shaders[ProgramType::VS] = handle; + shader_hashes[ProgramType::VS] = hash; + }); + return true; } void PipelineCache::UseTrivialVertexShader() { - current_shaders[ProgramType::VS] = trivial_vertex_shader; - shader_hashes[ProgramType::VS] = 0; + scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) { + current_shaders[ProgramType::VS] = trivial_vertex_shader; + shader_hashes[ProgramType::VS] = 0; + }); } void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { const PicaFixedGSConfig gs_config{regs}; - auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, - instance.GetDevice(), ShaderOptimization::Debug); - current_shaders[ProgramType::GS] = handle; - shader_hashes[ProgramType::GS] = gs_config.Hash(); + + scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) { + auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, + instance.GetDevice(), ShaderOptimization::High); + current_shaders[ProgramType::GS] = handle; + shader_hashes[ProgramType::GS] = gs_config.Hash(); + }); } void PipelineCache::UseTrivialGeometryShader() { - current_shaders[ProgramType::GS] = VK_NULL_HANDLE; - shader_hashes[ProgramType::GS] = 0; + scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) { + current_shaders[ProgramType::GS] = VK_NULL_HANDLE; + shader_hashes[ProgramType::GS] = 0; + }); } void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { const 
PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs); - auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment, - instance.GetDevice(), ShaderOptimization::Debug); - current_shaders[ProgramType::FS] = handle; - shader_hashes[ProgramType::FS] = config.Hash(); + + scheduler.Record([this, config](vk::CommandBuffer, vk::CommandBuffer) { + auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment, + instance.GetDevice(), ShaderOptimization::High); + current_shaders[ProgramType::FS] = handle; + shader_hashes[ProgramType::FS] = config.Hash(); + }); } void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { @@ -261,105 +277,108 @@ void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) { } void PipelineCache::SetViewport(float x, float y, float width, float height) { + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); const vk::Viewport viewport{x, y, width, height, 0.f, 1.f}; - if (viewport != current_viewport || state_dirty) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f}); + if (viewport != current_viewport || is_dirty) { + scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.setViewport(0, viewport); + }); current_viewport = viewport; } } void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) { + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); const vk::Rect2D scissor{{x, y}, {width, height}}; - if (scissor != current_scissor || state_dirty) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}}); + if (scissor != current_scissor || is_dirty) { + scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.setScissor(0, scissor); + }); current_scissor = scissor; 
} } -void PipelineCache::MarkDirty() { - desc_manager.MarkDirty(); - current_pipeline = VK_NULL_HANDLE; - state_dirty = true; -} - void PipelineCache::ApplyDynamic(const PipelineInfo& info) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); - if (info.depth_stencil.stencil_compare_mask != - current_info.depth_stencil.stencil_compare_mask || - state_dirty) { - command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, - info.depth_stencil.stencil_compare_mask); - } - - if (info.depth_stencil.stencil_write_mask != current_info.depth_stencil.stencil_write_mask || - state_dirty) { - command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, - info.depth_stencil.stencil_write_mask); - } - - if (info.depth_stencil.stencil_reference != current_info.depth_stencil.stencil_reference || - state_dirty) { - command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, - info.depth_stencil.stencil_reference); - } - - if (instance.IsExtendedDynamicStateSupported()) { - if (info.rasterization.cull_mode != current_info.rasterization.cull_mode || state_dirty) { - command_buffer.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode)); - command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode)); + PipelineInfo current = current_info; + scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + if (info.depth_stencil.stencil_compare_mask != + current.depth_stencil.stencil_compare_mask || + is_dirty) { + render_cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, + info.depth_stencil.stencil_compare_mask); } - if (info.depth_stencil.depth_compare_op != current_info.depth_stencil.depth_compare_op || - state_dirty) { - command_buffer.setDepthCompareOpEXT( - PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op)); + if 
(info.depth_stencil.stencil_write_mask != current.depth_stencil.stencil_write_mask || + is_dirty) { + render_cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, + info.depth_stencil.stencil_write_mask); } - if (info.depth_stencil.depth_test_enable != current_info.depth_stencil.depth_test_enable || - state_dirty) { - command_buffer.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable); + if (info.depth_stencil.stencil_reference != current.depth_stencil.stencil_reference || + is_dirty) { + render_cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, + info.depth_stencil.stencil_reference); } - if (info.depth_stencil.depth_write_enable != - current_info.depth_stencil.depth_write_enable || - state_dirty) { - command_buffer.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable); - } + if (instance.IsExtendedDynamicStateSupported()) { + if (info.rasterization.cull_mode != current.rasterization.cull_mode || is_dirty) { + render_cmdbuf.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode)); + render_cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode)); + } - if (info.rasterization.topology != current_info.rasterization.topology || state_dirty) { - command_buffer.setPrimitiveTopologyEXT( - PicaToVK::PrimitiveTopology(info.rasterization.topology)); - } + if (info.depth_stencil.depth_compare_op != current.depth_stencil.depth_compare_op || + is_dirty) { + render_cmdbuf.setDepthCompareOpEXT( + PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op)); + } - if (info.depth_stencil.stencil_test_enable != - current_info.depth_stencil.stencil_test_enable || - state_dirty) { - command_buffer.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable); - } + if (info.depth_stencil.depth_test_enable != current.depth_stencil.depth_test_enable || + is_dirty) { + render_cmdbuf.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable); + } - if (info.depth_stencil.stencil_fail_op != 
current_info.depth_stencil.stencil_fail_op || - info.depth_stencil.stencil_pass_op != current_info.depth_stencil.stencil_pass_op || - info.depth_stencil.stencil_depth_fail_op != - current_info.depth_stencil.stencil_depth_fail_op || - info.depth_stencil.stencil_compare_op != - current_info.depth_stencil.stencil_compare_op || - state_dirty) { - command_buffer.setStencilOpEXT( - vk::StencilFaceFlagBits::eFrontAndBack, - PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), - PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), - PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), - PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)); + if (info.depth_stencil.depth_write_enable != + current.depth_stencil.depth_write_enable || + is_dirty) { + render_cmdbuf.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable); + } + + if (info.rasterization.topology != current.rasterization.topology || is_dirty) { + render_cmdbuf.setPrimitiveTopologyEXT( + PicaToVK::PrimitiveTopology(info.rasterization.topology)); + } + + if (info.depth_stencil.stencil_test_enable != + current.depth_stencil.stencil_test_enable || + is_dirty) { + render_cmdbuf.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable); + } + + if (info.depth_stencil.stencil_fail_op != current.depth_stencil.stencil_fail_op || + info.depth_stencil.stencil_pass_op != current.depth_stencil.stencil_pass_op || + info.depth_stencil.stencil_depth_fail_op != + current.depth_stencil.stencil_depth_fail_op || + info.depth_stencil.stencil_compare_op != + current.depth_stencil.stencil_compare_op || + is_dirty) { + render_cmdbuf.setStencilOpEXT( + vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), + PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), + PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), + PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)); + } } - } + }); current_info = info; - state_dirty = false; + if 
(is_dirty) { + scheduler.MarkStateNonDirty(StateFlags::Pipeline); + } } vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index cd92d640a..5f0695306 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -9,8 +9,7 @@ #include "common/hash.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/regs.h" -#include "video_core/renderer_vulkan/vk_descriptor_manager.h" -#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_shader_gen.h" #include "video_core/shader/shader_cache.h" @@ -120,8 +119,9 @@ using FragmentShaders = Pica::Shader::ShaderCache; class Instance; -class TaskScheduler; +class Scheduler; class RenderpassCache; +class DescriptorManager; /** * Stores a collection of rasterizer pipelines used during rendering. 
@@ -129,8 +129,8 @@ class RenderpassCache; */ class PipelineCache { public: - PipelineCache(const Instance& instance, TaskScheduler& scheduler, - RenderpassCache& renderpass_cache); + PipelineCache(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorManager& desc_manager); ~PipelineCache(); /// Loads the pipeline cache stored to disk @@ -179,9 +179,6 @@ public: /// Sets the scissor rectange to the provided values void SetScissor(s32 x, s32 y, u32 width, u32 height); - /// Marks all cached pipeline cache state as dirty - void MarkDirty(); - private: /// Applies dynamic pipeline state to the current command buffer void ApplyDynamic(const PipelineInfo& info); @@ -203,9 +200,9 @@ private: private: const Instance& instance; - TaskScheduler& scheduler; + Scheduler& scheduler; RenderpassCache& renderpass_cache; - DescriptorManager desc_manager; + DescriptorManager& desc_manager; // Cached pipelines vk::PipelineCache pipeline_cache; @@ -214,7 +211,6 @@ private: PipelineInfo current_info{}; vk::Viewport current_viewport{}; vk::Rect2D current_scissor{}; - bool state_dirty = true; // Bound shader modules enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 }; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 744baac9a..954d60ac7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -13,7 +13,7 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/video_core.h" #include @@ -88,8 +88,8 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() { return layout; } -constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; -constexpr u32 INDEX_BUFFER_SIZE 
= 8 * 1024 * 1024; +constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024; +constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024; @@ -111,11 +111,11 @@ constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled | constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage; RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, - TaskScheduler& scheduler, TextureRuntime& runtime, - RenderpassCache& renderpass_cache) + Scheduler& scheduler, DescriptorManager& desc_manager, + TextureRuntime& runtime, RenderpassCache& renderpass_cache) : instance{instance}, scheduler{scheduler}, runtime{runtime}, - renderpass_cache{renderpass_cache}, res_cache{*this, runtime}, - pipeline_cache{instance, scheduler, renderpass_cache}, + renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, res_cache{*this, runtime}, + pipeline_cache{instance, scheduler, renderpass_cache, desc_manager}, null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime}, null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime}, vertex_buffer{ @@ -178,7 +178,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan RasterizerVulkan::~RasterizerVulkan() { renderpass_cache.ExitRenderpass(); - scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown); + scheduler.Finish(); vk::Device device = instance.GetDevice(); @@ -466,18 +466,15 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi pipeline_info.vertex_layout = layout; vertex_buffer.Commit(buffer_offset - array_offset); - std::array buffers; - buffers.fill(vertex_buffer.GetHandle()); - - // Bind the vertex buffer with all the bindings - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.bindVertexBuffers(0, 
layout.binding_count, buffers.data(), - binding_offsets.data()); + scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + std::array buffers; + buffers.fill(vertex_buffer.GetHandle()); + render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(), + offsets.data()); + }); } -MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); bool RasterizerVulkan::SetupVertexShader() { - MICROPROFILE_SCOPE(Vulkan_VS); return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs, pipeline_info.vertex_layout); } @@ -533,7 +530,6 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); pipeline_cache.BindPipeline(pipeline_info); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); if (is_indexed) { bool index_u16 = regs.pipeline.index_array.format != 0; const u32 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); @@ -552,13 +548,16 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { std::memcpy(index_ptr, index_data, index_buffer_size); index_buffer.Commit(index_buffer_size); - vk::IndexType index_type = index_u16 ? vk::IndexType::eUint16 : vk::IndexType::eUint8EXT; - command_buffer.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type); - - // Submit draw - command_buffer.drawIndexed(regs.pipeline.num_vertices, 1, 0, -vs_input_index_min, 0); + scheduler.Record([this, offset = index_offset, num_vertices = regs.pipeline.num_vertices, + index_u16, vertex_offset = vs_input_index_min](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::IndexType index_type = index_u16 ? 
vk::IndexType::eUint16 : vk::IndexType::eUint8EXT; + render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), offset, index_type); + render_cmdbuf.drawIndexed(num_vertices, 1, 0, -vertex_offset, 0); + }); } else { - command_buffer.draw(regs.pipeline.num_vertices, 1, 0, 0); + scheduler.Record([num_vertices = regs.pipeline.num_vertices](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.draw(num_vertices, 1, 0, 0); + }); } return true; @@ -863,17 +862,16 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); } - const vk::RenderPassBeginInfo renderpass_begin = { - .renderPass = framebuffer_info.renderpass, + const RenderpassState renderpass_info = { + .renderpass = framebuffer_info.renderpass, .framebuffer = it->second, - .renderArea = vk::Rect2D{.offset = {static_cast(draw_rect.left), - static_cast(draw_rect.bottom)}, - .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}}, + .render_area = vk::Rect2D{.offset = {static_cast(draw_rect.left), + static_cast(draw_rect.bottom)}, + .extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}}, + .clear = {} + }; - .clearValueCount = 0, - .pClearValues = nullptr}; - - renderpass_cache.EnterRenderpass(renderpass_begin); + renderpass_cache.EnterRenderpass(renderpass_info); // Draw the vertex batch bool succeeded = true; @@ -886,8 +884,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); const u32 batch_size = static_cast(vertex_batch.size()); for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { @@ -899,8 +895,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { std::memcpy(array_ptr, vertex_batch.data() + base_vertex, 
vertex_size); vertex_buffer.Commit(vertex_size); - command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); - command_buffer.draw(vertices, 1, base_vertex, 0); + scheduler.Record([this, vertices, base_vertex, + offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){ + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); + render_cmdbuf.draw(vertices, 1, base_vertex, 0); + }); } } @@ -1738,11 +1737,12 @@ void RasterizerVulkan::SyncBlendFuncs() { } void RasterizerVulkan::SyncBlendColor() { - auto blend_color = + const Common::Vec4f blend_color = PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.setBlendConstants(blend_color.AsArray()); + scheduler.Record([blend_color](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.setBlendConstants(blend_color.AsArray()); + }); } void RasterizerVulkan::SyncFogColor() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 3703cfc93..65787b13c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -24,8 +24,9 @@ namespace Vulkan { struct ScreenInfo; class Instance; -class TaskScheduler; +class Scheduler; class RenderpassCache; +class DescriptorManager; struct SamplerInfo { using TextureConfig = Pica::TexturingRegs::TextureConfig; @@ -80,8 +81,8 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated { public: explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, - TaskScheduler& scheduler, TextureRuntime& runtime, - RenderpassCache& renderpass_cache); + Scheduler& scheduler, DescriptorManager& desc_manager, + TextureRuntime& runtime, RenderpassCache& renderpass_cache); ~RasterizerVulkan() override; void LoadDiskResources(const std::atomic_bool& stop_loading, @@ -251,9 +252,10 @@ 
private: private: const Instance& instance; - TaskScheduler& scheduler; + Scheduler& scheduler; TextureRuntime& runtime; RenderpassCache& renderpass_cache; + DescriptorManager& desc_manager; RasterizerCache res_cache; PipelineCache pipeline_cache; bool shader_dirty = true; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 251d09022..f490f663c 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -5,7 +5,7 @@ #include "common/assert.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" namespace Vulkan { @@ -39,7 +39,7 @@ VideoCore::PixelFormat ToFormatDepth(u32 index) { } } -RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler) +RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) : instance{instance}, scheduler{scheduler} { // Pre-create all needed renderpasses by the renderer for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { @@ -88,28 +88,46 @@ RenderpassCache::~RenderpassCache() { device.destroyRenderPass(present_renderpass); } -void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) { - if (active_begin == begin_info) { +void RenderpassCache::EnterRenderpass(const RenderpassState& state) { + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass); + if (current_state == state && !is_dirty) { return; } - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - if (active_begin.renderPass) { - command_buffer.endRenderPass(); + scheduler.Record([should_end = bool(current_state.renderpass), state] + (vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + if (should_end) { + render_cmdbuf.endRenderPass(); + } + + const 
vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = state.renderpass, + .framebuffer = state.framebuffer, + .renderArea = state.render_area, + .clearValueCount = 1, + .pClearValues = &state.clear}; + + render_cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + + }); + + if (is_dirty) { + scheduler.MarkStateNonDirty(StateFlags::Renderpass); } - command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline); - active_begin = begin_info; + current_state = state; } void RenderpassCache::ExitRenderpass() { - if (!active_begin.renderPass) { + if (!current_state.renderpass) { return; } - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.endRenderPass(); - active_begin = vk::RenderPassBeginInfo{}; + scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.endRenderPass(); + }); + + current_state = {}; } void RenderpassCache::CreatePresentRenderpass(vk::Format format) { @@ -136,7 +154,6 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de vk::AttachmentLoadOp load_op, vk::ImageLayout initial_layout, vk::ImageLayout final_layout) const { - // Define attachments u32 attachment_count = 0; std::array attachments; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index 8b4475619..73d443f8e 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -4,24 +4,35 @@ #pragma once +#include #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Vulkan { class Instance; -class TaskScheduler; +class Scheduler; -constexpr u32 MAX_COLOR_FORMATS = 5; -constexpr u32 MAX_DEPTH_FORMATS = 4; +struct RenderpassState { + vk::RenderPass renderpass; + vk::Framebuffer framebuffer; + vk::Rect2D render_area; + vk::ClearValue clear; + + [[nodiscard]] bool 
operator==(const RenderpassState& other) const { + return std::memcmp(this, &other, sizeof(RenderpassState)) == 0; + } +}; class RenderpassCache { + static constexpr u32 MAX_COLOR_FORMATS = 5; + static constexpr u32 MAX_DEPTH_FORMATS = 4; public: - RenderpassCache(const Instance& instance, TaskScheduler& scheduler); + RenderpassCache(const Instance& instance, Scheduler& scheduler); ~RenderpassCache(); /// Begins a new renderpass only when no other renderpass is currently active - void EnterRenderpass(const vk::RenderPassBeginInfo begin_info); + void EnterRenderpass(const RenderpassState& state); /// Exits from any currently active renderpass instance void ExitRenderpass(); @@ -32,16 +43,12 @@ public: /// Returns the renderpass associated with the color-depth format pair [[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, bool is_clear) const; + /// Returns the swapchain clear renderpass [[nodiscard]] vk::RenderPass GetPresentRenderpass() const { return present_renderpass; } - /// Invalidates the currently active renderpass - void OnSlotSwitch() { - active_begin = vk::RenderPassBeginInfo{}; - } - private: /// Creates a renderpass configured appropriately and stores it in cached_renderpasses vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth, @@ -50,9 +57,8 @@ private: private: const Instance& instance; - TaskScheduler& scheduler; - - vk::RenderPassBeginInfo active_begin{}; + Scheduler& scheduler; + RenderpassState current_state{}; vk::RenderPass present_renderpass{}; vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2]; }; diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp new file mode 100644 index 000000000..ea8ed48d4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: 
GPL-2.0-or-later + +#include +#include +#include "video_core/renderer_vulkan/vk_resource_pool.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_instance.h" + +namespace Vulkan { + +ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) + : master_semaphore{&master_semaphore_}, grow_step{grow_step_} {} + +std::size_t ResourcePool::CommitResource() { + // Refresh semaphore to query updated results + master_semaphore->Refresh(); + const u64 gpu_tick = master_semaphore->KnownGpuTick(); + const auto search = [this, gpu_tick](std::size_t begin, std::size_t end) -> std::optional { + for (std::size_t iterator = begin; iterator < end; ++iterator) { + if (gpu_tick >= ticks[iterator]) { + ticks[iterator] = master_semaphore->CurrentTick(); + return iterator; + } + } + return std::nullopt; + }; + + // Try to find a free resource from the hinted position to the end. + std::optional found = search(hint_iterator, ticks.size()); + if (!found) { + // Search from beginning to the hinted position. + found = search(0, hint_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const std::size_t free_resource = ManageOverflow(); + + ticks[free_resource] = master_semaphore->CurrentTick(); + found = free_resource; + } + } + + // Free iterator is hinted to the resource after the one that's been commited. + hint_iterator = (*found + 1) % ticks.size(); + return *found; +} + +std::size_t ResourcePool::ManageOverflow() { + const std::size_t old_capacity = ticks.size(); + Grow(); + + // The last entry is guaranted to be free, since it's the first element of the freshly + // allocated resources. 
+ return old_capacity; +} + +void ResourcePool::Grow() { + const size_t old_capacity = ticks.size(); + ticks.resize(old_capacity + grow_step); + Allocate(old_capacity, old_capacity + grow_step); +} + +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4; + +struct CommandPool::Pool { + vk::CommandPool handle; + std::array cmdbufs; +}; + +CommandPool::CommandPool(const Instance& instance, MasterSemaphore& master_semaphore) + : ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {} + +CommandPool::~CommandPool() { + vk::Device device = instance.GetDevice(); + for (Pool& pool : pools) { + device.destroyCommandPool(pool.handle); + } +} + +void CommandPool::Allocate(std::size_t begin, std::size_t end) { + // Command buffers are going to be committed, recorded, executed every single usage cycle. + // They are also going to be reset when committed. + Pool& pool = pools.emplace_back(); + + const vk::CommandPoolCreateInfo pool_create_info = { + .flags = vk::CommandPoolCreateFlagBits::eTransient | + vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex() + }; + + vk::Device device = instance.GetDevice(); + pool.handle = device.createCommandPool(pool_create_info); + + const vk::CommandBufferAllocateInfo buffer_alloc_info = {.commandPool = pool.handle, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = COMMAND_BUFFER_POOL_SIZE}; + + auto buffers = device.allocateCommandBuffers(buffer_alloc_info); + std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin()); +} + +vk::CommandBuffer CommandPool::Commit() { + const std::size_t index = CommitResource(); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return pools[pool_index].cmdbufs[sub_index]; +} + +DescriptorPool::DescriptorPool(const Instance& instance, MasterSemaphore& master_semaphore) + : ResourcePool{master_semaphore, 1}, instance{instance} {} + 
+DescriptorPool::~DescriptorPool() { + vk::Device device = instance.GetDevice(); + for (vk::DescriptorPool& pool : pools) { + device.destroyDescriptorPool(pool); + } +} + +void DescriptorPool::RefreshTick() { + ticks[pool_index] = master_semaphore->CurrentTick(); +} + +void DescriptorPool::Allocate(std::size_t begin, std::size_t end) { + vk::DescriptorPool& pool = pools.emplace_back(); + + // Choose a sane pool size good for most games + static constexpr std::array pool_sizes = {{ + {vk::DescriptorType::eUniformBuffer, 2048}, + {vk::DescriptorType::eSampledImage, 4096}, + {vk::DescriptorType::eSampler, 4096}, + {vk::DescriptorType::eUniformTexelBuffer, 2048}, + {vk::DescriptorType::eStorageImage, 1024}}}; + + const vk::DescriptorPoolCreateInfo descriptor_pool_info = { + .maxSets = 8192, + .poolSizeCount = static_cast(pool_sizes.size()), + .pPoolSizes = pool_sizes.data()}; + + pool = instance.GetDevice().createDescriptorPool(descriptor_pool_info); +} + +vk::DescriptorPool DescriptorPool::Commit() { + pool_index = CommitResource(); + instance.GetDevice().resetDescriptorPool(pools[pool_index]); + return pools[pool_index]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h new file mode 100644 index 000000000..9c26e4576 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -0,0 +1,84 @@ +// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Instance; +class MasterSemaphore; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow allocating more + * resources. 
+ */ +class ResourcePool { +public: + explicit ResourcePool() = default; + explicit ResourcePool(MasterSemaphore& master_semaphore, std::size_t grow_step); + virtual ~ResourcePool() = default; + + ResourcePool& operator=(ResourcePool&&) noexcept = default; + ResourcePool(ResourcePool&&) noexcept = default; + + ResourcePool& operator=(const ResourcePool&) = default; + ResourcePool(const ResourcePool&) = default; + +protected: + std::size_t CommitResource(); + + /// Called when a chunk of resources have to be allocated. + virtual void Allocate(std::size_t begin, std::size_t end) = 0; + +private: + /// Manages pool overflow allocating new resources. + std::size_t ManageOverflow(); + + /// Allocates a new page of resources. + void Grow(); + +protected: + MasterSemaphore* master_semaphore{nullptr}; + std::size_t grow_step = 0; ///< Number of new resources created after an overflow + std::size_t hint_iterator = 0; ///< Hint to where the next free resources is likely to be found + std::vector ticks; ///< Ticks for each resource +}; + +class CommandPool final : public ResourcePool { +public: + explicit CommandPool(const Instance& instance, MasterSemaphore& master_semaphore); + ~CommandPool() override; + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::CommandBuffer Commit(); + +private: + struct Pool; + const Instance& instance; + std::vector pools; +}; + +class DescriptorPool final : public ResourcePool { +public: + explicit DescriptorPool(const Instance& instance, MasterSemaphore& master_semaphore); + ~DescriptorPool() override; + + /// Refreshes the tick of the currently commited pool + void RefreshTick(); + + void Allocate(std::size_t begin, std::size_t end) override; + + vk::DescriptorPool Commit(); + +private: + const Instance& instance; + std::vector pools; + std::size_t pool_index; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp new file mode 100644 
index 000000000..ee58e46f9 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -0,0 +1,180 @@ +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include "common/microprofile.h" +#include "common/thread.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" + +namespace Vulkan { + +void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) { + auto command = first; + while (command != nullptr) { + auto next = command->GetNext(); + command->Execute(render_cmdbuf, upload_cmdbuf); + command->~Command(); + command = next; + } + submit = false; + command_offset = 0; + first = nullptr; + last = nullptr; +} + +Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer) + : instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore} { + AcquireNewChunk(); + AllocateWorkerCommandBuffers(); + worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); +} + +Scheduler::~Scheduler() = default; + +void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) { + SubmitExecution(signal, wait); +} + +void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) { + const u64 presubmit_tick = CurrentTick(); + SubmitExecution(signal, wait); + WaitWorker(); + Wait(presubmit_tick); +} + +MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); +void Scheduler::WaitWorker() { + MICROPROFILE_SCOPE(Vulkan_WaitForWorker); + DispatchWork(); + + std::unique_lock lock{work_mutex}; + wait_cv.wait(lock, [this] { return work_queue.empty(); }); +} + +void Scheduler::DispatchWork() { + if (chunk->Empty()) { + return; + } + + { + std::scoped_lock lock{work_mutex}; + work_queue.push(std::move(chunk)); + } + + 
 work_cv.notify_one(); + AcquireNewChunk(); +} + +void Scheduler::WorkerThread(std::stop_token stop_token) { + do { + std::unique_ptr work; + bool has_submit{false}; + { + std::unique_lock lock{work_mutex}; + if (work_queue.empty()) { + wait_cv.notify_all(); + } + work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); + if (stop_token.stop_requested()) { + continue; + } + work = std::move(work_queue.front()); + work_queue.pop(); + + has_submit = work->HasSubmit(); + work->ExecuteAll(render_cmdbuf, upload_cmdbuf); + } + if (has_submit) { + AllocateWorkerCommandBuffers(); + } + std::scoped_lock reserve_lock{reserve_mutex}; + chunk_reserve.push_back(std::move(work)); + } while (!stop_token.stop_requested()); +} + +void Scheduler::AllocateWorkerCommandBuffers() { + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }; + + upload_cmdbuf = command_pool.Commit(); + upload_cmdbuf.begin(begin_info); + + render_cmdbuf = command_pool.Commit(); + render_cmdbuf.begin(begin_info); +} + +MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255)); +void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { + renderer.FlushBuffers(); + const u64 signal_value = master_semaphore.NextTick(); + state = StateFlags::AllDirty; + + Record([signal_semaphore, wait_semaphore, signal_value, this] + (vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) { + MICROPROFILE_SCOPE(Vulkan_Submit); + upload_cmdbuf.end(); + render_cmdbuf.end(); + + const vk::Semaphore timeline_semaphore = master_semaphore.Handle(); + + const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + + const u32 num_wait_semaphores = wait_semaphore ?
2U : 1U; + const std::array wait_values{signal_value - 1, u64(1)}; + const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eColorAttachmentOutput, + }; + + const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { + .waitSemaphoreValueCount = num_wait_semaphores, + .pWaitSemaphoreValues = wait_values.data(), + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; + + const std::array cmdbuffers = {upload_cmdbuf, render_cmdbuf}; + const vk::SubmitInfo submit_info = { + .pNext = &timeline_si, + .waitSemaphoreCount = num_wait_semaphores, + .pWaitSemaphores = wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 2, + .pCommandBuffers = cmdbuffers.data(), + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), + }; + + try { + vk::Queue queue = instance.GetGraphicsQueue(); + queue.submit(submit_info); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); + UNREACHABLE(); + } + }); + + chunk->MarkSubmit(); + DispatchWork(); +} + +void Scheduler::AcquireNewChunk() { + std::scoped_lock lock{reserve_mutex}; + if (chunk_reserve.empty()) { + chunk = std::make_unique(); + return; + } + + chunk = std::move(chunk_reserve.back()); + chunk_reserve.pop_back(); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h new file mode 100644 index 000000000..090c33068 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -0,0 +1,208 @@ +// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include +#include +#include +#include +#include +#include +#include "common/alignment.h" +#include 
"common/common_types.h" +#include "common/common_funcs.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +enum class StateFlags { + AllDirty = 0, + Renderpass = 1 << 0, + Pipeline = 1 << 1, + DescriptorSets = 1 << 2 +}; + +DECLARE_ENUM_FLAG_OPERATORS(StateFlags) + +class Instance; +class RendererVulkan; + +/// The scheduler abstracts command buffer and fence management with an interface that's able to do +/// OpenGL-like operations on Vulkan command buffers. +class Scheduler { +public: + explicit Scheduler(const Instance& instance, RendererVulkan& renderer); + ~Scheduler(); + + /// Sends the current execution context to the GPU. + void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Sends the current execution context to the GPU and waits for it to complete. + void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr); + + /// Waits for the worker thread to finish executing everything. After this function returns it's + /// safe to touch worker resources. + void WaitWorker(); + + /// Sends currently recorded work to the worker thread. + void DispatchWork(); + + /// Records the command to the current chunk. + template + void Record(T&& command) { + if (chunk->Record(command)) { + return; + } + DispatchWork(); + (void)chunk->Record(command); + } + + /// Marks the provided state as non dirty + void MarkStateNonDirty(StateFlags flag) noexcept { + state |= flag; + } + + /// Returns true if the state is dirty + [[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept { + return False(state & flag); + } + + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return master_semaphore.CurrentTick(); + } + + /// Returns true when a tick has been triggered by the GPU. 
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept { + return master_semaphore.IsFree(tick); + } + + /// Waits for the given tick to trigger on the GPU. + void Wait(u64 tick) { + if (tick >= master_semaphore.CurrentTick()) { + // Make sure we are not waiting for the current tick without signalling + Flush(); + } + master_semaphore.Wait(tick); + } + + /// Returns the master timeline semaphore. + [[nodiscard]] MasterSemaphore& GetMasterSemaphore() noexcept { + return master_semaphore; + } + +private: + class Command { + public: + virtual ~Command() = default; + + virtual void Execute(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0; + + Command* GetNext() const { + return next; + } + + void SetNext(Command* next_) { + next = next_; + } + + private: + Command* next = nullptr; + }; + + template + class TypedCommand final : public Command { + public: + explicit TypedCommand(T&& command_) : command{std::move(command_)} {} + ~TypedCommand() override = default; + + TypedCommand(TypedCommand&&) = delete; + TypedCommand& operator=(TypedCommand&&) = delete; + + void Execute(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) const override { + command(render_cmdbuf, upload_cmdbuf); + } + + private: + T command; + }; + + class CommandChunk final { + public: + void ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf); + + template + bool Record(T& command) { + using FuncType = TypedCommand; + static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large"); + + recorded_counts++; + command_offset = Common::AlignUp(command_offset, alignof(FuncType)); + if (command_offset > sizeof(data) - sizeof(FuncType)) { + return false; + } + Command* const current_last = last; + last = new (data.data() + command_offset) FuncType(std::move(command)); + + if (current_last) { + current_last->SetNext(last); + } else { + first = last; + } + command_offset += sizeof(FuncType); + return true; + } + + void MarkSubmit() { + 
submit = true; + } + + bool Empty() const { + return recorded_counts == 0; + } + + bool HasSubmit() const { + return submit; + } + + private: + Command* first = nullptr; + Command* last = nullptr; + + std::size_t recorded_counts = 0; + std::size_t command_offset = 0; + bool submit = false; + alignas(std::max_align_t) std::array data{}; + }; + +private: + void WorkerThread(std::stop_token stop_token); + + void AllocateWorkerCommandBuffers(); + + void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore); + + void AcquireNewChunk(); + +private: + const Instance& instance; + RendererVulkan& renderer; + MasterSemaphore master_semaphore; + CommandPool command_pool; + std::unique_ptr chunk; + std::queue> work_queue; + std::vector> chunk_reserve; + vk::CommandBuffer render_cmdbuf; + vk::CommandBuffer upload_cmdbuf; + StateFlags state{}; + std::mutex reserve_mutex; + std::mutex work_mutex; + std::condition_variable_any work_cv; + std::condition_variable wait_cv; + std::jthread worker_thread; + std::jthread present_thread; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp similarity index 99% rename from src/video_core/renderer_vulkan/vk_shader.cpp rename to src/video_core/renderer_vulkan/vk_shader_util.cpp index 1e2b412da..ea073f6d8 100644 --- a/src/video_core/renderer_vulkan/vk_shader.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -7,7 +7,7 @@ #include #include "common/assert.h" #include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_shader.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader.h b/src/video_core/renderer_vulkan/vk_shader_util.h similarity index 100% rename from src/video_core/renderer_vulkan/vk_shader.h rename to src/video_core/renderer_vulkan/vk_shader_util.h diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index f5496a209..6c43a81f1 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -8,7 +8,7 @@ #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include @@ -69,17 +69,16 @@ StagingBuffer::~StagingBuffer() { vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); } -StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size, +StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback) - : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT}, - staging{instance, total_size, readback}, bucket_size{size} {} + : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, + total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {} -StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size, +StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, vk::BufferUsageFlagBits usage, std::span view_formats, bool readback) - : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT}, - staging{instance, total_size, readback}, usage{usage}, bucket_size{size} { - + : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, + usage{usage}, total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} { const vk::BufferCreateInfo buffer_info = { .size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst}; @@ -94,14 +93,14 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, 
&unsafe_buffer, &allocation, &alloc_info); - buffer = vk::Buffer{unsafe_buffer}; + gpu_buffer = vk::Buffer{unsafe_buffer}; ASSERT(view_formats.size() < MAX_BUFFER_VIEWS); vk::Device device = instance.GetDevice(); for (std::size_t i = 0; i < view_formats.size(); i++) { const vk::BufferViewCreateInfo view_info = { - .buffer = buffer, .format = view_formats[i], .offset = 0, .range = total_size}; + .buffer = gpu_buffer, .format = view_formats[i], .offset = 0, .range = total_size}; views[i] = device.createBufferView(view_info); } @@ -110,9 +109,9 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u } StreamBuffer::~StreamBuffer() { - if (buffer) { + if (gpu_buffer) { vk::Device device = instance.GetDevice(); - vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); + vmaDestroyBuffer(instance.GetAllocator(), static_cast(gpu_buffer), allocation); for (std::size_t i = 0; i < view_count; i++) { device.destroyBufferView(views[i]); } @@ -121,92 +120,103 @@ StreamBuffer::~StreamBuffer() { std::tuple StreamBuffer::Map(u32 size, u32 alignment) { ASSERT(size <= total_size && alignment <= total_size); - - const u32 current_bucket = scheduler.GetCurrentSlotIndex(); - auto& bucket = buckets[current_bucket]; + Bucket& bucket = buckets[bucket_index]; if (alignment > 0) { - bucket.offset = Common::AlignUp(bucket.offset, alignment); + bucket.cursor = Common::AlignUp(bucket.cursor, alignment); } - if (bucket.offset + size > bucket_size) { - UNREACHABLE(); + // If we reach bucket boundaries move over to the next one + if (bucket.cursor + size > bucket_size) { + bucket.gpu_tick = scheduler.CurrentTick(); + MoveNextBucket(); + return Map(size, alignment); } - bool invalidate = false; - if (bucket.invalid) { - invalidate = true; - bucket.invalid = false; - } - - const u32 buffer_offset = current_bucket * bucket_size + bucket.offset; + const bool invalidate = std::exchange(bucket.invalid, false); + const u32 buffer_offset = bucket_index * 
bucket_size + bucket.cursor; u8* mapped = reinterpret_cast(staging.mapped.data() + buffer_offset); + return std::make_tuple(mapped, buffer_offset, invalidate); } void StreamBuffer::Commit(u32 size) { - buckets[scheduler.GetCurrentSlotIndex()].offset += size; + buckets[bucket_index].cursor += size; } void StreamBuffer::Flush() { - const u32 current_bucket = scheduler.GetCurrentSlotIndex(); - const u32 flush_start = current_bucket * bucket_size; - const u32 flush_size = buckets[current_bucket].offset; + if (readback) { + LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer"); + return; + } + + Bucket& bucket = buckets[bucket_index]; + const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; + const u32 flush_size = bucket.cursor - bucket.flush_cursor; ASSERT(flush_size <= bucket_size); if (flush_size > 0) [[likely]] { // Ensure all staging writes are visible to the host memory domain VmaAllocator allocator = instance.GetAllocator(); vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size); + if (gpu_buffer) { + scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { + const vk::BufferCopy copy_region = { + .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size}; - // Make the data available to the GPU if possible - if (buffer) { - const vk::BufferCopy copy_region = { - .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size}; + upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region); - vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer(); - command_buffer.copyBuffer(staging.buffer, buffer, copy_region); + auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); + const vk::BufferMemoryBarrier buffer_barrier = { + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = gpu_buffer, + 
.offset = flush_start, + .size = flush_size}; - // Add pipeline barrier for the flushed region - auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage); - const vk::BufferMemoryBarrier buffer_barrier = { - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = access_mask, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = buffer, - .offset = flush_start, - .size = flush_size}; - - command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, - vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, - {}); + upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, + vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, + {}); + }); } + bucket.flush_cursor += flush_size; } - - SwitchBucket(); } void StreamBuffer::Invalidate() { - const u32 current_bucket = scheduler.GetCurrentSlotIndex(); - const u32 flush_start = current_bucket * bucket_size; - const u32 flush_size = buckets[current_bucket].offset; + if (!readback) { + return; + } + + Bucket& bucket = buckets[bucket_index]; + const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; + const u32 flush_size = bucket.cursor - bucket.flush_cursor; ASSERT(flush_size <= bucket_size); if (flush_size > 0) [[likely]] { // Ensure the staging memory can be read by the host VmaAllocator allocator = instance.GetAllocator(); vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size); + bucket.flush_cursor += flush_size; } - - SwitchBucket(); } -void StreamBuffer::SwitchBucket() { - const u32 current_bucket = scheduler.GetCurrentSlotIndex(); - const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT; - buckets[next_bucket].offset = 0; - buckets[next_bucket].invalid = true; +void StreamBuffer::MoveNextBucket() { + // Flush and Invalidate are bucket local operations for simplicity so perform them here + if (readback) { + Invalidate(); + } else { + Flush(); + 
} + + bucket_index = (bucket_index + 1) % BUCKET_COUNT; + Bucket& next_bucket = buckets[bucket_index]; + scheduler.Wait(next_bucket.gpu_tick); + next_bucket.cursor = 0; + next_bucket.flush_cursor = 0; + next_bucket.invalid = true; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 7a63e66a9..d573c2f0e 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -15,9 +15,7 @@ VK_DEFINE_HANDLE(VmaAllocation) namespace Vulkan { class Instance; -class TaskScheduler; - -constexpr u32 MAX_BUFFER_VIEWS = 3; +class Scheduler; struct StagingBuffer { StagingBuffer(const Instance& instance, u32 size, bool readback); @@ -30,12 +28,14 @@ struct StagingBuffer { }; class StreamBuffer { + static constexpr u32 MAX_BUFFER_VIEWS = 3; + static constexpr u32 BUCKET_COUNT = 8; public: /// Staging only constructor - StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size, + StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false); /// Staging + GPU streaming constructor - StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size, + StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, vk::BufferUsageFlagBits usage, std::span views, bool readback = false); ~StreamBuffer(); @@ -55,45 +55,47 @@ public: /// Invalidates staging memory for reading void Invalidate(); - /// Switches to the next available bucket - void SwitchBucket(); - /// Returns the GPU buffer handle - vk::Buffer GetHandle() const { - return buffer; + [[nodiscard]] vk::Buffer GetHandle() const { + return gpu_buffer; } /// Returns the staging buffer handle - vk::Buffer GetStagingHandle() const { + [[nodiscard]] vk::Buffer GetStagingHandle() const { return staging.buffer; } /// Returns an immutable reference to the requested buffer view - const vk::BufferView& GetView(u32 index = 0) const 
{ + [[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const { ASSERT(index < view_count); return views[index]; } private: + /// Moves to the next bucket + void MoveNextBucket(); + struct Bucket { - bool invalid; - u32 fence_counter; - u32 offset; + bool invalid = false; + u32 gpu_tick = 0; + u32 cursor = 0; + u32 flush_cursor = 0; }; +private: const Instance& instance; - TaskScheduler& scheduler; - u32 total_size = 0; + Scheduler& scheduler; StagingBuffer staging; - - vk::Buffer buffer{}; + vk::Buffer gpu_buffer{}; VmaAllocation allocation{}; vk::BufferUsageFlagBits usage; std::array views{}; + std::array buckets; std::size_t view_count = 0; - + u32 total_size = 0; u32 bucket_size = 0; - std::array buckets{}; + u32 bucket_index = 0; + bool readback = false; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 13fc993d0..bcc1600bf 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -8,34 +8,35 @@ #include "core/settings.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" namespace Vulkan { -Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache) - : instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} { - - // Set the surface format early for RenderpassCache to create the present renderpass - Configure(0, 0); +Swapchain::Swapchain(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, + surface{instance.GetSurface()} { + FindPresentFormat(); + SetPresentMode(); renderpass_cache.CreatePresentRenderpass(surface_format.format); } Swapchain::~Swapchain() { - vk::Device device 
= instance.GetDevice(); - device.destroySwapchainKHR(swapchain); + Destroy(); - for (auto& image : swapchain_images) { - device.destroyImageView(image.image_view); - device.destroyFramebuffer(image.framebuffer); + vk::Device device = instance.GetDevice(); + for (const vk::Semaphore semaphore : image_acquired) { + device.destroySemaphore(semaphore); + } + for (const vk::Semaphore semaphore : present_ready) { + device.destroySemaphore(semaphore); } } void Swapchain::Create(u32 width, u32 height) { is_outdated = false; is_suboptimal = false; - - // Fetch information about the provided surface - Configure(width, height); + SetSurfaceProperties(width, height); const std::array queue_family_indices = { instance.GetGraphicsQueueFamilyIndex(), @@ -59,70 +60,29 @@ void Swapchain::Create(u32 width, u32 height) { .pQueueFamilyIndices = queue_family_indices.data(), .preTransform = transform, .presentMode = present_mode, - .clipped = true, - .oldSwapchain = swapchain}; + .clipped = true}; vk::Device device = instance.GetDevice(); - vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info); + device.waitIdle(); + Destroy(); - // If an old swapchain exists, destroy it and move the new one to its place. 
- if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) { - device.destroySwapchainKHR(old_swapchain); - } + swapchain = device.createSwapchainKHR(swapchain_info); + SetupImages(); - auto images = device.getSwapchainImagesKHR(swapchain); - - // Destroy the previous image views - for (auto& image : swapchain_images) { - device.destroyImageView(image.image_view); - device.destroyFramebuffer(image.framebuffer); - } - - swapchain_images.clear(); - swapchain_images.resize(images.size()); - - std::transform( - images.begin(), images.end(), swapchain_images.begin(), [device, this](vk::Image image) -> Image { - const vk::ImageViewCreateInfo view_info = { - .image = image, - .viewType = vk::ImageViewType::e2D, - .format = surface_format.format, - .subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1}}; - - vk::ImageView image_view = device.createImageView(view_info); - const std::array attachments = {image_view}; - - const vk::FramebufferCreateInfo framebuffer_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), - .attachmentCount = 1, - .pAttachments = attachments.data(), - .width = extent.width, - .height = extent.height, - .layers = 1}; - - vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info); - - return Image{.image = image, .image_view = image_view, .framebuffer = framebuffer}; - }); + resource_ticks.clear(); + resource_ticks.resize(image_count); } -// Wait for maximum of 1 second -constexpr u64 ACQUIRE_TIMEOUT = 1000000000; - MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245)); -void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) { +void Swapchain::AcquireNextImage() { if (NeedsRecreation()) [[unlikely]] { return; } MICROPROFILE_SCOPE(Vulkan_Acquire); vk::Device device = instance.GetDevice(); - vk::Result result = device.acquireNextImageKHR(swapchain, 
ACQUIRE_TIMEOUT, signal_acquired, - VK_NULL_HANDLE, &current_image); + vk::Result result = device.acquireNextImageKHR(swapchain, UINT64_MAX, image_acquired[frame_index], + VK_NULL_HANDLE, &image_index); switch (result) { case vk::Result::eSuccess: break; @@ -133,42 +93,46 @@ void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) { is_outdated = true; break; default: - LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result"); + LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result {}", result); break; } + + scheduler.Wait(resource_ticks[image_index]); + resource_ticks[image_index] = scheduler.CurrentTick(); } MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245)); -void Swapchain::Present(vk::Semaphore wait_for_present) { +void Swapchain::Present() { if (NeedsRecreation()) [[unlikely]] { return; } - MICROPROFILE_SCOPE(Vulkan_Present); - const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1, - .pWaitSemaphores = &wait_for_present, - .swapchainCount = 1, - .pSwapchains = &swapchain, - .pImageIndices = &current_image}; + scheduler.Record([this, index = image_index](vk::CommandBuffer, vk::CommandBuffer) { + const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1, + .pWaitSemaphores = &present_ready[index], + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = &index}; - vk::Queue present_queue = instance.GetPresentQueue(); - try { - [[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info); - } catch (vk::OutOfDateKHRError err) { - is_outdated = true; - } catch (vk::SystemError err) { - LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed"); - UNREACHABLE(); - } + vk::Queue present_queue = instance.GetPresentQueue(); + try { + [[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info); + } catch (vk::OutOfDateKHRError& err) { + is_outdated = true; + } catch (vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "Swapchain
presentation failed"); + UNREACHABLE(); + } + }); + + frame_index = (frame_index + 1) % image_count; } -void Swapchain::Configure(u32 width, u32 height) { - vk::PhysicalDevice physical = instance.GetPhysicalDevice(); +void Swapchain::FindPresentFormat() { + const std::vector formats = + instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface); - // Choose surface format - auto formats = physical.getSurfaceFormatsKHR(surface); surface_format = formats[0]; - if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { surface_format.format = vk::Format::eB8G8R8A8Unorm; } else { @@ -179,17 +143,19 @@ void Swapchain::Configure(u32 width, u32 height) { if (it == formats.end()) { LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!"); + UNREACHABLE(); } else { surface_format = *it; } } +} - // Checks if a particular mode is supported, if it is, returns that mode. - auto modes = physical.getSurfacePresentModesKHR(surface); - - // FIFO is guaranteed by the Vulkan standard to be available +void Swapchain::SetPresentMode() { present_mode = vk::PresentModeKHR::eFifo; if (!Settings::values.use_vsync_new) { + const std::vector modes = + instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface); + const auto FindMode = [&modes](vk::PresentModeKHR requested) { auto it = std::find_if(modes.begin(), modes.end(), @@ -198,7 +164,7 @@ void Swapchain::Configure(u32 width, u32 height) { return it != modes.end(); }; - // Prefer Immediate when vsync is disabled for fastest acquire + // Prefer immediate when vsync is disabled for fastest acquire if (FindMode(vk::PresentModeKHR::eImmediate)) { present_mode = vk::PresentModeKHR::eImmediate; } else if (FindMode(vk::PresentModeKHR::eMailbox)) { @@ -206,15 +172,18 @@ void Swapchain::Configure(u32 width, u32 height) { } } - // Query surface extent - auto capabilities = physical.getSurfaceCapabilitiesKHR(surface); - extent = capabilities.currentExtent; +} +void Swapchain::SetSurfaceProperties(u32 width, u32 
height) { + const vk::SurfaceCapabilitiesKHR capabilities = + instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface); + + extent = capabilities.currentExtent; if (capabilities.currentExtent.width == std::numeric_limits::max()) { extent.width = std::clamp(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width); - extent.height = std::clamp(height, capabilities.minImageExtent.height, - capabilities.maxImageExtent.height); + extent.height = + std::clamp(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height); } // Select number of images in swap chain, we prefer one buffer in the background to work on @@ -230,4 +199,54 @@ void Swapchain::Configure(u32 width, u32 height) { } } +void Swapchain::Destroy() { + vk::Device device = instance.GetDevice(); + if (swapchain) { + device.destroySwapchainKHR(swapchain); + } + for (const vk::ImageView view : image_views) { + device.destroyImageView(view); + } + for (const vk::Framebuffer framebuffer : framebuffers) { + device.destroyFramebuffer(framebuffer); + } + + frame_index = 0; + image_acquired.clear(); + framebuffers.clear(); + image_views.clear(); +} + +void Swapchain::SetupImages() { + vk::Device device = instance.GetDevice(); + images = device.getSwapchainImagesKHR(swapchain); + + for (const vk::Image image : images) { + image_acquired.push_back(device.createSemaphore({})); + present_ready.push_back(device.createSemaphore({})); + + const vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = vk::ImageViewType::e2D, + .format = surface_format.format, + .subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1}}; + + image_views.push_back(device.createImageView(view_info)); + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .attachmentCount = 1, + .pAttachments = &image_views.back(), + .width = 
extent.width, + .height = extent.height, + .layers = 1}; + + framebuffers.push_back(device.createFramebuffer(framebuffer_info)); + } +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index d7b30280d..739eefb58 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -11,77 +11,97 @@ namespace Vulkan { class Instance; +class Scheduler; class RenderpassCache; class Swapchain { public: - Swapchain(const Instance& instance, RenderpassCache& renderpass_cache); + Swapchain(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache); ~Swapchain(); /// Creates (or recreates) the swapchain with a given size. void Create(u32 width, u32 height); /// Acquires the next image in the swapchain. - void AcquireNextImage(vk::Semaphore signal_acquired); + void AcquireNextImage(); /// Presents the current image and move to the next one - void Present(vk::Semaphore wait_for_present); + void Present(); + + /// Returns true when the swapchain should be recreated + [[nodiscard]] bool NeedsRecreation() const { + return is_suboptimal || is_outdated; + } /// Returns current swapchain state - vk::Extent2D GetExtent() const { + [[nodiscard]] vk::Extent2D GetExtent() const { return extent; } /// Returns the swapchain surface - vk::SurfaceKHR GetSurface() const { + [[nodiscard]] vk::SurfaceKHR GetSurface() const { return surface; } /// Returns the current framebuffe - vk::Framebuffer GetFramebuffer() const { - return swapchain_images[current_image].framebuffer; + [[nodiscard]] vk::Framebuffer GetFramebuffer() const { + return framebuffers[frame_index]; } /// Returns the swapchain format - vk::SurfaceFormatKHR GetSurfaceFormat() const { + [[nodiscard]] vk::SurfaceFormatKHR GetSurfaceFormat() const { return surface_format; } /// Returns the Vulkan swapchain handle - vk::SwapchainKHR GetHandle() const { + [[nodiscard]] vk::SwapchainKHR 
GetHandle() const { return swapchain; } - /// Returns true when the swapchain should be recreated - bool NeedsRecreation() const { - return is_suboptimal || is_outdated; + [[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const { + return image_acquired[frame_index]; + } + + [[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const { + return present_ready[image_index]; } private: - void Configure(u32 width, u32 height); + /// Selects the best available swapchain image format + void FindPresentFormat(); + + /// Sets the best available present mode + void SetPresentMode(); + + /// Sets the surface properties according to device capabilities + void SetSurfaceProperties(u32 width, u32 height); + + /// Destroys current swapchain resources + void Destroy(); + + /// Performs creation of image views and framebuffers from the swapchain images + void SetupImages(); private: const Instance& instance; + Scheduler& scheduler; RenderpassCache& renderpass_cache; vk::SwapchainKHR swapchain{}; vk::SurfaceKHR surface{}; - - // Swapchain properties vk::SurfaceFormatKHR surface_format; vk::PresentModeKHR present_mode; vk::Extent2D extent; vk::SurfaceTransformFlagBitsKHR transform; - u32 image_count; - - struct Image { - vk::Image image; - vk::ImageView image_view; - vk::Framebuffer framebuffer; - }; - - // Swapchain state - std::vector swapchain_images; - u32 current_image = 0; + std::vector images; + std::vector image_views; + std::vector framebuffers; + std::vector resource_ticks; + std::vector image_acquired; + std::vector present_ready; + u32 image_count = 0; + u32 image_index = 0; + u32 frame_index = 0; bool is_outdated = true; bool is_suboptimal = true; }; diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp deleted file mode 100644 index 054c89d31..000000000 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed 
under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" - -namespace Vulkan { - -TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer) - : instance{instance}, renderer{renderer} { - vk::Device device = instance.GetDevice(); - const vk::CommandPoolCreateInfo command_pool_info = { - .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, - .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()}; - - command_pool = device.createCommandPool(command_pool_info); - - // If supported, prefer timeline semaphores over binary ones - if (instance.IsTimelineSemaphoreSupported()) { - const vk::StructureChain timeline_info = { - vk::SemaphoreCreateInfo{}, - vk::SemaphoreTypeCreateInfo{.semaphoreType = vk::SemaphoreType::eTimeline, - .initialValue = 0}}; - - timeline = device.createSemaphore(timeline_info.get()); - } - - constexpr std::array pool_sizes = { - vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 2048}, - vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 2048}, - vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048}, - vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 4096}, - vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 2048}, - vk::DescriptorPoolSize{vk::DescriptorType::eStorageImage, 1024}}; - - const vk::DescriptorPoolCreateInfo descriptor_pool_info = { - .maxSets = 8192, - .poolSizeCount = static_cast(pool_sizes.size()), - .pPoolSizes = pool_sizes.data()}; - - const vk::CommandBufferAllocateInfo buffer_info = {.commandPool = command_pool, - .level = vk::CommandBufferLevel::ePrimary, - .commandBufferCount = - 2 * SCHEDULER_COMMAND_COUNT}; - - const auto command_buffers = 
device.allocateCommandBuffers(buffer_info); - for (std::size_t i = 0; i < commands.size(); i++) { - commands[i] = ExecutionSlot{ - .image_acquired = device.createSemaphore({}), - .present_ready = device.createSemaphore({}), - .fence = device.createFence({}), - .descriptor_pool = device.createDescriptorPool(descriptor_pool_info), - .render_command_buffer = command_buffers[2 * i], - .upload_command_buffer = command_buffers[2 * i + 1], - }; - } - - const vk::CommandBufferBeginInfo begin_info = { - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit}; - - // Begin first command - auto& command = commands[current_command]; - command.render_command_buffer.begin(begin_info); - command.fence_counter = next_fence_counter++; -} - -TaskScheduler::~TaskScheduler() { - vk::Device device = instance.GetDevice(); - device.waitIdle(); - - if (timeline) { - device.destroySemaphore(timeline); - } - - for (const auto& command : commands) { - device.destroyFence(command.fence); - device.destroySemaphore(command.image_acquired); - device.destroySemaphore(command.present_ready); - device.destroyDescriptorPool(command.descriptor_pool); - } - - device.destroyCommandPool(command_pool); -} - -MICROPROFILE_DEFINE(Vulkan_Synchronize, "Vulkan", "Scheduler Synchronize", MP_RGB(100, 52, 235)); -void TaskScheduler::Synchronize(u32 slot) { - const auto& command = commands[slot]; - vk::Device device = instance.GetDevice(); - - const u64 completed_counter = GetFenceCounter(); - if (command.fence_counter > completed_counter) { - MICROPROFILE_SCOPE(Vulkan_Synchronize); - if (instance.IsTimelineSemaphoreSupported()) { - const vk::SemaphoreWaitInfo wait_info = { - .semaphoreCount = 1, .pSemaphores = &timeline, .pValues = &command.fence_counter}; - - if (device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", - command.fence_counter); - UNREACHABLE(); - } - - } else if (device.waitForFences(command.fence, true, 
UINT64_MAX) != vk::Result::eSuccess) { - LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter); - UNREACHABLE(); - } - completed_fence_counter = command.fence_counter; - } - - device.resetFences(command.fence); - device.resetDescriptorPool(command.descriptor_pool); -} - -MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Scheduler Queue Submit", MP_RGB(66, 245, 170)); -void TaskScheduler::Submit(SubmitMode mode) { - if (False(mode & SubmitMode::Shutdown)) { - renderer.FlushBuffers(); - } - - const auto& command = commands[current_command]; - command.render_command_buffer.end(); - if (command.use_upload_buffer) { - command.upload_command_buffer.end(); - } - - u32 command_buffer_count = 0; - std::array command_buffers; - - if (command.use_upload_buffer) { - command_buffers[command_buffer_count++] = command.upload_command_buffer; - } - - command_buffers[command_buffer_count++] = command.render_command_buffer; - - const auto QueueSubmit = [this](const vk::SubmitInfo& info, vk::Fence fence) { - MICROPROFILE_SCOPE(Vulkan_Submit); - - try { - vk::Queue queue = instance.GetGraphicsQueue(); - queue.submit(info, fence); - } catch (vk::DeviceLostError& err) { - LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); - UNREACHABLE(); - } - }; - - const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced); - if (instance.IsTimelineSemaphoreSupported()) { - const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u; - const std::array wait_values{command.fence_counter - 1, u64(1)}; - const std::array wait_semaphores{timeline, command.image_acquired}; - - const u32 signal_semaphore_count = swapchain_sync ? 
2u : 1u; - const std::array signal_values{command.fence_counter, u64(0)}; - const std::array signal_semaphores{timeline, command.present_ready}; - - const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = { - .waitSemaphoreValueCount = wait_semaphore_count, - .pWaitSemaphoreValues = wait_values.data(), - .signalSemaphoreValueCount = signal_semaphore_count, - .pSignalSemaphoreValues = signal_values.data()}; - - const std::array wait_stage_masks = { - vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eColorAttachmentOutput, - }; - - const vk::SubmitInfo submit_info = { - .pNext = &timeline_si, - .waitSemaphoreCount = wait_semaphore_count, - .pWaitSemaphores = wait_semaphores.data(), - .pWaitDstStageMask = wait_stage_masks.data(), - .commandBufferCount = command_buffer_count, - .pCommandBuffers = command_buffers.data(), - .signalSemaphoreCount = signal_semaphore_count, - .pSignalSemaphores = signal_semaphores.data(), - }; - - QueueSubmit(submit_info, command.fence); - } else { - const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u; - const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u; - const vk::PipelineStageFlags wait_stage_masks = - vk::PipelineStageFlagBits::eColorAttachmentOutput; - - const vk::SubmitInfo submit_info = { - .waitSemaphoreCount = wait_semaphore_count, - .pWaitSemaphores = &command.image_acquired, - .pWaitDstStageMask = &wait_stage_masks, - .commandBufferCount = command_buffer_count, - .pCommandBuffers = command_buffers.data(), - .signalSemaphoreCount = signal_semaphore_count, - .pSignalSemaphores = &command.present_ready, - }; - - QueueSubmit(submit_info, command.fence); - } - - // Block host until the GPU catches up - if (True(mode & SubmitMode::Flush)) { - Synchronize(current_command); - } - - // Switch to next cmdbuffer. 
- if (False(mode & SubmitMode::Shutdown)) { - SwitchSlot(); - renderer.OnSlotSwitch(); - } -} - -u64 TaskScheduler::GetFenceCounter() const { - vk::Device device = instance.GetDevice(); - if (instance.IsTimelineSemaphoreSupported()) { - return device.getSemaphoreCounterValue(timeline); - } - - return completed_fence_counter; -} - -vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() { - auto& command = commands[current_command]; - if (!command.use_upload_buffer) { - const vk::CommandBufferBeginInfo begin_info = { - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit}; - - command.upload_command_buffer.begin(begin_info); - command.use_upload_buffer = true; - } - - return command.upload_command_buffer; -} - -void TaskScheduler::SwitchSlot() { - current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT; - auto& command = commands[current_command]; - - // Wait for the GPU to finish with all resources for this command. - Synchronize(current_command); - - const vk::CommandBufferBeginInfo begin_info = { - .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit}; - - // Begin the next command buffer. - command.render_command_buffer.begin(begin_info); - command.fence_counter = next_fence_counter++; - command.use_upload_buffer = false; -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h deleted file mode 100644 index b6c296d8a..000000000 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include "common/common_funcs.h" -#include "common/common_types.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -class Buffer; -class Instance; -class RendererVulkan; - -enum class SubmitMode : u8 { - SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain - Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads - Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer -}; - -DECLARE_ENUM_FLAG_OPERATORS(SubmitMode); - -class TaskScheduler { -public: - TaskScheduler(const Instance& instance, RendererVulkan& renderer); - ~TaskScheduler(); - - /// Blocks the host until the current command completes execution - void Synchronize(u32 slot); - - /// Submits the current command to the graphics queue - void Submit(SubmitMode mode); - - /// Returns the last completed fence counter - u64 GetFenceCounter() const; - - /// Returns the command buffer used for early upload operations. 
- vk::CommandBuffer GetUploadCommandBuffer(); - - /// Returns the command buffer used for rendering - vk::CommandBuffer GetRenderCommandBuffer() const { - return commands[current_command].render_command_buffer; - } - - /// Returns the current descriptor pool - vk::DescriptorPool GetDescriptorPool() const { - return commands[current_command].descriptor_pool; - } - - /// Returns the index of the current command slot - u32 GetCurrentSlotIndex() const { - return current_command; - } - - u64 GetHostFenceCounter() const { - return next_fence_counter - 1; - } - - vk::Semaphore GetImageAcquiredSemaphore() const { - return commands[current_command].image_acquired; - } - - vk::Semaphore GetPresentReadySemaphore() const { - return commands[current_command].present_ready; - } - -private: - /// Activates the next command slot and optionally waits for its completion - void SwitchSlot(); - -private: - const Instance& instance; - RendererVulkan& renderer; - u64 next_fence_counter = 1; - u64 completed_fence_counter = 0; - - struct ExecutionSlot { - bool use_upload_buffer = false; - u64 fence_counter = 0; - vk::Semaphore image_acquired; - vk::Semaphore present_ready; - vk::Fence fence; - vk::DescriptorPool descriptor_pool; - vk::CommandBuffer render_command_buffer; - vk::CommandBuffer upload_command_buffer; - }; - - vk::CommandPool command_pool{}; - vk::Semaphore timeline{}; - std::array commands{}; - u32 current_command = 0; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index e91620783..7c8ff4376 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -8,7 +8,7 @@ #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" +#include 
"video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" #include @@ -33,13 +33,40 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) { return vk::ImageAspectFlagBits::eColor; } +u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { + u32 depth_offset = 0; + u32 stencil_offset = 4 * data.size / 5; + const auto& mapped = data.mapped; + + switch (dest) { + case vk::Format::eD24UnormS8Uint: { + for (; stencil_offset < data.size; depth_offset += 4) { + std::byte* ptr = mapped.data() + depth_offset; + const u32 d24s8 = VideoCore::MakeInt(ptr); + const u32 d24 = d24s8 >> 8; + mapped[stencil_offset] = static_cast(d24s8 & 0xFF); + std::memcpy(ptr, &d24, 4); + stencil_offset++; + } + break; + } + default: + LOG_ERROR(Render_Vulkan, "Unimplemtend convertion for depth format {}", + vk::to_string(dest)); + UNREACHABLE(); + } + + ASSERT(depth_offset == 4 * data.size / 5); + return depth_offset; +} + constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; -TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler, - RenderpassCache& renderpass_cache) - : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, - blit_helper{instance, scheduler}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, +TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, + blit_helper{instance, scheduler, desc_manager}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler, DOWNLOAD_BUFFER_SIZE, true} { auto Register = [this](VideoCore::PixelFormat dest, @@ -49,7 +76,7 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule }; 
Register(VideoCore::PixelFormat::RGBA8, - std::make_unique(instance, scheduler, *this)); + std::make_unique(instance, scheduler, desc_manager, *this)); } TextureRuntime::~TextureRuntime() { @@ -98,7 +125,7 @@ MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, void TextureRuntime::Finish() { MICROPROFILE_SCOPE(Vulkan_Finish); renderpass_cache.ExitRenderpass(); - scheduler.Submit(SubmitMode::Flush); + scheduler.Finish(); download_buffer.Invalidate(); } @@ -295,36 +322,33 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea vk::ClearDepthStencilValue{.depth = value.depth, .stencil = value.stencil}; } - // For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage if (clear.texture_rect == surface.GetScaledRect()) { - const vk::ImageSubresourceRange range = {.aspectMask = aspect, - .baseMipLevel = clear.texture_level, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1}; + scheduler.Record( + [aspect, image = surface.alloc.image, clear_value, clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageSubresourceRange range = {.aspectMask = aspect, + .baseMipLevel = clear.texture_level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1}; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - if (aspect & vk::ImageAspectFlagBits::eColor) { - command_buffer.clearColorImage(surface.alloc.image, - vk::ImageLayout::eTransferDstOptimal, clear_value.color, - range); - } else if (aspect & vk::ImageAspectFlagBits::eDepth || - aspect & vk::ImageAspectFlagBits::eStencil) { - command_buffer.clearDepthStencilImage(surface.alloc.image, - vk::ImageLayout::eTransferDstOptimal, - clear_value.depthStencil, range); - } + if (aspect & vk::ImageAspectFlagBits::eColor) { + render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_value.color, + range); + } else if (aspect & vk::ImageAspectFlagBits::eDepth || + aspect & 
vk::ImageAspectFlagBits::eStencil) { + render_cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, + clear_value.depthStencil, range); + } + }); } else { - // For partial clears we begin a clear renderpass with the appropriate render area - vk::RenderPass clear_renderpass{}; + vk::RenderPass clear_renderpass; if (aspect & vk::ImageAspectFlagBits::eColor) { clear_renderpass = renderpass_cache.GetRenderpass( surface.pixel_format, VideoCore::PixelFormat::Invalid, true); surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1); - } else if (aspect & vk::ImageAspectFlagBits::eDepth || - aspect & vk::ImageAspectFlagBits::eStencil) { - clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, - surface.pixel_format, true); + } else if (aspect & vk::ImageAspectFlagBits::eDepth) { + clear_renderpass = renderpass_cache.GetRenderpass( + VideoCore::PixelFormat::Invalid, surface.pixel_format, true); surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1); } @@ -344,17 +368,17 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea it->second = device.createFramebuffer(framebuffer_info); } - const vk::RenderPassBeginInfo clear_begin_info = { - .renderPass = clear_renderpass, + const RenderpassState clear_info = { + .renderpass = clear_renderpass, .framebuffer = it->second, - .renderArea = vk::Rect2D{.offset = {static_cast(clear.texture_rect.left), - static_cast(clear.texture_rect.bottom)}, - .extent = {clear.texture_rect.GetWidth(), - clear.texture_rect.GetHeight()}}, - .clearValueCount = 1, - .pClearValues = &clear_value}; + .render_area = vk::Rect2D{.offset = {static_cast(clear.texture_rect.left), + static_cast(clear.texture_rect.bottom)}, + .extent = {clear.texture_rect.GetWidth(), + clear.texture_rect.GetHeight()}}, + .clear = clear_value + }; - renderpass_cache.EnterRenderpass(clear_begin_info); + renderpass_cache.EnterRenderpass(clear_info); 
renderpass_cache.ExitRenderpass(); } @@ -368,22 +392,24 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1); dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1); - const vk::ImageCopy image_copy = { - .srcSubresource = {.aspectMask = ToVkAspect(source.type), - .mipLevel = copy.src_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), 0}, - .dstSubresource = {.aspectMask = ToVkAspect(dest.type), - .mipLevel = copy.dst_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), 0}, - .extent = {copy.extent.width, copy.extent.height, 1}}; + scheduler.Record([src_image = source.alloc.image, src_type = source.type, + dst_image = dest.alloc.image, dst_type = dest.type, copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const vk::ImageCopy image_copy = { + .srcSubresource = {.aspectMask = ToVkAspect(src_type), + .mipLevel = copy.src_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .srcOffset = {static_cast(copy.src_offset.x), static_cast(copy.src_offset.y), 0}, + .dstSubresource = {.aspectMask = ToVkAspect(dst_type), + .mipLevel = copy.dst_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .dstOffset = {static_cast(copy.dst_offset.x), static_cast(copy.dst_offset.y), 0}, + .extent = {copy.extent.width, copy.extent.height, 1}}; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, - dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy); + render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, + dst_image, vk::ImageLayout::eTransferDstOptimal, image_copy); + }); return true; } @@ -395,45 +421,47 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, 
source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1); dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1); - const std::array source_offsets = {vk::Offset3D{static_cast(blit.src_rect.left), - static_cast(blit.src_rect.bottom), 0}, - vk::Offset3D{static_cast(blit.src_rect.right), - static_cast(blit.src_rect.top), 1}}; + scheduler.Record([src_iamge = source.alloc.image, src_type = source.type, + dst_image = dest.alloc.image, dst_type = dest.type, + format = source.pixel_format, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + const std::array source_offsets = {vk::Offset3D{static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.src_rect.right), + static_cast(blit.src_rect.top), 1}}; - const std::array dest_offsets = {vk::Offset3D{static_cast(blit.dst_rect.left), - static_cast(blit.dst_rect.bottom), 0}, - vk::Offset3D{static_cast(blit.dst_rect.right), - static_cast(blit.dst_rect.top), 1}}; + const std::array dest_offsets = {vk::Offset3D{static_cast(blit.dst_rect.left), + static_cast(blit.dst_rect.bottom), 0}, + vk::Offset3D{static_cast(blit.dst_rect.right), + static_cast(blit.dst_rect.top), 1}}; - const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(source.type), - .mipLevel = blit.src_level, - .baseArrayLayer = blit.src_layer, - .layerCount = 1}, - .srcOffsets = source_offsets, - .dstSubresource = {.aspectMask = ToVkAspect(dest.type), - .mipLevel = blit.dst_level, - .baseArrayLayer = blit.dst_layer, - .layerCount = 1}, - .dstOffsets = dest_offsets}; + const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(src_type), + .mipLevel = blit.src_level, + .baseArrayLayer = blit.src_layer, + .layerCount = 1}, + .srcOffsets = source_offsets, + .dstSubresource = {.aspectMask = ToVkAspect(dst_type), + .mipLevel = blit.dst_level, + .baseArrayLayer = blit.dst_layer, + .layerCount = 1}, + .dstOffsets = dest_offsets}; - // Don't use 
linear filtering on depth attachments - const VideoCore::PixelFormat format = source.pixel_format; - const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 || - format == VideoCore::PixelFormat::D24 || - format == VideoCore::PixelFormat::D16 - ? vk::Filter::eNearest - : vk::Filter::eLinear; + // Don't use linear filtering on depth attachments + const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 || + format == VideoCore::PixelFormat::D24 || + format == VideoCore::PixelFormat::D16 + ? vk::Filter::eNearest + : vk::Filter::eLinear; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal, - dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, - filtering); + render_cmdbuf.blitImage(src_iamge, vk::ImageLayout::eTransferSrcOptimal, + dst_image, vk::ImageLayout::eTransferDstOptimal, blit_area, + filtering); + }); return true; } void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { - renderpass_cache.ExitRenderpass(); + /*renderpass_cache.ExitRenderpass(); // TODO: Investigate AMD single pass downsampler s32 current_width = surface.GetScaledWidth(); @@ -467,7 +495,7 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal, surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eLinear); - } + }*/ } const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( @@ -483,8 +511,7 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const { !traits.attachment_support); } -void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, - vk::ImageLayout new_layout, u32 level, u32 level_count) { +void TextureRuntime::Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count) { LayoutTracker& tracker = alloc.tracker; if 
(tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) { return; @@ -566,22 +593,26 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al LayoutInfo dest = GetLayoutInfo(new_layout); tracker.ForEachLayoutRange( level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) { + scheduler.Record([old_layout, new_layout, dest, start, count, + image = alloc.image, aspect = alloc.aspect, + layers = alloc.layers, GetLayoutInfo](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { LayoutInfo source = GetLayoutInfo(old_layout); - const vk::ImageMemoryBarrier barrier = { - .srcAccessMask = source.access, - .dstAccessMask = dest.access, - .oldLayout = old_layout, - .newLayout = new_layout, - .image = alloc.image, - .subresourceRange = {.aspectMask = alloc.aspect, - .baseMipLevel = start, - .levelCount = count, - .baseArrayLayer = 0, - .layerCount = alloc.layers}}; + const vk::ImageMemoryBarrier barrier = { + .srcAccessMask = source.access, + .dstAccessMask = dest.access, + .oldLayout = old_layout, + .newLayout = new_layout, + .image = image, + .subresourceRange = {.aspectMask = aspect, + .baseMipLevel = start, + .levelCount = count, + .baseArrayLayer = 0, + .layerCount = layers}}; - command_buffer.pipelineBarrier(source.stage, dest.stage, - vk::DependencyFlagBits::eByRegion, {}, {}, barrier); - }); + render_cmdbuf.pipelineBarrier(source.stage, dest.stage, + vk::DependencyFlagBits::eByRegion, {}, {}, barrier); + }); + }); tracker.SetLayout(new_layout, level, level_count); for (u32 i = 0; i < level_count; i++) { @@ -625,8 +656,7 @@ Surface::~Surface() { } void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) { - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - runtime.Transition(command_buffer, alloc, new_layout, level, level_count); + runtime.Transition(alloc, new_layout, level, level_count); } MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture 
Upload", MP_RGB(128, 192, 64)); @@ -644,40 +674,40 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa if (is_scaled) { ScaledUpload(upload, staging); } else { - u32 region_count = 0; - std::array copy_regions; - - const VideoCore::Rect2D rect = upload.texture_rect; - vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset + upload.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = {.aspectMask = alloc.aspect, - .mipLevel = upload.texture_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; - - if (alloc.aspect & vk::ImageAspectFlagBits::eColor) { - copy_regions[region_count++] = copy_region; - } else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) { - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; - copy_regions[region_count++] = copy_region; - - if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) { - copy_region.bufferOffset += UnpackDepthStencil(staging); - copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; - copy_regions[region_count++] = copy_region; - } - } - Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1); + scheduler.Record([aspect = alloc.aspect, image = alloc.image, + format = alloc.format, staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + u32 region_count = 0; + std::array copy_regions; - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.copyBufferToImage(staging.buffer, alloc.image, - vk::ImageLayout::eTransferDstOptimal, region_count, - copy_regions.data()); + const VideoCore::Rect2D rect = upload.texture_rect; + vk::BufferImageCopy copy_region = { + .bufferOffset = staging.buffer_offset + upload.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight 
= rect.GetHeight(), + .imageSubresource = {.aspectMask = aspect, + .mipLevel = upload.texture_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; + + if (aspect & vk::ImageAspectFlagBits::eColor) { + copy_regions[region_count++] = copy_region; + } else if (aspect & vk::ImageAspectFlagBits::eDepth) { + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + copy_regions[region_count++] = copy_region; + + if (aspect & vk::ImageAspectFlagBits::eStencil) { + copy_region.bufferOffset += UnpackDepthStencil(staging, format); + copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + copy_regions[region_count++] = copy_region; + } + } + + render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal, + region_count, copy_regions.data()); + }); } InvalidateAllWatcher(); @@ -703,24 +733,24 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi if (is_scaled) { ScaledDownload(download, staging); } else { - const VideoCore::Rect2D rect = download.texture_rect; - const vk::BufferImageCopy copy_region = { - .bufferOffset = staging.buffer_offset + download.buffer_offset, - .bufferRowLength = rect.GetWidth(), - .bufferImageHeight = rect.GetHeight(), - .imageSubresource = {.aspectMask = alloc.aspect, - .mipLevel = download.texture_level, - .baseArrayLayer = 0, - .layerCount = 1}, - .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, - .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; - Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1); + scheduler.Record([aspect = alloc.aspect, image = alloc.image, + staging, download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){ + const VideoCore::Rect2D rect = download.texture_rect; + const vk::BufferImageCopy copy_region = { + .bufferOffset = 
staging.buffer_offset + download.buffer_offset, + .bufferRowLength = rect.GetWidth(), + .bufferImageHeight = rect.GetHeight(), + .imageSubresource = {.aspectMask = aspect, + .mipLevel = download.texture_level, + .baseArrayLayer = 0, + .layerCount = 1}, + .imageOffset = {static_cast(rect.left), static_cast(rect.bottom), 0}, + .imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}}; - // Copy pixel data to the staging buffer - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal, - staging.buffer, copy_region); + render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, + staging.buffer, copy_region); + }); } // Lock this data until the next scheduler switch @@ -852,31 +882,4 @@ void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download, r32_surface.Download(r32_download, staging); } -u32 Surface::UnpackDepthStencil(const StagingData& data) { - u32 depth_offset = 0; - u32 stencil_offset = 4 * data.size / 5; - const auto& mapped = data.mapped; - - switch (alloc.format) { - case vk::Format::eD24UnormS8Uint: { - for (; stencil_offset < data.size; depth_offset += 4) { - std::byte* ptr = mapped.data() + depth_offset; - const u32 d24s8 = VideoCore::MakeInt(ptr); - const u32 d24 = d24s8 >> 8; - mapped[stencil_offset] = static_cast(d24s8 & 0xFF); - std::memcpy(ptr, &d24, 4); - stencil_offset++; - } - break; - } - default: - LOG_ERROR(Render_Vulkan, "Unimplemtend convertion for depth format {}", - vk::to_string(alloc.format)); - UNREACHABLE(); - } - - ASSERT(depth_offset == 4 * data.size / 5); - return depth_offset; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 1e3689869..191a54567 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -14,7 +14,6 @@ #include 
"video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_layout_tracker.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include "video_core/renderer_vulkan/vk_task_scheduler.h" namespace Vulkan { @@ -78,6 +77,7 @@ namespace Vulkan { class Instance; class RenderpassCache; +class DescriptorManager; class Surface; /** @@ -88,10 +88,16 @@ class TextureRuntime { friend class Surface; public: - TextureRuntime(const Instance& instance, TaskScheduler& scheduler, - RenderpassCache& renderpass_cache); + TextureRuntime(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorManager& desc_manager); ~TextureRuntime(); + /// Causes a GPU command flush + void Finish(); + + /// Takes back ownership of the allocation for recycling + void Recycle(const HostTextureTag tag, ImageAlloc&& alloc); + /// Maps an internal staging buffer of the provided size of pixel uploads/downloads [[nodiscard]] StagingData FindStaging(u32 size, bool upload); @@ -104,22 +110,12 @@ public: VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage); - /// Flushes staging buffers - void FlushBuffers(); - - /// Causes a GPU command flush - void Finish(); - - /// Takes back ownership of the allocation for recycling - void Recycle(const HostTextureTag tag, ImageAlloc&& alloc); - /// Performs required format convertions on the staging data void FormatConvert(const Surface& surface, bool upload, std::span source, std::span dest); /// Transitions the mip level range of the surface to new_layout - void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout, - u32 level, u32 level_count); + void Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count); /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, @@ -134,6 +130,9 @@ public: /// Generates mipmaps for all the 
available levels of the texture void GenerateMipmaps(Surface& surface, u32 max_level); + /// Flushes staging buffers + void FlushBuffers(); + /// Returns all source formats that support reinterpretation to the dest format [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( VideoCore::PixelFormat dest_format) const; @@ -148,14 +147,15 @@ private: } /// Returns the current Vulkan scheduler - TaskScheduler& GetScheduler() const { + Scheduler& GetScheduler() const { return scheduler; } private: const Instance& instance; - TaskScheduler& scheduler; + Scheduler& scheduler; RenderpassCache& renderpass_cache; + DescriptorManager& desc_manager; BlitHelper blit_helper; StreamBuffer upload_buffer; StreamBuffer download_buffer; @@ -235,13 +235,10 @@ private: void DepthStencilDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging); - /// Unpacks packed D24S8 data to facilitate depth upload - u32 UnpackDepthStencil(const StagingData& data); - private: TextureRuntime& runtime; const Instance& instance; - TaskScheduler& scheduler; + Scheduler& scheduler; public: ImageAlloc alloc;