From 5c401b8ea017e310c85ae45439cfc7dc3a16523a Mon Sep 17 00:00:00 2001 From: GPUCode Date: Tue, 31 Jan 2023 22:40:08 +0200 Subject: [PATCH] renderer_vulkan: Async presentation * This rewrites a large portion of the presentation engine to be more thread safe and moves all swapchain usage to the presentation thread. Previously acquires were done on the main thread which required the next frame to wait for the previous one to finish presenting * The new implementation is based on the OpenGL mailbox system, simplified. The screens are drawn on separate render frames that get sent to the presentation thread to be presented. Queue access is now thread safe as well. --- .../src/main/jni/emu_window/emu_window_vk.cpp | 4 + src/citra_qt/bootmanager.cpp | 18 +- src/core/frontend/emu_window.h | 35 +- src/video_core/CMakeLists.txt | 2 + .../renderer_vulkan/renderer_vulkan.cpp | 556 ++++++++++++------ .../renderer_vulkan/renderer_vulkan.h | 18 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 26 +- .../renderer_vulkan/vk_rasterizer.cpp | 7 + .../renderer_vulkan/vk_renderpass_cache.cpp | 2 +- .../renderer_vulkan/vk_renderpass_cache.h | 2 +- .../renderer_vulkan/vk_scheduler.cpp | 4 +- src/video_core/renderer_vulkan/vk_scheduler.h | 6 + .../renderer_vulkan/vk_shader_gen.cpp | 85 +-- .../renderer_vulkan/vk_shader_gen.h | 12 +- .../renderer_vulkan/vk_swapchain.cpp | 116 ++-- src/video_core/renderer_vulkan/vk_swapchain.h | 21 +- .../renderer_vulkan/vk_texture_mailbox.cpp | 170 ++++++ .../renderer_vulkan/vk_texture_mailbox.h | 66 +++ 18 files changed, 798 insertions(+), 352 deletions(-) create mode 100644 src/video_core/renderer_vulkan/vk_texture_mailbox.cpp create mode 100644 src/video_core/renderer_vulkan/vk_texture_mailbox.h diff --git a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp index 7f680d3b8..58c07f6df 100644 --- a/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp +++ 
b/src/android/app/src/main/jni/emu_window/emu_window_vk.cpp @@ -58,4 +58,8 @@ void EmuWindow_Android_Vulkan::TryPresenting() { return; } } + + if (VideoCore::g_renderer) { + VideoCore::g_renderer->TryPresent(0); + } } \ No newline at end of file diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index d53cc2875..991c9ad95 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -312,9 +312,23 @@ private: class VulkanRenderWidget : public RenderWidget { public: - explicit VulkanRenderWidget(GRenderWindow* parent) : RenderWidget(parent) { + explicit VulkanRenderWidget(GRenderWindow* parent, bool is_secondary) + : RenderWidget(parent), is_secondary(is_secondary) { windowHandle()->setSurfaceType(QWindow::VulkanSurface); } + + void Present() override { + if (!isVisible()) { + return; + } + if (!Core::System::GetInstance().IsPoweredOn()) { + return; + } + VideoCore::g_renderer->TryPresent(100, is_secondary); + } + +private: + bool is_secondary; }; static Frontend::WindowSystemType GetWindowSystemType() { @@ -656,7 +670,7 @@ bool GRenderWindow::InitializeOpenGL() { } bool GRenderWindow::InitializeVulkan() { - auto child = new VulkanRenderWidget(this); + auto child = new VulkanRenderWidget(this, is_secondary); child_widget = child; child_widget->windowHandle()->create(); main_context = std::make_unique(); diff --git a/src/core/frontend/emu_window.h b/src/core/frontend/emu_window.h index 61846a8a5..65e7f6ce0 100644 --- a/src/core/frontend/emu_window.h +++ b/src/core/frontend/emu_window.h @@ -36,32 +36,37 @@ class TextureMailbox { public: virtual ~TextureMailbox() = default; - /** - * Recreate the render objects attached to this frame with the new specified width/height - */ - virtual void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) = 0; - - /** - * Recreate the presentation objects attached to this frame with the new specified width/height - */ - virtual void ReloadPresentFrame(Frontend::Frame* frame, u32 
width, u32 height) = 0; - /** * Render thread calls this to get an available frame to present */ virtual Frontend::Frame* GetRenderFrame() = 0; - /** - * Render thread calls this after draw commands are done to add to the presentation mailbox - */ - virtual void ReleaseRenderFrame(Frame* frame) = 0; - /** * Presentation thread calls this to get the latest frame available to present. If there is no * frame available after timeout, returns the previous frame. If there is no previous frame it * returns nullptr */ virtual Frontend::Frame* TryGetPresentFrame(int timeout_ms) = 0; + + /** + * Recreate the render objects attached to this frame with the new specified width/height + */ + virtual void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) {} + + /** + * Recreate the presentation objects attached to this frame with the new specified width/height + */ + virtual void ReloadPresentFrame(Frontend::Frame* frame, u32 width, u32 height) {} + + /** + * Render thread calls this after draw commands are done to add to the presentation mailbox + */ + virtual void ReleaseRenderFrame(Frame* frame) {} + + /** + * Presentation thread calls this after presentation to free the render frame + */ + virtual void ReleasePresentFrame(Frame* frame) {} }; /** diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5fc9d2e2b..c71d4fb19 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -121,6 +121,8 @@ add_library(video_core STATIC renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_mailbox.cpp + renderer_vulkan/vk_texture_mailbox.h renderer_vulkan/vk_texture_runtime.cpp renderer_vulkan/vk_texture_runtime.h shader/debug_data.h diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 3ca3cbed0..69ee49350 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ 
b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -8,7 +8,6 @@ #include "common/logging/log.h" #include "common/settings.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/frontend/framebuffer_layout.h" #include "core/hw/gpu.h" #include "core/hw/hw.h" @@ -18,6 +17,7 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_platform.h" #include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_texture_mailbox.h" #include "video_core/video_core.h" #include "video_core/host_shaders/vulkan_present_anaglyph_frag_spv.h" @@ -27,6 +27,10 @@ #include +MICROPROFILE_DEFINE(Vulkan_RenderFrame, "Vulkan", "Render Frame", MP_RGB(128, 128, 64)); +MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_SwapchainCopy, "Vulkan", "Swapchain Copy", MP_RGB(64, 64, 0)); + namespace Vulkan { /** @@ -109,7 +113,7 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window, Frontend::EmuWindow* VERTEX_BUFFER_SIZE}, rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} { Report(); - window.mailbox = nullptr; + window.mailbox = std::make_unique(instance, swapchain, renderpass_cache); } RendererVulkan::~RendererVulkan() { @@ -137,6 +141,8 @@ RendererVulkan::~RendererVulkan() { runtime.Recycle(tag, std::move(info.texture.alloc)); } + + render_window.mailbox.reset(); } VideoCore::ResultStatus RendererVulkan::Init() { @@ -191,6 +197,77 @@ void RendererVulkan::PrepareRendertarget() { } } +void RendererVulkan::RenderToMailbox(const Layout::FramebufferLayout& layout, + std::unique_ptr& mailbox, + bool flipped) { + const vk::Device device = instance.GetDevice(); + Frontend::Frame* frame; + { + MICROPROFILE_SCOPE(Vulkan_WaitPresent); + frame = mailbox->GetRenderFrame(); + std::scoped_lock lock{frame->fence_mutex}; + [[maybe_unused]] vk::Result result = + 
device.waitForFences(frame->present_done, false, std::numeric_limits::max()); + device.resetFences(frame->present_done); + } + + { + MICROPROFILE_SCOPE(Vulkan_RenderFrame); + + const auto [width, height] = swapchain.GetExtent(); + if (width != frame->width || height != frame->height) { + LOG_INFO(Render_Vulkan, "Reloading render frame"); + mailbox->ReloadRenderFrame(frame, width, height); + } + + scheduler.Record([layout](vk::CommandBuffer cmdbuf) { + const vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = static_cast(layout.width), + .height = static_cast(layout.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + + const vk::Rect2D scissor = { + .offset = {0, 0}, + .extent = {layout.width, layout.height}, + }; + + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); + }); + + renderpass_cache.ExitRenderpass(); + + scheduler.Record([this, framebuffer = frame->framebuffer, width = frame->width, + height = frame->height](vk::CommandBuffer cmdbuf) { + const vk::ClearValue clear{.color = clear_color}; + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = framebuffer, + .renderArea = + vk::Rect2D{ + .offset = {0, 0}, + .extent = {width, height}, + }, + .clearValueCount = 1, + .pClearValues = &clear, + }; + + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); + }); + + DrawScreens(layout, flipped); + + scheduler.Flush(frame->render_ready); + scheduler.Record( + [&mailbox, frame](vk::CommandBuffer) { mailbox->ReleaseRenderFrame(frame); }); + scheduler.DispatchWork(); + } +} + void RendererVulkan::BeginRendering() { vk::Device device = instance.GetDevice(); @@ -215,26 +292,6 @@ void RendererVulkan::BeginRendering() { cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, set, {}); }); - - renderpass_cache.ExitRenderpass(); - - scheduler.Record([this, framebuffer = swapchain.GetFramebuffer(), - extent = 
swapchain.GetExtent()](vk::CommandBuffer cmdbuf) { - const vk::ClearValue clear{.color = clear_color}; - const vk::RenderPassBeginInfo renderpass_begin_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), - .framebuffer = framebuffer, - .renderArea = - vk::Rect2D{ - .offset = {0, 0}, - .extent = extent, - }, - .clearValueCount = 1, - .pClearValues = &clear, - }; - - cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); - }); } void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer, @@ -255,27 +312,12 @@ void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram int bpp = GPU::Regs::BytesPerPixel(framebuffer.color_format); std::size_t pixel_stride = framebuffer.stride / bpp; - // OpenGL only supports specifying a stride in units of pixels, not bytes, unfortunately ASSERT(pixel_stride * bpp == framebuffer.stride); - - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default - // only allows rows to have a memory alignement of 4. 
ASSERT(pixel_stride % 4 == 0); if (!rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { ASSERT(false); - // Reset the screen info's display texture to its own permanent texture - /*screen_info.display_texture = &screen_info.texture; - screen_info.display_texcoords = Common::Rectangle(0.f, 0.f, 1.f, 1.f); - - Memory::RasterizerFlushRegion(framebuffer_addr, framebuffer.stride * framebuffer.height); - - vk::Rect2D region{{0, 0}, {framebuffer.width, framebuffer.height}}; - std::span framebuffer_data(VideoCore::g_memory->GetPhysicalPointer(framebuffer_addr), - screen_info.texture.GetSize()); - - screen_info.texture.Upload(0, 1, pixel_stride, region, framebuffer_data);*/ } } @@ -328,7 +370,7 @@ void RendererVulkan::BuildLayouts() { .pBindings = present_layout_bindings.data(), }; - vk::Device device = instance.GetDevice(); + const vk::Device device = instance.GetDevice(); present_descriptor_layout = device.createDescriptorSetLayout(present_layout_info); const std::array update_template_entries = { @@ -911,44 +953,165 @@ void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool f scheduler.Record([](vk::CommandBuffer cmdbuf) { cmdbuf.endRenderPass(); }); } +void RendererVulkan::TryPresent(int timeout_ms, bool is_secondary) { + Frontend::Frame* frame = render_window.mailbox->TryGetPresentFrame(timeout_ms); + if (!frame) { + LOG_DEBUG(Render_Vulkan, "TryGetPresentFrame returned no frame to present"); + return; + } + +#if ANDROID + // On Android swapchain invalidations are always due to surface changes. + // These are processed on the main thread so wait for it to recreate + // the swapchain for us. 
+ std::unique_lock lock{swapchain_mutex}; + swapchain_cv.wait(lock, [this]() { return !swapchain.NeedsRecreation(); }); +#endif + + while (!swapchain.AcquireNextImage()) { +#if ANDROID + swapchain_cv.wait(lock, [this]() { return !swapchain.NeedsRecreation(); }); +#else + std::scoped_lock lock{scheduler.QueueMutex()}; + instance.GetGraphicsQueue().waitIdle(); + swapchain.Create(); +#endif + } + + { + MICROPROFILE_SCOPE(Vulkan_SwapchainCopy); + const vk::Image swapchain_image = swapchain.Image(); + + const vk::CommandBufferBeginInfo begin_info = { + .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, + }; + const vk::CommandBuffer cmdbuf = frame->cmdbuf; + cmdbuf.begin(begin_info); + + const auto [width, height] = swapchain.GetExtent(); + const u32 copy_width = std::min(width, frame->width); + const u32 copy_height = std::min(height, frame->height); + + const vk::ImageCopy image_copy = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffset = {0, 0, 0}, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffset = {0, 0, 0}, + .extent = {copy_width, copy_height, 1}, + }; + + const std::array pre_barriers{ + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = 
vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = frame->image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const vk::ImageMemoryBarrier post_barrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eNone, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapchain_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + + cmdbuf.copyImage(frame->image, vk::ImageLayout::eTransferSrcOptimal, swapchain_image, + vk::ImageLayout::eTransferDstOptimal, image_copy); + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eBottomOfPipe, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + + cmdbuf.end(); + + static constexpr std::array wait_stage_masks = { + vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eAllCommands, + }; + + const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); + const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); + const std::array wait_semaphores = {image_acquired, frame->render_ready}; + + vk::SubmitInfo submit_info = { + .waitSemaphoreCount = static_cast(wait_semaphores.size()), + .pWaitSemaphores = 
wait_semaphores.data(), + .pWaitDstStageMask = wait_stage_masks.data(), + .commandBufferCount = 1u, + .pCommandBuffers = &cmdbuf, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &present_ready, + }; + + try { + std::scoped_lock lock{scheduler.QueueMutex(), frame->fence_mutex}; + instance.GetGraphicsQueue().submit(submit_info, frame->present_done); + } catch (vk::DeviceLostError& err) { + LOG_CRITICAL(Render_Vulkan, "Device lost during present submit: {}", err.what()); + UNREACHABLE(); + } + } + + swapchain.Present(); + render_window.mailbox->ReleasePresentFrame(frame); +} + void RendererVulkan::SwapBuffers() { const auto& layout = render_window.GetFramebufferLayout(); PrepareRendertarget(); RenderScreenshot(); - do { - if (swapchain.NeedsRecreation()) { - swapchain.Create(); - } - scheduler.WaitWorker(); - swapchain.AcquireNextImage(); - } while (swapchain.NeedsRecreation()); - - scheduler.Record([layout](vk::CommandBuffer cmdbuf) { - const vk::Viewport viewport = { - .x = 0.0f, - .y = 0.0f, - .width = static_cast(layout.width), - .height = static_cast(layout.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - - const vk::Rect2D scissor = { - .offset = {0, 0}, - .extent = {layout.width, layout.height}, - }; - - cmdbuf.setViewport(0, viewport); - cmdbuf.setScissor(0, scissor); - }); - - DrawScreens(layout, false); - - const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); - const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); - scheduler.Flush(present_ready, image_acquired); - swapchain.Present(); + RenderToMailbox(layout, render_window.mailbox, false); m_current_frame++; @@ -978,12 +1141,11 @@ void RendererVulkan::RenderScreenshot() { const vk::ImageCreateInfo staging_image_info = { .imageType = vk::ImageType::e2D, .format = vk::Format::eB8G8R8A8Unorm, - .extent = - { - .width = width, - .height = height, - .depth = 1, - }, + .extent{ + .width = width, + .height = height, + .depth = 1, + }, .mipLevels = 1, 
.arrayLayers = 1, .samples = vk::SampleCountFlagBits::e1, @@ -993,7 +1155,8 @@ void RendererVulkan::RenderScreenshot() { }; const VmaAllocationCreateInfo alloc_create_info = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT, + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT, .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, .requiredFlags = 0, .preferredFlags = 0, @@ -1012,113 +1175,140 @@ void RendererVulkan::RenderScreenshot() { LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); UNREACHABLE(); } - vk::Image staging_image{unsafe_image}; + Frontend::Frame frame{}; + render_window.mailbox->ReloadRenderFrame(&frame, width, height); + renderpass_cache.ExitRenderpass(); - scheduler.Record([width, height, swapchain_image = swapchain.Image(), - staging_image](vk::CommandBuffer cmdbuf) { - const std::array read_barriers = { - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferRead, - .oldLayout = vk::ImageLayout::ePresentSrcKHR, - .newLayout = vk::ImageLayout::eTransferSrcOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapchain_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, + + scheduler.Record([this, framebuffer = frame.framebuffer, width = frame.width, + height = frame.height](vk::CommandBuffer cmdbuf) { + const vk::ClearValue clear{.color = clear_color}; + const vk::RenderPassBeginInfo renderpass_begin_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .framebuffer = framebuffer, + .renderArea = + vk::Rect2D{ + .offset = {0, 0}, + .extent = {width, height}, }, - }, - vk::ImageMemoryBarrier{ - .srcAccessMask = 
vk::AccessFlagBits::eNone, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eTransferDstOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = staging_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - }; - const std::array write_barriers = { - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferRead, - .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, - .oldLayout = vk::ImageLayout::eTransferSrcOptimal, - .newLayout = vk::ImageLayout::ePresentSrcKHR, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapchain_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, - vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::eGeneral, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = staging_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }, + .clearValueCount = 1, + .pClearValues = &clear, }; - const std::array offsets = { - vk::Offset3D{0, 0, 0}, - vk::Offset3D{static_cast(width), static_cast(height), 1}, - }; - - const vk::ImageBlit blit_area = { - .srcSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - 
.baseArrayLayer = 0, - .layerCount = 1, - }, - .srcOffsets = offsets, - .dstSubresource{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .dstOffsets = offsets, - }; - - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); - cmdbuf.blitImage(swapchain_image, vk::ImageLayout::eTransferSrcOptimal, staging_image, - vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); + cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline); }); + DrawScreens(layout, false); + + scheduler.Record( + [width, height, source_image = frame.image, staging_image](vk::CommandBuffer cmdbuf) { + const std::array read_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eNone, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + 
.baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + const std::array write_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferRead, + .dstAccessMask = vk::AccessFlagBits::eMemoryWrite, + .oldLayout = vk::ImageLayout::eTransferSrcOptimal, + .newLayout = vk::ImageLayout::eTransferSrcOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = source_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = staging_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }, + }; + static constexpr vk::MemoryBarrier memory_write_barrier = { + .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, + .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, + }; + + const std::array offsets = { + vk::Offset3D{0, 0, 0}, + vk::Offset3D{static_cast(width), static_cast(height), 1}, + }; + + const vk::ImageBlit blit_area = { + .srcSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .srcOffsets = offsets, + .dstSubresource{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .dstOffsets = offsets, 
+ }; + + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, read_barriers); + cmdbuf.blitImage(source_image, vk::ImageLayout::eTransferSrcOptimal, staging_image, + vk::ImageLayout::eTransferDstOptimal, blit_area, vk::Filter::eNearest); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, + vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, write_barriers); + }); + // Ensure the copy is fully completed before saving the screenshot scheduler.Finish(); @@ -1139,8 +1329,11 @@ void RendererVulkan::RenderScreenshot() { std::memcpy(VideoCore::g_screenshot_bits, data + subresource_layout.offset, subresource_layout.size); - // Destroy staging image + // Destroy allocated resources vmaDestroyImage(instance.GetAllocator(), unsafe_image, allocation); + vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); + device.destroyFramebuffer(frame.framebuffer); + device.destroyImageView(frame.image_view); VideoCore::g_screenshot_complete_callback(); VideoCore::g_renderer_screenshot_requested = false; @@ -1149,7 +1342,12 @@ void RendererVulkan::RenderScreenshot() { void RendererVulkan::NotifySurfaceChanged() { scheduler.Finish(); vk::SurfaceKHR new_surface = CreateSurface(instance.GetInstance(), render_window); - swapchain.Create(new_surface); + { + std::scoped_lock lock{swapchain_mutex}; + swapchain.SetNeedsRecreation(true); + swapchain.Create(new_surface); + swapchain_cv.notify_one(); + } } void RendererVulkan::Report() const { diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 9df270c63..18392ee83 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -5,6 +5,8 @@ #pragma once #include +#include +#include #include #include "common/common_types.h" #include "common/math_util.h" 
@@ -27,7 +29,6 @@ struct FramebufferLayout; namespace Vulkan { -/// Structure used for storing information about the textures for each 3DS screen struct TextureInfo { ImageAlloc alloc; u32 width; @@ -35,7 +36,6 @@ struct TextureInfo { GPU::Regs::PixelFormat format; }; -/// Structure used for storing information about the display target for each 3DS screen struct ScreenInfo { ImageAlloc* display_texture = nullptr; Common::Rectangle display_texcoords; @@ -43,7 +43,6 @@ struct ScreenInfo { vk::Sampler sampler; }; -// Uniform data used for presenting the 3DS screens struct PresentUniformData { glm::mat4 modelview; Common::Vec4f i_resolution; @@ -52,11 +51,6 @@ struct PresentUniformData { int screen_id_r = 0; int layer = 0; int reverse_interlaced = 0; - - // Returns an immutable byte view of the uniform data - auto AsBytes() const { - return std::as_bytes(std::span{this, 1}); - } }; static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!"); @@ -75,7 +69,7 @@ public: void ShutDown() override; void SwapBuffers() override; void NotifySurfaceChanged() override; - void TryPresent(int timeout_ms, bool is_secondary) override {} + void TryPresent(int timeout_ms, bool is_secondary) override; void PrepareVideoDumping() override {} void CleanupVideoDumping() override {} void Sync() override; @@ -92,6 +86,8 @@ private: void ConfigureRenderPipeline(); void PrepareRendertarget(); void RenderScreenshot(); + void RenderToMailbox(const Layout::FramebufferLayout& layout, + std::unique_ptr& mailbox, bool flipped); void BeginRendering(); void DrawScreens(const Layout::FramebufferLayout& layout, bool flipped); @@ -121,6 +117,8 @@ private: Swapchain swapchain; StreamBuffer vertex_buffer; RasterizerVulkan rasterizer; + std::mutex swapchain_mutex; + std::condition_variable swapchain_cv; // Present pipelines (Normal, Anaglyph, Interlaced) vk::PipelineLayout present_pipeline_layout; @@ -134,7 +132,7 @@ private: u32 current_pipeline = 0; u32 current_sampler = 
0; - /// Display information for top and bottom screens respectively + // Display information for top and bottom screens respectively std::array screen_infos{}; PresentUniformData draw_info{}; vk::ClearColorValue clear_color{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6999bfc9e..6b80aaee8 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -393,7 +393,8 @@ PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler, desc_manager{desc_manager}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1, "Pipeline builder"}, trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex, - GenerateTrivialVertexShader()} {} + GenerateTrivialVertexShader(instance.IsShaderClipDistanceSupported())} { +} PipelineCache::~PipelineCache() { vk::Device device = instance.GetDevice(); @@ -508,7 +509,7 @@ bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { - PicaVSConfig config{regs.rasterizer, regs.vs, setup}; + PicaVSConfig config{regs.rasterizer, regs.vs, setup, instance}; config.state.use_geometry_shader = instance.UseGeometryShaders(); for (u32 i = 0; i < layout.attribute_count; i++) { @@ -570,7 +571,7 @@ bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { return true; } - const PicaFixedGSConfig gs_config{regs}; + const PicaFixedGSConfig gs_config{regs, instance}; auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance); auto& shader = it->second; @@ -605,17 +606,20 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { const bool emit_spirv = Settings::values.spirv_shader_gen.GetValue(); const vk::Device device = instance.GetDevice(); - workers.QueueWork([config, device, emit_spirv, 
&shader]() { - if (emit_spirv) { - const std::vector code = GenerateFragmentShaderSPV(config); - shader.module = CompileSPV(code, device); - } else { + // When using SPIR-V emit the fragment shader on the main thread + // since it's quite fast. This also heavily reduces flicker + if (emit_spirv) { + const std::vector code = GenerateFragmentShaderSPV(config); + shader.module = CompileSPV(code, device); + shader.MarkBuilt(); + } else { + workers.QueueWork([config, device, &shader]() { const std::string code = GenerateFragmentShader(config); shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device, ShaderOptimization::High); - } - shader.MarkBuilt(); - }); + shader.MarkBuilt(); + }); + } } current_shaders[ProgramType::FS] = &shader; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 8dfe08fb6..229a27244 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -753,6 +753,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } + static int counter = 20; + counter--; + if (counter == 0) { + scheduler.DispatchWork(); + counter = 20; + } + return succeeded; } diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index cae4b4c72..35983ae36 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -212,7 +212,7 @@ void RenderpassCache::CreatePresentRenderpass(vk::Format format) { if (!present_renderpass) { present_renderpass = CreateRenderPass(format, vk::Format::eUndefined, vk::AttachmentLoadOp::eClear, - vk::ImageLayout::eUndefined, vk::ImageLayout::ePresentSrcKHR); + vk::ImageLayout::eUndefined, vk::ImageLayout::eTransferSrcOptimal); } } diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h 
b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index 5f0787c53..0648afb3e 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -4,8 +4,8 @@ #pragma once -#include #include +#include #include #include "common/hash.h" #include "video_core/rasterizer_cache/pixel_format.h" diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 5c3e5870d..06aedcbe9 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -154,8 +154,8 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa }; try { - vk::Queue queue = instance.GetGraphicsQueue(); - queue.submit(submit_info); + std::scoped_lock lock{queue_mutex}; + instance.GetGraphicsQueue().submit(submit_info); } catch (vk::DeviceLostError& err) { LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what()); UNREACHABLE(); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 9db045acc..788a8af23 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -79,6 +79,11 @@ public: return False(state & flag); } + /// Returns the mutex used to synchronize queue access + [[nodiscard]] std::mutex& QueueMutex() noexcept { + return queue_mutex; + } + /// Returns the current command buffer tick. 
[[nodiscard]] u64 CurrentTick() const noexcept { return master_semaphore.CurrentTick(); @@ -208,6 +213,7 @@ private: StateFlags state{}; std::mutex reserve_mutex; std::mutex work_mutex; + std::mutex queue_mutex; std::condition_variable_any work_cv; std::condition_variable wait_cv; std::jthread worker_thread; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index b9d85468d..f2eea2e60 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -26,7 +26,7 @@ namespace Vulkan { const std::string UniformBlockDef = Pica::Shader::BuildShaderUniformDefinitions("binding = 1,"); -static std::string GetVertexInterfaceDeclaration(bool is_output) { +static std::string GetVertexInterfaceDeclaration(bool is_output, bool use_clip_planes = false) { std::string out; const auto append_variable = [&](std::string_view var, int location) { @@ -44,12 +44,12 @@ static std::string GetVertexInterfaceDeclaration(bool is_output) { if (is_output) { // gl_PerVertex redeclaration is required for separate shader object - out += R"( -out gl_PerVertex { - vec4 gl_Position; - float gl_ClipDistance[2]; -}; -)"; + out += "out gl_PerVertex {\n"; + out += " vec4 gl_Position;\n"; + if (use_clip_planes) { + out += " float gl_ClipDistance[2];\n"; + } + out += "};\n"; } return out; @@ -237,6 +237,12 @@ void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, } } +PicaVSConfig::PicaVSConfig(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs, + Pica::Shader::ShaderSetup& setup, const Instance& instance) { + state.Init(rasterizer, regs, setup); + use_clip_planes = instance.IsShaderClipDistanceSupported(); +} + void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { vs_output_attributes = Common::BitSet(regs.vs.output_mask).Count(); gs_output_attributes = vs_output_attributes; @@ -260,6 +266,11 @@ void PicaGSConfigCommonRaw::Init(const Pica::Regs& 
regs) { } } +PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs, const Instance& instance) { + state.Init(regs); + use_clip_planes = instance.IsShaderClipDistanceSupported(); +} + /// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code) static bool IsPassThroughTevStage(const TevStageConfig& stage) { return (stage.color_op == TevStageConfig::Operation::Replace && @@ -1555,7 +1566,7 @@ do { return out; } -std::string GenerateTrivialVertexShader() { +std::string GenerateTrivialVertexShader(bool use_clip_planes) { std::string out = "#version 450 core\n" "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += @@ -1570,7 +1581,7 @@ std::string GenerateTrivialVertexShader() { ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW); - out += GetVertexInterfaceDeclaration(true); + out += GetVertexInterfaceDeclaration(true, use_clip_planes); out += UniformBlockDef; @@ -1586,15 +1597,19 @@ void main() { view = vert_view; gl_Position = vert_position; gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0; - - gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 - if (enable_clip1) { - gl_ClipDistance[1] = dot(clip_coef, vert_position); - } else { - gl_ClipDistance[1] = 0; - } -} )"; + if (use_clip_planes) { + out += R"( + gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0 + if (enable_clip1) { + gl_ClipDistance[1] = dot(clip_coef, vert_position); + } else { + gl_ClipDistance[1] = 0; + } + )"; + } + + out += "}\n"; return out; } @@ -1638,7 +1653,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config { )"; if (!config.state.use_geometry_shader) { - out += GetVertexInterfaceDeclaration(true); + out += GetVertexInterfaceDeclaration(true, config.use_clip_planes); } // input attributes declaration @@ -1693,12 +1708,14 @@ layout (set = 0, binding = 0, std140) uniform vs_config 
{ semantic(VSOutputAttributes::POSITION_W) + ");\n"; out += " gl_Position = vtx_pos;\n"; out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; - out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 - out += " if (enable_clip1) {\n"; - out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; - out += " } else {\n"; - out += " gl_ClipDistance[1] = 0;\n"; - out += " }\n\n"; + if (config.use_clip_planes) { + out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 + out += " if (enable_clip1) {\n"; + out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; + out += " } else {\n"; + out += " gl_ClipDistance[1] = 0;\n"; + out += " }\n\n"; + } out += " normquat = GetVertexQuaternion();\n"; out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + @@ -1733,8 +1750,8 @@ layout (set = 0, binding = 0, std140) uniform vs_config { return out; } -static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config) { - std::string out = GetVertexInterfaceDeclaration(true); +static std::string GetGSCommonSource(const PicaGSConfigCommonRaw& config, bool use_clip_planes) { + std::string out = GetVertexInterfaceDeclaration(true, use_clip_planes); out += UniformBlockDef; out += OpenGL::ShaderDecompiler::GetCommonDeclarations(); @@ -1773,12 +1790,14 @@ struct Vertex { semantic(VSOutputAttributes::POSITION_W) + ");\n"; out += " gl_Position = vtx_pos;\n"; out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; - out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 - out += " if (enable_clip1) {\n"; - out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; - out += " } else {\n"; - out += " gl_ClipDistance[1] = 0;\n"; - out += " }\n\n"; + if (use_clip_planes) { + out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 + out += " if (enable_clip1) {\n"; + out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; + out += 
" } else {\n"; + out += " gl_ClipDistance[1] = 0;\n"; + out += " }\n\n"; + } out += " vec4 vtx_quat = GetVertexQuaternion(vtx);\n"; out += " normquat = mix(vtx_quat, -vtx_quat, bvec4(quats_opposite));\n\n"; @@ -1830,7 +1849,7 @@ layout(triangle_strip, max_vertices = 3) out; )"; - out += GetGSCommonSource(config.state); + out += GetGSCommonSource(config.state, config.use_clip_planes); out += R"( void main() { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index be5091c87..0973052bb 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -181,9 +181,8 @@ struct PicaShaderConfigCommon { */ struct PicaVSConfig : Common::HashableStruct { explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer, const Pica::ShaderRegs& regs, - Pica::Shader::ShaderSetup& setup) { - state.Init(rasterizer, regs, setup); - } + Pica::Shader::ShaderSetup& setup, const Instance& instance); + bool use_clip_planes; }; struct PicaGSConfigCommonRaw { @@ -206,9 +205,8 @@ struct PicaGSConfigCommonRaw { * shader pipeline */ struct PicaFixedGSConfig : Common::HashableStruct { - explicit PicaFixedGSConfig(const Pica::Regs& regs) { - state.Init(regs); - } + explicit PicaFixedGSConfig(const Pica::Regs& regs, const Instance& instance); + bool use_clip_planes; }; /** @@ -217,7 +215,7 @@ struct PicaFixedGSConfig : Common::HashableStruct { * @param separable_shader generates shader that can be used for separate shader object * @returns String of the shader source code */ -std::string GenerateTrivialVertexShader(); +std::string GenerateTrivialVertexShader(bool use_clip_planes); /** * Generates the GLSL vertex shader program source code for the given VS program diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index c4983f9b1..f6c0e463c 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ 
b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -21,6 +21,7 @@ Swapchain::Swapchain(const Instance& instance, Scheduler& scheduler, FindPresentFormat(); SetPresentMode(); renderpass_cache.CreatePresentRenderpass(surface_format.format); + Create(); } Swapchain::~Swapchain() { @@ -29,7 +30,6 @@ Swapchain::~Swapchain() { } void Swapchain::Create(vk::SurfaceKHR new_surface) { - scheduler.Finish(); Destroy(); if (new_surface) { @@ -55,8 +55,8 @@ void Swapchain::Create(vk::SurfaceKHR new_surface) { .imageColorSpace = surface_format.colorSpace, .imageExtent = extent, .imageArrayLayers = 1, - .imageUsage = - vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment | + vk::ImageUsageFlagBits::eTransferSrc | vk::ImageUsageFlagBits::eTransferDst, .imageSharingMode = sharing_mode, .queueFamilyIndexCount = queue_family_indices_count, .pQueueFamilyIndices = queue_family_indices.data(), @@ -76,15 +76,11 @@ void Swapchain::Create(vk::SurfaceKHR new_surface) { SetupImages(); RefreshSemaphores(); - resource_ticks.clear(); - resource_ticks.resize(image_count); - - is_outdated = false; - is_suboptimal = false; + needs_recreation = false; } MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245)); -void Swapchain::AcquireNextImage() { +bool Swapchain::AcquireNextImage() { MICROPROFILE_SCOPE(Vulkan_Acquire); vk::Device device = instance.GetDevice(); vk::Result result = @@ -95,46 +91,43 @@ void Swapchain::AcquireNextImage() { case vk::Result::eSuccess: break; case vk::Result::eSuboptimalKHR: - is_suboptimal = true; + needs_recreation = true; break; case vk::Result::eErrorOutOfDateKHR: - is_outdated = true; + needs_recreation = true; break; default: ASSERT_MSG(false, "vkAcquireNextImageKHR returned unknown result {}", result); break; } - scheduler.Wait(resource_ticks[image_index]); - resource_ticks[image_index] = scheduler.CurrentTick(); + return !needs_recreation; } 
MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245)); void Swapchain::Present() { - scheduler.Record([this, index = image_index](vk::CommandBuffer) { - if (NeedsRecreation()) [[unlikely]] { - return; - } + if (needs_recreation) { + return; + } - const vk::PresentInfoKHR present_info = { - .waitSemaphoreCount = 1, - .pWaitSemaphores = &present_ready[index], - .swapchainCount = 1, - .pSwapchains = &swapchain, - .pImageIndices = &index, - }; + const vk::PresentInfoKHR present_info = { + .waitSemaphoreCount = 1, + .pWaitSemaphores = &present_ready[image_index], + .swapchainCount = 1, + .pSwapchains = &swapchain, + .pImageIndices = &image_index, + }; - MICROPROFILE_SCOPE(Vulkan_Present); - vk::Queue present_queue = instance.GetPresentQueue(); - try { - [[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info); - } catch (vk::OutOfDateKHRError&) { - is_outdated = true; - } catch (...) { - LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed"); - UNREACHABLE(); - } - }); + MICROPROFILE_SCOPE(Vulkan_Present); + try { + std::scoped_lock lock{scheduler.QueueMutex()}; + [[maybe_unused]] vk::Result result = instance.GetPresentQueue().presentKHR(present_info); + } catch (vk::OutOfDateKHRError&) { + needs_recreation = true; + } catch (const vk::SystemError& err) { + LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed {}", err.what()); + UNREACHABLE(); + } frame_index = (frame_index + 1) % image_count; } @@ -230,23 +223,16 @@ void Swapchain::Destroy() { if (swapchain) { device.destroySwapchainKHR(swapchain); } - for (const vk::ImageView view : image_views) { - device.destroyImageView(view); - } - for (const vk::Framebuffer framebuffer : framebuffers) { - device.destroyFramebuffer(framebuffer); - } - for (const vk::Semaphore semaphore : image_acquired) { - device.destroySemaphore(semaphore); - } - for (const vk::Semaphore semaphore : present_ready) { - device.destroySemaphore(semaphore); - } - framebuffers.clear(); 
- image_views.clear(); - image_acquired.clear(); - present_ready.clear(); + const auto Clear = [&](auto& vec) { + for (const auto item : vec) { + device.destroy(item); + } + vec.clear(); + }; + + Clear(image_acquired); + Clear(present_ready); } void Swapchain::RefreshSemaphores() { @@ -267,34 +253,6 @@ void Swapchain::SetupImages() { images = device.getSwapchainImagesKHR(swapchain); image_count = static_cast(images.size()); LOG_INFO(Render_Vulkan, "Using {} images", image_count); - - for (const vk::Image image : images) { - const vk::ImageViewCreateInfo view_info = { - .image = image, - .viewType = vk::ImageViewType::e2D, - .format = surface_format.format, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - image_views.push_back(device.createImageView(view_info)); - - const vk::FramebufferCreateInfo framebuffer_info = { - .renderPass = renderpass_cache.GetPresentRenderpass(), - .attachmentCount = 1, - .pAttachments = &image_views.back(), - .width = extent.width, - .height = extent.height, - .layers = 1, - }; - - framebuffers.push_back(device.createFramebuffer(framebuffer_info)); - } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index 302c9a42b..99ebedb65 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_common.h" @@ -23,14 +24,19 @@ public: void Create(vk::SurfaceKHR new_surface = {}); /// Acquires the next image in the swapchain. 
- void AcquireNextImage(); + bool AcquireNextImage(); /// Presents the current image and move to the next one void Present(); /// Returns true when the swapchain should be recreated [[nodiscard]] bool NeedsRecreation() const { - return is_suboptimal || is_outdated; + return needs_recreation; + } + + /// Notifies that the swapchain needs recreation + void SetNeedsRecreation(bool value) noexcept { + needs_recreation = value; } /// Returns current swapchain state @@ -43,11 +49,6 @@ public: return surface; } - /// Returns the current framebuffe - [[nodiscard]] vk::Framebuffer GetFramebuffer() const { - return framebuffers[image_index]; - } - /// Returns the current image [[nodiscard]] vk::Image Image() const { return images[image_index]; @@ -102,16 +103,12 @@ private: vk::SurfaceTransformFlagBitsKHR transform; vk::CompositeAlphaFlagBitsKHR composite_alpha; std::vector images; - std::vector image_views; - std::vector framebuffers; - std::vector resource_ticks; std::vector image_acquired; std::vector present_ready; u32 image_count = 0; u32 image_index = 0; u32 frame_index = 0; - bool is_outdated = true; - bool is_suboptimal = true; + bool needs_recreation = true; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_mailbox.cpp b/src/video_core/renderer_vulkan/vk_texture_mailbox.cpp new file mode 100644 index 000000000..acfe59fae --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_mailbox.cpp @@ -0,0 +1,170 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included.
+ +#include "video_core/renderer_vulkan/vk_instance.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/renderer_vulkan/vk_texture_mailbox.h" + +#include + +namespace Vulkan { + +TextureMailbox::TextureMailbox(const Instance& instance_, const Swapchain& swapchain_, + const RenderpassCache& renderpass_cache_) + : instance{instance_}, swapchain{swapchain_}, renderpass_cache{renderpass_cache_} { + + const vk::Device device = instance.GetDevice(); + const vk::CommandPoolCreateInfo pool_info = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer | + vk::CommandPoolCreateFlagBits::eTransient, + .queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex(), + }; + command_pool = device.createCommandPool(pool_info); + + const vk::CommandBufferAllocateInfo alloc_info = { + .commandPool = command_pool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = SWAP_CHAIN_SIZE, + }; + const std::vector command_buffers = device.allocateCommandBuffers(alloc_info); + + for (u32 i = 0; i < SWAP_CHAIN_SIZE; i++) { + Frontend::Frame& frame = swap_chain[i]; + frame.cmdbuf = command_buffers[i]; + frame.render_ready = device.createSemaphore({}); + frame.present_done = device.createFence({.flags = vk::FenceCreateFlagBits::eSignaled}); + free_queue.push(&frame); + } +} + +TextureMailbox::~TextureMailbox() { + std::scoped_lock lock{present_mutex, free_mutex}; + free_queue = {}; + present_queue = {}; + present_cv.notify_all(); + free_cv.notify_all(); + + const vk::Device device = instance.GetDevice(); + device.destroyCommandPool(command_pool); + for (auto& frame : swap_chain) { + device.destroyImageView(frame.image_view); + device.destroyFramebuffer(frame.framebuffer); + device.destroySemaphore(frame.render_ready); + device.destroyFence(frame.present_done); + vmaDestroyImage(instance.GetAllocator(), frame.image, frame.allocation); + } +} + +void 
TextureMailbox::ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) { + vk::Device device = instance.GetDevice(); + if (frame->framebuffer) { + device.destroyFramebuffer(frame->framebuffer); + } + if (frame->image_view) { + device.destroyImageView(frame->image_view); + } + if (frame->image) { + vmaDestroyImage(instance.GetAllocator(), frame->image, frame->allocation); + } + + const vk::Format format = swapchain.GetSurfaceFormat().format; + const vk::ImageCreateInfo image_info = { + .imageType = vk::ImageType::e2D, + .format = format, + .extent = {width, height, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .samples = vk::SampleCountFlagBits::e1, + .usage = vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eTransferSrc, + }; + + const VmaAllocationCreateInfo alloc_info = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + }; + + VkImage unsafe_image{}; + VkImageCreateInfo unsafe_image_info = static_cast(image_info); + + VkResult result = vmaCreateImage(instance.GetAllocator(), &unsafe_image_info, &alloc_info, + &unsafe_image, &frame->allocation, nullptr); + if (result != VK_SUCCESS) [[unlikely]] { + LOG_CRITICAL(Render_Vulkan, "Failed allocating texture with error {}", result); + UNREACHABLE(); + } + frame->image = vk::Image{unsafe_image}; + + const vk::ImageViewCreateInfo view_info = { + .image = frame->image, + .viewType = vk::ImageViewType::e2D, + .format = format, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + frame->image_view = device.createImageView(view_info); + + const vk::FramebufferCreateInfo framebuffer_info = { + .renderPass = renderpass_cache.GetPresentRenderpass(), + .attachmentCount = 1, + .pAttachments = &frame->image_view, + .width = width, + .height = height, + 
.layers = 1, + }; + frame->framebuffer = instance.GetDevice().createFramebuffer(framebuffer_info); + + frame->width = width; + frame->height = height; +} + +Frontend::Frame* TextureMailbox::GetRenderFrame() { + std::unique_lock lock{free_mutex}; + + if (free_queue.empty()) { + free_cv.wait(lock, [&] { return !free_queue.empty(); }); + } + + Frontend::Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; +} + +void TextureMailbox::ReleaseRenderFrame(Frontend::Frame* frame) { + std::unique_lock lock{present_mutex}; + present_queue.push(frame); + present_cv.notify_one(); +} + +void TextureMailbox::ReleasePresentFrame(Frontend::Frame* frame) { + std::unique_lock lock{free_mutex}; + free_queue.push(frame); + free_cv.notify_one(); +} + +Frontend::Frame* TextureMailbox::TryGetPresentFrame(int timeout_ms) { + std::unique_lock lock{present_mutex}; + // Wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + LOG_DEBUG(Render_Vulkan, "Timed out waiting present frame"); + return nullptr; + } + + Frontend::Frame* frame = present_queue.front(); + present_queue.pop(); + return frame; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_mailbox.h b/src/video_core/renderer_vulkan/vk_texture_mailbox.h new file mode 100644 index 000000000..8e101f9d1 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_mailbox.h @@ -0,0 +1,66 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include +#include +#include "core/frontend/emu_window.h" +#include "video_core/renderer_vulkan/vk_common.h" + +VK_DEFINE_HANDLE(VmaAllocation) + +namespace Frontend { + +struct Frame { + u32 width{}; + u32 height{}; + VmaAllocation allocation{}; + vk::Framebuffer framebuffer{}; + vk::Image image{}; + vk::ImageView image_view{}; + vk::Semaphore render_ready{}; + vk::Fence present_done{}; + std::mutex fence_mutex{}; + vk::CommandBuffer cmdbuf{}; +}; + +} // namespace Frontend + +namespace Vulkan { + +class Instance; +class Swapchain; +class RenderpassCache; + +class TextureMailbox final : public Frontend::TextureMailbox { + static constexpr std::size_t SWAP_CHAIN_SIZE = 8; + +public: + TextureMailbox(const Instance& instance, const Swapchain& swapchain, + const RenderpassCache& renderpass_cache); + ~TextureMailbox() override; + + void ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 height) override; + + Frontend::Frame* GetRenderFrame() override; + Frontend::Frame* TryGetPresentFrame(int timeout_ms) override; + + void ReleaseRenderFrame(Frontend::Frame* frame) override; + void ReleasePresentFrame(Frontend::Frame* frame) override; + +private: + const Instance& instance; + const Swapchain& swapchain; + const RenderpassCache& renderpass_cache; + vk::CommandPool command_pool; + std::mutex free_mutex; + std::mutex present_mutex; + std::condition_variable free_cv; + std::condition_variable present_cv; + std::array swap_chain{}; + std::queue free_queue{}; + std::queue present_queue{}; +}; + +} // namespace Vulkan