From 0ca25b64e105823fd73a421840a050fda9cb9ef0 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 11 Jan 2023 16:04:23 +0200 Subject: [PATCH] renderer_vulkan: Improve storage reinterpretation barriers --- .../renderer_vulkan/vk_blit_helper.cpp | 35 ++++++---- .../renderer_vulkan/vk_blit_helper.h | 6 +- .../vk_format_reinterpreter.cpp | 68 ++++++++++++------- .../renderer_vulkan/vk_rasterizer.cpp | 6 +- .../renderer_vulkan/vk_texture_runtime.cpp | 37 +--------- 5 files changed, 73 insertions(+), 79 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index d666d626d..5154c88ea 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -14,8 +14,10 @@ namespace Vulkan { BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, - DescriptorManager& desc_manager) - : scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} { + DescriptorManager& desc_manager, + RenderpassCache& renderpass_cache) + : scheduler{scheduler}, desc_manager{desc_manager}, + renderpass_cache{renderpass_cache}, device{instance.GetDevice()} { constexpr std::string_view cs_source = R"( #version 450 core #extension GL_EXT_samplerless_texture_functions : require @@ -166,14 +168,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); + renderpass_cache.ExitRenderpass(); scheduler.Record([this, set, blit, src_image = source.alloc.image, dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) { const std::array pre_barriers = { vk::ImageMemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eShaderWrite | - vk::AccessFlagBits::eDepthStencilAttachmentWrite | - vk::AccessFlagBits::eDepthStencilAttachmentRead, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | 
vk::AccessFlagBits::eShaderWrite, + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, .oldLayout = vk::ImageLayout::eGeneral, .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -203,12 +204,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - }}; + }, + }; const std::array post_barriers = { vk::ImageMemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eShaderRead, - .dstAccessMask = vk::AccessFlagBits::eShaderWrite | - vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite | + vk::AccessFlagBits::eDepthStencilAttachmentRead, .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .newLayout = vk::ImageLayout::eGeneral, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -225,7 +227,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, }, vk::ImageMemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eShaderWrite, - .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .dstAccessMask = vk::AccessFlagBits::eTransferRead, .oldLayout = vk::ImageLayout::eGeneral, .newLayout = vk::ImageLayout::eGeneral, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -239,9 +241,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }}; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eComputeShader, - vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); @@ -254,8 +257,10 @@ 
void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests | + vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); }); } diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h index 1a99c2b60..5caff2649 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.h +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -14,12 +14,15 @@ namespace Vulkan { class Instance; class DescriptorManager; +class RenderpassCache; class Scheduler; class Surface; class BlitHelper { public: - BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager); + BlitHelper(const Instance& instance, Scheduler& scheduler, + DescriptorManager& desc_manager, + RenderpassCache& renderpass_cache); ~BlitHelper(); /// Blits D24S8 pixel data to the provided buffer @@ -29,6 +32,7 @@ public: private: Scheduler& scheduler; DescriptorManager& desc_manager; + RenderpassCache& renderpass_cache; vk::Device device; vk::Pipeline compute_pipeline; vk::PipelineLayout compute_pipeline_layout; diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp index 298c9995e..7866da126 100644 --- a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -166,29 +166,44 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf runtime.GetRenderpassCache().ExitRenderpass(); scheduler.Record([this, set, src_rect, src_image = source.alloc.image, dst_image = 
dest.alloc.image](vk::CommandBuffer cmdbuf) { - const vk::ImageMemoryBarrier pre_barrier = { - .srcAccessMask = vk::AccessFlagBits::eShaderWrite | - vk::AccessFlagBits::eDepthStencilAttachmentWrite | - vk::AccessFlagBits::eDepthStencilAttachmentRead, - .dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite, - .oldLayout = vk::ImageLayout::eGeneral, - .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = src_image, - .subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, + const std::array pre_barriers = { + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderRead, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = src_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, }, + vk::ImageMemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eShaderWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = dst_image, + .subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, 
+ } }; const std::array post_barriers = { vk::ImageMemoryBarrier{ .srcAccessMask = vk::AccessFlagBits::eShaderRead, - .dstAccessMask = vk::AccessFlagBits::eShaderWrite | - vk::AccessFlagBits::eDepthStencilAttachmentWrite, + .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .newLayout = vk::ImageLayout::eGeneral, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -218,11 +233,14 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf .baseArrayLayer = 0, .layerCount = VK_REMAINING_ARRAY_LAYERS, }, - }}; + }, + }; - cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eComputeShader, - vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput | + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::PipelineStageFlagBits::eComputeShader, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); @@ -235,8 +253,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); + vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eEarlyFragmentTests | + vk::PipelineStageFlagBits::eLateFragmentTests, + vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index dfc09c815..c42a12496 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -21,7 
+21,7 @@ namespace Vulkan { using TriangleTopology = Pica::PipelineRegs::TriangleTopology; -constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr u64 STREAM_BUFFER_SIZE = 128 * 1024 * 1024; constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer | @@ -65,7 +65,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan pipeline_cache{instance, scheduler, renderpass_cache, desc_manager}, null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime}, null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime}, - stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE}, + stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)}, texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} { @@ -751,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); - const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); + const u32 max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex); const u32 batch_size = static_cast<u32>(vertex_batch.size()); for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { const u32 vertices = std::min(max_vertices, batch_size - base_vertex); diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 44a683fdd..cfa941e4f 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -109,7 +109,7 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache, DescriptorManager& 
desc_manager) : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, - desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager}, + desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager, renderpass_cache}, upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst, DOWNLOAD_BUFFER_SIZE, true} { @@ -840,41 +840,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, } void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { - /*renderpass_cache.ExitRenderpass(); - - // TODO: Investigate AMD single pass downsampler - s32 current_width = surface.GetScaledWidth(); - s32 current_height = surface.GetScaledHeight(); - - const u32 levels = std::bit_width(std::max(surface.width, surface.height)); - vk::ImageAspectFlags aspect = ToVkAspect(surface.type); - vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); - for (u32 i = 1; i < levels; i++) { - surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1); - surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1); - - const std::array source_offsets = {vk::Offset3D{0, 0, 0}, - vk::Offset3D{current_width, current_height, 1}}; - - const std::array dest_offsets = { - vk::Offset3D{0, 0, 0}, vk::Offset3D{current_width > 1 ? current_width / 2 : 1, - current_height > 1 ? 
current_height / 2 : 1, 1}}; - - const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect, - .mipLevel = i - 1, - .baseArrayLayer = 0, - .layerCount = 1}, - .srcOffsets = source_offsets, - .dstSubresource = {.aspectMask = aspect, - .mipLevel = i, - .baseArrayLayer = 0, - .layerCount = 1}, - .dstOffsets = dest_offsets}; - - command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal, - surface.alloc.image, vk::ImageLayout::eTransferDstOptimal, - blit_area, vk::Filter::eLinear); - }*/ } const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(