renderer_vulkan: Improve storage reinterpretation barriers

This commit is contained in:
GPUCode
2023-01-11 16:04:23 +02:00
parent 694e49b857
commit 0ca25b64e1
5 changed files with 73 additions and 79 deletions

View File

@ -14,8 +14,10 @@
namespace Vulkan { namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager) DescriptorManager& desc_manager,
: scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} { RenderpassCache& renderpass_cache)
: scheduler{scheduler}, desc_manager{desc_manager},
renderpass_cache{renderpass_cache}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"( constexpr std::string_view cs_source = R"(
#version 450 core #version 450 core
#extension GL_EXT_samplerless_texture_functions : require #extension GL_EXT_samplerless_texture_functions : require
@ -166,14 +168,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
renderpass_cache.ExitRenderpass();
scheduler.Record([this, set, blit, src_image = source.alloc.image, scheduler.Record([this, set, blit, src_image = source.alloc.image,
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) { dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
const std::array pre_barriers = { const std::array pre_barriers = {
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite | .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
vk::AccessFlagBits::eDepthStencilAttachmentWrite | .dstAccessMask = vk::AccessFlagBits::eShaderRead,
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral, .oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -203,12 +204,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}, },
}}; },
};
const std::array post_barriers = { const std::array post_barriers = {
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead, .srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite | .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite, vk::AccessFlagBits::eDepthStencilAttachmentRead,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral, .newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -225,7 +227,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
}, },
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite, .srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead, .dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral, .oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral, .newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -239,7 +241,8 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}, },
}}; }};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers); vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
@ -254,7 +257,9 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1); cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
}); });
} }

View File

@ -14,12 +14,15 @@ namespace Vulkan {
class Instance; class Instance;
class DescriptorManager; class DescriptorManager;
class RenderpassCache;
class Scheduler; class Scheduler;
class Surface; class Surface;
class BlitHelper { class BlitHelper {
public: public:
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager); BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager,
RenderpassCache& renderpass_cache);
~BlitHelper(); ~BlitHelper();
/// Blits D24S8 pixel data to the provided buffer /// Blits D24S8 pixel data to the provided buffer
@ -29,6 +32,7 @@ public:
private: private:
Scheduler& scheduler; Scheduler& scheduler;
DescriptorManager& desc_manager; DescriptorManager& desc_manager;
RenderpassCache& renderpass_cache;
vk::Device device; vk::Device device;
vk::Pipeline compute_pipeline; vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout; vk::PipelineLayout compute_pipeline_layout;

View File

@ -166,11 +166,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
runtime.GetRenderpassCache().ExitRenderpass(); runtime.GetRenderpassCache().ExitRenderpass();
scheduler.Record([this, set, src_rect, src_image = source.alloc.image, scheduler.Record([this, set, src_rect, src_image = source.alloc.image,
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) { dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier pre_barrier = { const std::array pre_barriers = {
.srcAccessMask = vk::AccessFlagBits::eShaderWrite | vk::ImageMemoryBarrier{
vk::AccessFlagBits::eDepthStencilAttachmentWrite | .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
vk::AccessFlagBits::eDepthStencilAttachmentRead, .dstAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral, .oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -183,12 +182,28 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}, },
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}
}; };
const std::array post_barriers = { const std::array post_barriers = {
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead, .srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite | .dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral, .newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -218,11 +233,14 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
.baseArrayLayer = 0, .baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS, .layerCount = VK_REMAINING_ARRAY_LAYERS,
}, },
}}; },
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader, vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
0, set, {}); 0, set, {});
@ -235,7 +253,9 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1); cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader, cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
}); });
} }

View File

@ -21,7 +21,7 @@ namespace Vulkan {
using TriangleTopology = Pica::PipelineRegs::TriangleTopology; using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; constexpr u64 STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer | constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
@ -65,7 +65,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager}, pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime}, null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime}, null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE}, stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)}, texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} { texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
@ -751,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info); pipeline_cache.BindPipeline(pipeline_info);
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex); const u32 max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size()); const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) { for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
const u32 vertices = std::min(max_vertices, batch_size - base_vertex); const u32 vertices = std::min(max_vertices, batch_size - base_vertex);

View File

@ -109,7 +109,7 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager}, desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager, renderpass_cache},
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE}, upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst, download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
DOWNLOAD_BUFFER_SIZE, true} { DOWNLOAD_BUFFER_SIZE, true} {
@ -840,41 +840,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
} }
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
/*renderpass_cache.ExitRenderpass();
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
s32 current_height = surface.GetScaledHeight();
const u32 levels = std::bit_width(std::max(surface.width, surface.height));
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}};
const std::array dest_offsets = {
vk::Offset3D{0, 0, 0}, vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
current_height > 1 ? current_height / 2 : 1, 1}};
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect,
.mipLevel = i - 1,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffsets = source_offsets,
.dstSubresource = {.aspectMask = aspect,
.mipLevel = i,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffsets = dest_offsets};
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
}*/
} }
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(