renderer_vulkan: Improve storage reinterpretation barriers

This commit is contained in:
GPUCode
2023-01-11 16:04:23 +02:00
parent 694e49b857
commit 0ca25b64e1
5 changed files with 73 additions and 79 deletions

View File

@ -14,8 +14,10 @@
namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager)
: scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} {
DescriptorManager& desc_manager,
RenderpassCache& renderpass_cache)
: scheduler{scheduler}, desc_manager{desc_manager},
renderpass_cache{renderpass_cache}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
@ -166,14 +168,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
renderpass_cache.ExitRenderpass();
scheduler.Record([this, set, blit, src_image = source.alloc.image,
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
const std::array pre_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -203,12 +204,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}};
},
};
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -225,7 +227,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -239,9 +241,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
0, set, {});
@ -254,8 +257,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests |
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
});
}

View File

@ -14,12 +14,15 @@ namespace Vulkan {
class Instance;
class DescriptorManager;
class RenderpassCache;
class Scheduler;
class Surface;
class BlitHelper {
public:
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager);
BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager,
RenderpassCache& renderpass_cache);
~BlitHelper();
/// Blits D24S8 pixel data to the provided buffer
@ -29,6 +32,7 @@ public:
private:
Scheduler& scheduler;
DescriptorManager& desc_manager;
RenderpassCache& renderpass_cache;
vk::Device device;
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;

View File

@ -166,29 +166,44 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
runtime.GetRenderpassCache().ExitRenderpass();
scheduler.Record([this, set, src_rect, src_image = source.alloc.image,
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentRead,
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
const std::array pre_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = src_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
},
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = dst_image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}
};
const std::array post_barriers = {
vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
@ -218,11 +233,14 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
}};
},
};
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::PipelineStageFlagBits::eComputeShader,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
0, set, {});
@ -235,8 +253,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
vk::PipelineStageFlagBits::eAllCommands,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
});
}

View File

@ -21,7 +21,7 @@ namespace Vulkan {
using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u64 STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
@ -65,7 +65,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE},
stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
@ -751,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
const u32 vertices = std::min(max_vertices, batch_size - base_vertex);

View File

@ -109,7 +109,7 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager, renderpass_cache},
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
DOWNLOAD_BUFFER_SIZE, true} {
@ -840,41 +840,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
}
/// Mipmap generation entry point. As written here it performs no work: the entire
/// body below is wrapped in a block comment, so `surface` and `max_level` are unused.
///
/// The disabled code sketches a blit chain: exit the current renderpass, then for each
/// level i >= 1 transition level i-1 to eTransferSrcOptimal and level i to
/// eTransferDstOptimal, and blitImage() from the former into the latter with a linear
/// filter, targeting half the source extent (clamped to 1).
///
/// NOTE(review): points to resolve before re-enabling the disabled code as shown:
///  - current_width / current_height are never updated inside the loop, so every
///    iteration would blit using the initial extents;
///  - the level count is derived from surface.width / surface.height and `max_level`
///    is ignored;
///  - commands are issued on scheduler.GetRenderCommandBuffer() directly; confirm
///    whether this should be recorded through the scheduler instead.
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
/*renderpass_cache.ExitRenderpass();
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
s32 current_height = surface.GetScaledHeight();
const u32 levels = std::bit_width(std::max(surface.width, surface.height));
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
for (u32 i = 1; i < levels; i++) {
surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1);
const std::array source_offsets = {vk::Offset3D{0, 0, 0},
vk::Offset3D{current_width, current_height, 1}};
const std::array dest_offsets = {
vk::Offset3D{0, 0, 0}, vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
current_height > 1 ? current_height / 2 : 1, 1}};
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect,
.mipLevel = i - 1,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffsets = source_offsets,
.dstSubresource = {.aspectMask = aspect,
.mipLevel = i,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffsets = dest_offsets};
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
}*/
}
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(