renderer_vulkan: Improve storage reinterpretation barriers
This commit is contained in:
@ -14,8 +14,10 @@
|
||||
namespace Vulkan {
|
||||
|
||||
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler,
|
||||
DescriptorManager& desc_manager)
|
||||
: scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} {
|
||||
DescriptorManager& desc_manager,
|
||||
RenderpassCache& renderpass_cache)
|
||||
: scheduler{scheduler}, desc_manager{desc_manager},
|
||||
renderpass_cache{renderpass_cache}, device{instance.GetDevice()} {
|
||||
constexpr std::string_view cs_source = R"(
|
||||
#version 450 core
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
@ -166,14 +168,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
|
||||
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
|
||||
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
|
||||
|
||||
renderpass_cache.ExitRenderpass();
|
||||
scheduler.Record([this, set, blit, src_image = source.alloc.image,
|
||||
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
|
||||
const std::array pre_barriers = {
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@ -203,12 +204,13 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
}};
|
||||
},
|
||||
};
|
||||
const std::array post_barriers = {
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead,
|
||||
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@ -225,7 +227,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
|
||||
},
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@ -239,9 +241,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
}};
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
|
||||
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
|
||||
0, set, {});
|
||||
@ -254,8 +257,10 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
|
||||
cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests |
|
||||
vk::PipelineStageFlagBits::eTransfer,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -14,12 +14,15 @@ namespace Vulkan {
|
||||
|
||||
class Instance;
|
||||
class DescriptorManager;
|
||||
class RenderpassCache;
|
||||
class Scheduler;
|
||||
class Surface;
|
||||
|
||||
class BlitHelper {
|
||||
public:
|
||||
BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager);
|
||||
BlitHelper(const Instance& instance, Scheduler& scheduler,
|
||||
DescriptorManager& desc_manager,
|
||||
RenderpassCache& renderpass_cache);
|
||||
~BlitHelper();
|
||||
|
||||
/// Blits D24S8 pixel data to the provided buffer
|
||||
@ -29,6 +32,7 @@ public:
|
||||
private:
|
||||
Scheduler& scheduler;
|
||||
DescriptorManager& desc_manager;
|
||||
RenderpassCache& renderpass_cache;
|
||||
vk::Device device;
|
||||
vk::Pipeline compute_pipeline;
|
||||
vk::PipelineLayout compute_pipeline_layout;
|
||||
|
@ -166,29 +166,44 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
|
||||
runtime.GetRenderpassCache().ExitRenderpass();
|
||||
scheduler.Record([this, set, src_rect, src_image = source.alloc.image,
|
||||
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) {
|
||||
const vk::ImageMemoryBarrier pre_barrier = {
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = src_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
const std::array pre_barriers = {
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = src_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
},
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderWrite,
|
||||
.oldLayout = vk::ImageLayout::eGeneral,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = dst_image,
|
||||
.subresourceRange{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
}
|
||||
};
|
||||
const std::array post_barriers = {
|
||||
vk::ImageMemoryBarrier{
|
||||
.srcAccessMask = vk::AccessFlagBits::eShaderRead,
|
||||
.dstAccessMask = vk::AccessFlagBits::eShaderWrite |
|
||||
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
|
||||
.oldLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
|
||||
.newLayout = vk::ImageLayout::eGeneral,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
@ -218,11 +233,14 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||
},
|
||||
}};
|
||||
},
|
||||
};
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests,
|
||||
vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barriers);
|
||||
|
||||
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
|
||||
0, set, {});
|
||||
@ -235,8 +253,10 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
|
||||
cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
|
||||
|
||||
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
|
||||
vk::PipelineStageFlagBits::eAllCommands,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
|
||||
vk::PipelineStageFlagBits::eFragmentShader |
|
||||
vk::PipelineStageFlagBits::eEarlyFragmentTests |
|
||||
vk::PipelineStageFlagBits::eLateFragmentTests,
|
||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,7 @@ namespace Vulkan {
|
||||
|
||||
using TriangleTopology = Pica::PipelineRegs::TriangleTopology;
|
||||
|
||||
constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
constexpr u64 STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||
|
||||
constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
|
||||
@ -65,7 +65,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
|
||||
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
|
||||
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
|
||||
stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE},
|
||||
stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
|
||||
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
|
||||
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
|
||||
|
||||
@ -751,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
pipeline_cache.UseTrivialGeometryShader();
|
||||
pipeline_cache.BindPipeline(pipeline_info);
|
||||
|
||||
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
|
||||
const u32 max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex);
|
||||
const u32 batch_size = static_cast<u32>(vertex_batch.size());
|
||||
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
|
||||
const u32 vertices = std::min(max_vertices, batch_size - base_vertex);
|
||||
|
@ -109,7 +109,7 @@ constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
|
||||
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
|
||||
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
|
||||
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
|
||||
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager, renderpass_cache},
|
||||
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
|
||||
download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
|
||||
DOWNLOAD_BUFFER_SIZE, true} {
|
||||
@ -840,41 +840,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
|
||||
}
|
||||
|
||||
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
|
||||
/*renderpass_cache.ExitRenderpass();
|
||||
|
||||
// TODO: Investigate AMD single pass downsampler
|
||||
s32 current_width = surface.GetScaledWidth();
|
||||
s32 current_height = surface.GetScaledHeight();
|
||||
|
||||
const u32 levels = std::bit_width(std::max(surface.width, surface.height));
|
||||
vk::ImageAspectFlags aspect = ToVkAspect(surface.type);
|
||||
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
|
||||
for (u32 i = 1; i < levels; i++) {
|
||||
surface.Transition(vk::ImageLayout::eTransferSrcOptimal, i - 1, 1);
|
||||
surface.Transition(vk::ImageLayout::eTransferDstOptimal, i, 1);
|
||||
|
||||
const std::array source_offsets = {vk::Offset3D{0, 0, 0},
|
||||
vk::Offset3D{current_width, current_height, 1}};
|
||||
|
||||
const std::array dest_offsets = {
|
||||
vk::Offset3D{0, 0, 0}, vk::Offset3D{current_width > 1 ? current_width / 2 : 1,
|
||||
current_height > 1 ? current_height / 2 : 1, 1}};
|
||||
|
||||
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = aspect,
|
||||
.mipLevel = i - 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1},
|
||||
.srcOffsets = source_offsets,
|
||||
.dstSubresource = {.aspectMask = aspect,
|
||||
.mipLevel = i,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1},
|
||||
.dstOffsets = dest_offsets};
|
||||
|
||||
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
|
||||
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
|
||||
blit_area, vk::Filter::eLinear);
|
||||
}*/
|
||||
}
|
||||
|
||||
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
|
||||
|
Reference in New Issue
Block a user