renderer_vulkan: Add pipeline barriers for attachments

This commit is contained in:
GPUCode
2023-02-01 22:31:37 +02:00
parent df7f1b13cb
commit d7bf139e85
10 changed files with 269 additions and 174 deletions

View File

@@ -28,7 +28,6 @@
#include <vk_mem_alloc.h> #include <vk_mem_alloc.h>
MICROPROFILE_DEFINE(Vulkan_RenderFrame, "Vulkan", "Render Frame", MP_RGB(128, 128, 64)); MICROPROFILE_DEFINE(Vulkan_RenderFrame, "Vulkan", "Render Frame", MP_RGB(128, 128, 64));
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
MICROPROFILE_DEFINE(Vulkan_SwapchainCopy, "Vulkan", "Swapchain Copy", MP_RGB(64, 64, 0)); MICROPROFILE_DEFINE(Vulkan_SwapchainCopy, "Vulkan", "Swapchain Copy", MP_RGB(64, 64, 0));
namespace Vulkan { namespace Vulkan {
@@ -195,72 +194,57 @@ void RendererVulkan::PrepareRendertarget() {
void RendererVulkan::RenderToMailbox(const Layout::FramebufferLayout& layout, void RendererVulkan::RenderToMailbox(const Layout::FramebufferLayout& layout,
std::unique_ptr<Frontend::TextureMailbox>& mailbox, std::unique_ptr<Frontend::TextureMailbox>& mailbox,
bool flipped) { bool flipped) {
const vk::Device device = instance.GetDevice(); Frontend::Frame* frame = mailbox->GetRenderFrame();
Frontend::Frame* frame; MICROPROFILE_SCOPE(Vulkan_RenderFrame);
{
MICROPROFILE_SCOPE(Vulkan_WaitPresent);
frame = mailbox->GetRenderFrame();
std::scoped_lock lock{frame->fence_mutex}; const auto [width, height] = swapchain.GetExtent();
[[maybe_unused]] vk::Result result = if (width != frame->width || height != frame->height) {
device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max()); mailbox->ReloadRenderFrame(frame, width, height);
device.resetFences(frame->present_done);
} }
{ scheduler.Record([layout](vk::CommandBuffer cmdbuf) {
MICROPROFILE_SCOPE(Vulkan_RenderFrame); const vk::Viewport viewport = {
.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(layout.width),
.height = static_cast<float>(layout.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const auto [width, height] = swapchain.GetExtent(); const vk::Rect2D scissor = {
if (width != frame->width || height != frame->height) { .offset = {0, 0},
mailbox->ReloadRenderFrame(frame, width, height); .extent = {layout.width, layout.height},
} };
scheduler.Record([layout](vk::CommandBuffer cmdbuf) { cmdbuf.setViewport(0, viewport);
const vk::Viewport viewport = { cmdbuf.setScissor(0, scissor);
.x = 0.0f, });
.y = 0.0f,
.width = static_cast<float>(layout.width),
.height = static_cast<float>(layout.height),
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
const vk::Rect2D scissor = { renderpass_cache.ExitRenderpass();
.offset = {0, 0}, scheduler.Record([this, framebuffer = frame->framebuffer, width = frame->width,
.extent = {layout.width, layout.height}, height = frame->height](vk::CommandBuffer cmdbuf) {
}; const vk::ClearValue clear{.color = clear_color};
const vk::RenderPassBeginInfo renderpass_begin_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
.framebuffer = framebuffer,
.renderArea =
vk::Rect2D{
.offset = {0, 0},
.extent = {width, height},
},
.clearValueCount = 1,
.pClearValues = &clear,
};
cmdbuf.setViewport(0, viewport); cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
cmdbuf.setScissor(0, scissor); });
});
renderpass_cache.ExitRenderpass(); DrawScreens(layout, flipped);
scheduler.Record([this, framebuffer = frame->framebuffer, width = frame->width, scheduler.Flush(frame->render_ready);
height = frame->height](vk::CommandBuffer cmdbuf) { scheduler.Record([&mailbox, frame](vk::CommandBuffer) { mailbox->ReleaseRenderFrame(frame); });
const vk::ClearValue clear{.color = clear_color}; scheduler.DispatchWork();
const vk::RenderPassBeginInfo renderpass_begin_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
.framebuffer = framebuffer,
.renderArea =
vk::Rect2D{
.offset = {0, 0},
.extent = {width, height},
},
.clearValueCount = 1,
.pClearValues = &clear,
};
cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
});
DrawScreens(layout, flipped);
scheduler.Flush(frame->render_ready);
scheduler.Record(
[&mailbox, frame](vk::CommandBuffer) { mailbox->ReleaseRenderFrame(frame); });
scheduler.DispatchWork();
}
} }
void RendererVulkan::BeginRendering() { void RendererVulkan::BeginRendering() {
@@ -1100,7 +1084,7 @@ void RendererVulkan::TryPresent(int timeout_ms, bool is_secondary) {
cmdbuf.end(); cmdbuf.end();
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = { static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eColorAttachmentOutput, vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands,
}; };

View File

@@ -318,12 +318,12 @@ bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.sampler = nearest_sampler, .sampler = nearest_sampler,
.imageView = source.GetDepthView(), .imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
}, },
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.sampler = nearest_sampler, .sampler = nearest_sampler,
.imageView = source.GetStencilView(), .imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
}, },
}; };
@@ -348,15 +348,15 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) { const VideoCore::TextureBlit& blit) {
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = source.GetDepthView(), .imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
}, },
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = source.GetStencilView(), .imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
}, },
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = dest.GetImageView(), .imageView = dest.ImageView(),
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
}, },
}; };
@@ -365,8 +365,8 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
device.updateDescriptorSetWithTemplate(set, compute_update_template, textures[0]); device.updateDescriptorSetWithTemplate(set, compute_update_template, textures[0]);
renderpass_cache.ExitRenderpass(); renderpass_cache.ExitRenderpass();
scheduler.Record([this, set, blit, src_image = source.alloc.image, scheduler.Record([this, set, blit, src_image = source.Image(),
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) { dst_image = dest.Image()](vk::CommandBuffer cmdbuf) {
const std::array pre_barriers = { const std::array pre_barriers = {
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,

View File

@@ -147,15 +147,15 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
VideoCore::Rect2D dst_rect) { VideoCore::Rect2D dst_rect) {
const std::array textures = { const std::array textures = {
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = source.GetDepthView(), .imageView = source.DepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
}, },
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = source.GetStencilView(), .imageView = source.StencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal, .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal,
}, },
vk::DescriptorImageInfo{ vk::DescriptorImageInfo{
.imageView = dest.GetImageView(), .imageView = dest.ImageView(),
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
}, },
}; };
@@ -164,8 +164,8 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
runtime.GetRenderpassCache().ExitRenderpass(); runtime.GetRenderpassCache().ExitRenderpass();
scheduler.Record([this, set, src_rect, src_image = source.alloc.image, scheduler.Record([this, set, src_rect, src_image = source.Image(),
dst_image = dest.alloc.image](vk::CommandBuffer cmdbuf) { dst_image = dest.Image()](vk::CommandBuffer cmdbuf) {
const std::array pre_barriers = { const std::array pre_barriers = {
vk::ImageMemoryBarrier{ vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite, .srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,

View File

@@ -607,7 +607,8 @@ void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const vk::Device device = instance.GetDevice(); const vk::Device device = instance.GetDevice();
// When using SPIR-V emit the fragment shader on the main thread // When using SPIR-V emit the fragment shader on the main thread
// since it's quite fast. This also heavily reduces flicker // since it's quite fast. This also heavily reduces flicker when
// using asynchronous shader compilation
if (emit_spirv) { if (emit_spirv) {
const std::vector code = GenerateFragmentShaderSPV(config); const std::vector code = GenerateFragmentShaderSPV(config);
shader.module = CompileSPV(code, device); shader.module = CompileSPV(code, device);

View File

@@ -123,12 +123,12 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache.BindTexelBuffer(4, texture_rgba_view); pipeline_cache.BindTexelBuffer(4, texture_rgba_view);
for (u32 i = 0; i < 4; i++) { for (u32 i = 0; i < 4; i++) {
pipeline_cache.BindTexture(i, null_surface.GetImageView()); pipeline_cache.BindTexture(i, null_surface.ImageView());
pipeline_cache.BindSampler(i, default_sampler); pipeline_cache.BindSampler(i, default_sampler);
} }
for (u32 i = 0; i < 7; i++) { for (u32 i = 0; i < 7; i++) {
pipeline_cache.BindStorageImage(i, null_storage_surface.GetImageView()); pipeline_cache.BindStorageImage(i, null_storage_surface.ImageView());
} }
// Explicitly call the derived version to avoid warnings about calling virtual // Explicitly call the derived version to avoid warnings about calling virtual
@@ -542,9 +542,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const u32 binding = static_cast<u32>(face); const u32 binding = static_cast<u32>(face);
if (surface) { if (surface) {
pipeline_cache.BindStorageImage(binding, surface->GetImageView()); pipeline_cache.BindStorageImage(binding, surface->ImageView());
} else { } else {
pipeline_cache.BindStorageImage(binding, null_storage_surface.GetImageView()); pipeline_cache.BindStorageImage(binding, null_storage_surface.ImageView());
} }
}; };
@@ -585,9 +585,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
case TextureType::Shadow2D: { case TextureType::Shadow2D: {
auto surface = res_cache.GetTextureSurface(texture); auto surface = res_cache.GetTextureSurface(texture);
if (surface) { if (surface) {
pipeline_cache.BindStorageImage(0, surface->GetStorageView()); pipeline_cache.BindStorageImage(0, surface->StorageView());
} else { } else {
pipeline_cache.BindStorageImage(0, null_storage_surface.GetImageView()); pipeline_cache.BindStorageImage(0, null_storage_surface.ImageView());
} }
continue; continue;
} }
@@ -617,9 +617,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureCube(config); auto surface = res_cache.GetTextureCube(config);
if (surface) { if (surface) {
pipeline_cache.BindTexture(3, surface->GetImageView()); pipeline_cache.BindTexture(3, surface->ImageView());
} else { } else {
pipeline_cache.BindTexture(3, null_surface.GetImageView()); pipeline_cache.BindTexture(3, null_surface.ImageView());
} }
BindSampler(3, texture_cube_sampler, texture.config); BindSampler(3, texture_cube_sampler, texture.config);
@@ -635,7 +635,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto surface = res_cache.GetTextureSurface(texture); auto surface = res_cache.GetTextureSurface(texture);
if (surface) { if (surface) {
if (color_surface && color_surface->GetImageView() == surface->GetImageView()) { if (color_surface && color_surface->ImageView() == surface->ImageView()) {
Surface temp{*color_surface, runtime}; Surface temp{*color_surface, runtime};
const VideoCore::TextureCopy copy = { const VideoCore::TextureCopy copy = {
.src_level = 0, .src_level = 0,
@@ -647,9 +647,9 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
.extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}}; .extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}};
runtime.CopyTextures(*color_surface, temp, copy); runtime.CopyTextures(*color_surface, temp, copy);
pipeline_cache.BindTexture(texture_index, temp.GetImageView()); pipeline_cache.BindTexture(texture_index, temp.ImageView());
} else { } else {
pipeline_cache.BindTexture(texture_index, surface->GetImageView()); pipeline_cache.BindTexture(texture_index, surface->ImageView());
} }
} else { } else {
@@ -660,10 +660,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// the geometry in question. // the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black. // on the male character's face, which in the OpenGL default appear black.
pipeline_cache.BindTexture(texture_index, null_surface.GetImageView()); pipeline_cache.BindTexture(texture_index, null_surface.ImageView());
} }
} else { } else {
pipeline_cache.BindTexture(texture_index, null_surface.GetImageView()); pipeline_cache.BindTexture(texture_index, null_surface.ImageView());
pipeline_cache.BindSampler(texture_index, default_sampler); pipeline_cache.BindSampler(texture_index, default_sampler);
} }
} }
@@ -1016,7 +1016,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
(float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width, (float)src_rect.bottom / (float)scaled_height, (float)src_rect.left / (float)scaled_width,
(float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width); (float)src_rect.top / (float)scaled_height, (float)src_rect.right / (float)scaled_width);
screen_info.image_view = src_surface->GetImageView(); screen_info.image_view = src_surface->ImageView();
return true; return true;
} }

View File

@@ -48,6 +48,7 @@ void RenderpassCache::EnterRenderpass(Surface* const color, Surface* const depth
u32 height = UINT32_MAX; u32 height = UINT32_MAX;
u32 cursor = 0; u32 cursor = 0;
std::array<VideoCore::PixelFormat, 2> formats{}; std::array<VideoCore::PixelFormat, 2> formats{};
std::array<vk::Image, 2> images{};
std::array<vk::ImageView, 2> views{}; std::array<vk::ImageView, 2> views{};
const auto Prepare = [&](Surface* const surface) { const auto Prepare = [&](Surface* const surface) {
@@ -59,12 +60,37 @@ void RenderpassCache::EnterRenderpass(Surface* const color, Surface* const depth
width = std::min(width, surface->GetScaledWidth()); width = std::min(width, surface->GetScaledWidth());
height = std::min(height, surface->GetScaledHeight()); height = std::min(height, surface->GetScaledHeight());
formats[cursor] = surface->pixel_format; formats[cursor] = surface->pixel_format;
views[cursor++] = surface->GetFramebufferView(); images[cursor] = surface->Image();
views[cursor++] = surface->FramebufferView();
}; };
Prepare(color); Prepare(color);
Prepare(depth_stencil); Prepare(depth_stencil);
const RenderingInfo new_info = {
.color =
RenderTarget{
.aspect = vk::ImageAspectFlagBits::eColor,
.image = images[0],
.image_view = views[0],
},
.depth =
RenderTarget{
.aspect = depth_stencil ? depth_stencil->Aspect() : vk::ImageAspectFlagBits::eDepth,
.image = images[1],
.image_view = views[1],
},
.render_area = render_area,
.clear = clear,
.do_clear = do_clear,
};
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass);
if (info == new_info && rendering && !is_dirty) {
cmd_count++;
return;
}
const vk::RenderPass renderpass = GetRenderpass(formats[0], formats[1], do_clear); const vk::RenderPass renderpass = GetRenderpass(formats[0], formats[1], do_clear);
const FramebufferInfo framebuffer_info = { const FramebufferInfo framebuffer_info = {
@@ -79,38 +105,24 @@ void RenderpassCache::EnterRenderpass(Surface* const color, Surface* const depth
it->second = CreateFramebuffer(framebuffer_info, renderpass); it->second = CreateFramebuffer(framebuffer_info, renderpass);
} }
const RenderpassState new_state = {
.renderpass = renderpass,
.framebuffer = it->second,
.render_area = render_area,
.clear = clear,
};
const u64 new_state_hash = Common::ComputeStructHash64(new_state);
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass);
if (state_hash == new_state_hash && rendering && !is_dirty) {
cmd_count++;
return;
}
if (rendering) { if (rendering) {
ExitRenderpass(); ExitRenderpass();
} }
scheduler.Record(
[render_area, clear, renderpass, framebuffer = it->second](vk::CommandBuffer cmdbuf) {
const vk::RenderPassBeginInfo renderpass_begin_info = {
.renderPass = renderpass,
.framebuffer = framebuffer,
.renderArea = render_area,
.clearValueCount = 1,
.pClearValues = &clear,
};
scheduler.Record([new_state](vk::CommandBuffer cmdbuf) { cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
const vk::RenderPassBeginInfo renderpass_begin_info = { });
.renderPass = new_state.renderpass,
.framebuffer = new_state.framebuffer,
.renderArea = new_state.render_area,
.clearValueCount = 1,
.pClearValues = &new_state.clear,
};
cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
});
scheduler.MarkStateNonDirty(StateFlags::Renderpass); scheduler.MarkStateNonDirty(StateFlags::Renderpass);
state_hash = new_state_hash; info = new_info;
rendering = true; rendering = true;
} }
@@ -120,13 +132,65 @@ void RenderpassCache::ExitRenderpass() {
} }
rendering = false; rendering = false;
scheduler.Record([dynamic_rendering = dynamic_rendering](vk::CommandBuffer cmdbuf) { scheduler.Record(
if (dynamic_rendering) { [info = info, dynamic_rendering = dynamic_rendering](vk::CommandBuffer cmdbuf) {
cmdbuf.endRenderingKHR(); u32 num_barriers = 0;
} else { std::array<vk::ImageMemoryBarrier, 2> barriers;
cmdbuf.endRenderPass(); vk::PipelineStageFlags src_stage{};
} vk::PipelineStageFlags dst_stage{};
});
if (info.color) {
barriers[num_barriers++] = vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eColorAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eShaderRead,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = info.color.image,
.subresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
src_stage |= vk::PipelineStageFlagBits::eColorAttachmentOutput;
dst_stage |= vk::PipelineStageFlagBits::eFragmentShader;
}
if (info.depth) {
barriers[num_barriers++] = vk::ImageMemoryBarrier{
.srcAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dstAccessMask = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = info.depth.image,
.subresourceRange{
.aspectMask = info.depth.aspect,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
src_stage |= vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
dst_stage |= vk::PipelineStageFlagBits::eLateFragmentTests;
}
if (dynamic_rendering) {
cmdbuf.endRenderingKHR();
} else {
cmdbuf.endRenderPass();
}
cmdbuf.pipelineBarrier(src_stage, dst_stage, vk::DependencyFlagBits::eByRegion, 0,
nullptr, 0, nullptr, num_barriers, barriers.data());
});
// The Mali guide recommends flushing at the end of each major renderpass // The Mali guide recommends flushing at the end of each major renderpass
// Testing has shown this has a significant effect on rendering performance // Testing has shown this has a significant effect on rendering performance
@@ -138,33 +202,40 @@ void RenderpassCache::ExitRenderpass() {
void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_stencil, void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_stencil,
vk::Rect2D render_area, bool do_clear, vk::ClearValue clear) { vk::Rect2D render_area, bool do_clear, vk::ClearValue clear) {
RenderingState new_state = { RenderingInfo new_info = {
.render_area = render_area, .render_area = render_area,
.clear = clear, .clear = clear,
.do_clear = do_clear, .do_clear = do_clear,
}; };
if (color) { if (color) {
new_state.color_view = color->GetFramebufferView(); new_info.color = RenderTarget{
.aspect = vk::ImageAspectFlagBits::eColor,
.image = color->Image(),
.image_view = color->FramebufferView(),
};
} }
if (depth_stencil) { if (depth_stencil) {
new_state.depth_view = depth_stencil->GetFramebufferView(); new_info.depth = RenderTarget{
.aspect = depth_stencil->Aspect(),
.image = depth_stencil->Image(),
.image_view = depth_stencil->FramebufferView(),
};
} }
const u64 new_state_hash = Common::ComputeStructHash64(new_state);
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass); const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass);
if (state_hash == new_state_hash && rendering && !is_dirty) { if (info == new_info && rendering && !is_dirty) {
cmd_count++; cmd_count++;
return; return;
} }
const bool has_stencil =
depth_stencil && depth_stencil->type == VideoCore::SurfaceType::DepthStencil;
if (rendering) { if (rendering) {
ExitRenderpass(); ExitRenderpass();
} }
scheduler.Record([new_info, has_stencil](vk::CommandBuffer cmdbuf) {
const bool has_stencil =
depth_stencil && depth_stencil->type == VideoCore::SurfaceType::DepthStencil;
scheduler.Record([new_state, has_stencil](vk::CommandBuffer cmdbuf) {
u32 cursor = 0; u32 cursor = 0;
std::array<vk::RenderingAttachmentInfoKHR, 2> infos{}; std::array<vk::RenderingAttachmentInfoKHR, 2> infos{};
@@ -178,21 +249,20 @@ void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_
.imageView = image_view, .imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral, .imageLayout = vk::ImageLayout::eGeneral,
.loadOp = .loadOp =
new_state.do_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad, new_info.do_clear ? vk::AttachmentLoadOp::eClear : vk::AttachmentLoadOp::eLoad,
.storeOp = vk::AttachmentStoreOp::eStore, .storeOp = vk::AttachmentStoreOp::eStore,
.clearValue = new_state.clear, .clearValue = new_info.clear,
}; };
}; };
Prepare(new_state.color_view); Prepare(new_info.color.image_view);
Prepare(new_state.depth_view); Prepare(new_info.depth.image_view);
const u32 color_attachment_count = new_state.color_view ? 1u : 0u; const u32 color_attachment_count = new_info.color ? 1u : 0u;
const vk::RenderingAttachmentInfoKHR* depth_info = const vk::RenderingAttachmentInfoKHR* depth_info = new_info.depth ? &infos[1] : nullptr;
new_state.depth_view ? &infos[1] : nullptr;
const vk::RenderingAttachmentInfoKHR* stencil_info = has_stencil ? &infos[1] : nullptr; const vk::RenderingAttachmentInfoKHR* stencil_info = has_stencil ? &infos[1] : nullptr;
const vk::RenderingInfoKHR rendering_info = { const vk::RenderingInfoKHR rendering_info = {
.renderArea = new_state.render_area, .renderArea = new_info.render_area,
.layerCount = 1, .layerCount = 1,
.colorAttachmentCount = color_attachment_count, .colorAttachmentCount = color_attachment_count,
.pColorAttachments = &infos[0], .pColorAttachments = &infos[0],
@@ -204,7 +274,7 @@ void RenderpassCache::BeginRendering(Surface* const color, Surface* const depth_
}); });
scheduler.MarkStateNonDirty(StateFlags::Renderpass); scheduler.MarkStateNonDirty(StateFlags::Renderpass);
state_hash = new_state_hash; info = new_info;
rendering = true; rendering = true;
} }

View File

@@ -40,8 +40,8 @@ struct hash<Vulkan::FramebufferInfo> {
namespace Vulkan { namespace Vulkan {
class RenderpassCache { class RenderpassCache {
static constexpr u32 MAX_COLOR_FORMATS = 5; static constexpr std::size_t MAX_COLOR_FORMATS = 5;
static constexpr u32 MAX_DEPTH_FORMATS = 4; static constexpr std::size_t MAX_DEPTH_FORMATS = 4;
public: public:
RenderpassCache(const Instance& instance, Scheduler& scheduler); RenderpassCache(const Instance& instance, Scheduler& scheduler);
@@ -80,26 +80,31 @@ private:
vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info, vk::RenderPass renderpass); vk::Framebuffer CreateFramebuffer(const FramebufferInfo& info, vk::RenderPass renderpass);
private: private:
struct RenderpassState { struct RenderTarget {
vk::RenderPass renderpass; vk::ImageAspectFlags aspect;
vk::Framebuffer framebuffer; vk::Image image;
vk::Rect2D render_area; vk::ImageView image_view;
vk::ClearValue clear;
[[nodiscard]] bool operator==(const RenderpassState& other) const { operator bool() const noexcept {
return std::memcmp(this, &other, sizeof(RenderpassState)) == 0; return image;
}
[[nodiscard]] bool operator==(const RenderTarget& other) const {
return image_view == other.image_view;
} }
}; };
struct RenderingState { struct RenderingInfo {
vk::ImageView color_view; RenderTarget color;
vk::ImageView depth_view; RenderTarget depth;
vk::Rect2D render_area; vk::Rect2D render_area;
vk::ClearValue clear; vk::ClearValue clear;
bool do_clear; bool do_clear;
[[nodiscard]] bool operator==(const RenderpassState& other) const { [[nodiscard]] bool operator==(const RenderingInfo& other) const {
return std::memcmp(this, &other, sizeof(RenderpassState)) == 0; return color == other.color && depth == other.depth &&
render_area == other.render_area && do_clear == other.do_clear &&
std::memcmp(&clear, &other.clear, sizeof(vk::ClearValue)) == 0;
} }
}; };
@@ -108,10 +113,10 @@ private:
vk::RenderPass present_renderpass{}; vk::RenderPass present_renderpass{};
vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2]; vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2];
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers; std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
RenderingInfo info{};
bool rendering = false; bool rendering = false;
bool dynamic_rendering = false; bool dynamic_rendering = false;
u32 cmd_count{}; u32 cmd_count{};
u64 state_hash{};
}; };
} // namespace Vulkan } // namespace Vulkan

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -9,6 +10,8 @@
#include <vk_mem_alloc.h> #include <vk_mem_alloc.h>
MICROPROFILE_DEFINE(Vulkan_WaitPresent, "Vulkan", "Wait For Present", MP_RGB(128, 128, 128));
namespace Vulkan { namespace Vulkan {
TextureMailbox::TextureMailbox(const Instance& instance_, const Swapchain& swapchain_, TextureMailbox::TextureMailbox(const Instance& instance_, const Swapchain& swapchain_,
@@ -129,14 +132,44 @@ void TextureMailbox::ReloadRenderFrame(Frontend::Frame* frame, u32 width, u32 he
} }
Frontend::Frame* TextureMailbox::GetRenderFrame() { Frontend::Frame* TextureMailbox::GetRenderFrame() {
std::unique_lock lock{free_mutex}; MICROPROFILE_SCOPE(Vulkan_WaitPresent);
if (free_queue.empty()) { Frontend::Frame* frame{};
free_cv.wait(lock, [&] { return !free_queue.empty(); }); {
std::unique_lock lock{free_mutex};
if (free_queue.empty()) {
free_cv.wait(lock, [&] { return !free_queue.empty(); });
}
frame = free_queue.front();
free_queue.pop();
} }
Frontend::Frame* frame = free_queue.front(); std::scoped_lock lock{frame->fence_mutex};
free_queue.pop();
vk::Device device = instance.GetDevice();
vk::Result result{};
const auto Wait = [&]() {
result = device.waitForFences(frame->present_done, false, std::numeric_limits<u64>::max());
return result;
};
while (Wait() != vk::Result::eSuccess) {
// Retry if the wait timed out
if (result == vk::Result::eTimeout) {
continue;
}
// eErrorInitializationFailed occurs on Mali GPU drivers because they use
// the ppoll() syscall, which isn't correctly restarted after a signal,
// so we need to manually retry waiting in that case
if (result == vk::Result::eErrorInitializationFailed) {
continue;
}
}
device.resetFences(frame->present_done);
return frame; return frame;
} }

View File

@@ -935,7 +935,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
// to avoid having to interleave the data later. These should(?) be // to avoid having to interleave the data later. These should(?) be
// uncommon anyways and the perf hit is very small // uncommon anyways and the perf hit is very small
if (type == VideoCore::SurfaceType::DepthStencil) { if (type == VideoCore::SurfaceType::DepthStencil) {
return DepthStencilDownload(download, staging); return /*DepthStencilDownload(download, staging)*/;
} }
const bool is_scaled = res_scale != 1; const bool is_scaled = res_scale != 1;

View File

@@ -162,7 +162,6 @@ private:
class Surface : public VideoCore::SurfaceBase<Surface> { class Surface : public VideoCore::SurfaceBase<Surface> {
friend class TextureRuntime; friend class TextureRuntime;
friend class RasterizerVulkan;
public: public:
Surface(TextureRuntime& runtime); Surface(TextureRuntime& runtime);
@@ -186,29 +185,39 @@ public:
/// Returns the pipeline stage flags indicative of the surface /// Returns the pipeline stage flags indicative of the surface
vk::PipelineStageFlags PipelineStageFlags() const noexcept; vk::PipelineStageFlags PipelineStageFlags() const noexcept;
/// Returns the surface aspect
vk::ImageAspectFlags Aspect() const noexcept {
return alloc.aspect;
}
/// Returns the surface image handle
vk::Image Image() const noexcept {
return alloc.image;
}
/// Returns an image view used to sample the surface from a shader /// Returns an image view used to sample the surface from a shader
vk::ImageView GetImageView() const noexcept { vk::ImageView ImageView() const noexcept {
return alloc.image_view; return alloc.image_view;
} }
/// Returns an image view used to create a framebuffer /// Returns an image view used to create a framebuffer
vk::ImageView GetFramebufferView() noexcept { vk::ImageView FramebufferView() noexcept {
is_framebuffer = true; is_framebuffer = true;
return alloc.base_view; return alloc.base_view;
} }
/// Returns the depth only image view of the surface, null otherwise /// Returns the depth only image view of the surface, null otherwise
vk::ImageView GetDepthView() const noexcept { vk::ImageView DepthView() const noexcept {
return alloc.depth_view; return alloc.depth_view;
} }
/// Returns the stencil only image view of the surface, null otherwise /// Returns the stencil only image view of the surface, null otherwise
vk::ImageView GetStencilView() const noexcept { vk::ImageView StencilView() const noexcept {
return alloc.stencil_view; return alloc.stencil_view;
} }
/// Returns the R32 image view used for atomic load/store /// Returns the R32 image view used for atomic load/store
vk::ImageView GetStorageView() noexcept { vk::ImageView StorageView() noexcept {
if (!alloc.storage_view) { if (!alloc.storage_view) {
LOG_CRITICAL(Render_Vulkan, LOG_CRITICAL(Render_Vulkan,
"Surface with pixel format {} and internal format {} " "Surface with pixel format {} and internal format {} "
@@ -220,11 +229,6 @@ public:
return alloc.storage_view; return alloc.storage_view;
} }
/// Returns the internal format of the allocated texture
vk::Format GetInternalFormat() const noexcept {
return alloc.format;
}
private: private:
/// Uploads pixel data to scaled texture /// Uploads pixel data to scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
@@ -240,12 +244,10 @@ private:
TextureRuntime& runtime; TextureRuntime& runtime;
const Instance& instance; const Instance& instance;
Scheduler& scheduler; Scheduler& scheduler;
public:
bool is_framebuffer{};
bool is_storage{};
ImageAlloc alloc; ImageAlloc alloc;
FormatTraits traits; FormatTraits traits;
bool is_framebuffer{};
bool is_storage{};
}; };
struct Traits { struct Traits {