renderer_vulkan: Emulate depth-stencil blits with VK_EXT_shader_stencil_export

* Should fix depth blits on AMD
This commit is contained in:
GPUCode
2023-01-28 20:20:54 +02:00
parent 69b66cb41d
commit b18710e4df
11 changed files with 494 additions and 124 deletions

View File

@ -12,14 +12,17 @@ set(SHADER_FILES
texture_filtering/xbrz_freescale.vert
texture_filtering/x_gradient.frag
texture_filtering/y_gradient.frag
full_screen_triangle.vert
opengl_present.frag
opengl_present.vert
opengl_present_anaglyph.frag
opengl_present_interlaced.frag
vulkan_d32s8_to_r32.comp
vulkan_present.frag
vulkan_present.vert
vulkan_present_anaglyph.frag
vulkan_present_interlaced.frag
vulkan_blit_depth_stencil.frag
)
find_program(GLSLANGVALIDATOR "glslangValidator")

View File

@ -0,0 +1,29 @@
// Copyright 2020 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 460 core
// Vertex shader that emits one oversized triangle from gl_VertexIndex alone
// (no vertex buffers) and derives the texture coordinate used by blit shaders.
// The two parameters are push constants on Vulkan and plain uniforms on OpenGL.
#ifdef VULKAN
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
#define END_PUSH_CONSTANTS };
#define UNIFORM(n)
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
#define BEGIN_PUSH_CONSTANTS
#define END_PUSH_CONSTANTS
#define UNIFORM(n) layout (location = n) uniform
#endif
BEGIN_PUSH_CONSTANTS
// Scale applied to the 0..1 coordinate spanning the visible area.
UNIFORM(0) vec2 tex_scale;
// Offset added after scaling.
UNIFORM(1) vec2 tex_offset;
END_PUSH_CONSTANTS
layout(location = 0) out vec2 texcoord;
void main() {
// Vertex index 0 -> (0, 0), 1 -> (4, 0), 2 -> (0, 4).
float x = float((gl_VertexIndex & 1) << 2);
float y = float((gl_VertexIndex & 2) << 1);
// Clip-space positions (-1,-1), (3,-1), (-1,3): the triangle covers the whole [-1, 1] square.
gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
// (x, y) / 2 is 0..1 across the visible square; map it with scale and offset.
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
}

View File

@ -0,0 +1,16 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 450 core
// Fragment shader that copies depth and stencil from two sampled views into the
// bound depth-stencil attachment. Writing the stencil reference from the shader
// needs the stencil export extension required below.
#extension GL_ARB_shader_stencil_export : require
// Depth aspect of the source image.
layout(binding = 0) uniform sampler2D depth_tex;
// Stencil aspect of the source image, sampled as unsigned integers.
layout(binding = 1) uniform usampler2D stencil_tex;
layout(location = 0) in vec2 texcoord;
void main() {
// Always sample mip level 0.
gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
gl_FragStencilRefARB = int(textureLod(stencil_tex, texcoord, 0).r);
}

View File

@ -0,0 +1,24 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#version 450 core
// Compute shader that packs a depth texel and a stencil texel into one 32-bit
// unsigned value and stores it in an r32ui image.
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(binding = 0) uniform highp texture2D depth;
layout(binding = 1) uniform lowp utexture2D stencil;
layout(binding = 2, r32ui) uniform highp writeonly uimage2D color;
layout(push_constant, std140) uniform ComputeInfo {
// Texel offset added to the invocation id to locate the source texel.
mediump ivec2 src_offset;
};
void main() {
// One invocation per destination texel.
ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy);
ivec2 tex_coord = src_offset + dst_coord;
// Scale the fetched depth by 2^24 - 1 to obtain a 24-bit unsigned integer.
highp uint depth_val = uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
// Layout of the packed value: depth in bits 8..31, stencil in bits 0..7.
highp uint value = stencil_val | (depth_val << 8);
imageStore(color, dst_coord, uvec4(value));
}

View File

@ -11,140 +11,337 @@
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
#include "video_core/host_shaders/vulkan_d32s8_to_r32_comp_spv.h"
namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager, RenderpassCache& renderpass_cache)
: scheduler{scheduler}, desc_manager{desc_manager},
renderpass_cache{renderpass_cache}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform highp texture2D depth;
layout(set = 0, binding = 1) uniform lowp utexture2D stencil;
layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color;
layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset;
namespace {
/// Host-side copy of the push constants uploaded to the vertex stage by BindBlitState.
/// The fields are two vec2 values in the same order as the shader's push constant block.
struct PushConstants {
std::array<float, 2> tex_scale;
std::array<float, 2> tex_offset;
};
void main() {
ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy);
ivec2 tex_coord = src_offset + dst_coord;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uint value = stencil_val | (depth_val << 8);
imageStore(color, dst_coord, uvec4(value));
}
)";
compute_shader =
Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High);
const std::array compute_layout_bindings = {
vk::DescriptorSetLayoutBinding{
.binding = 0,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
vk::DescriptorSetLayoutBinding{
.binding = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
vk::DescriptorSetLayoutBinding{
.binding = 2,
.descriptorType = vk::DescriptorType::eStorageImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute,
},
};
/// Descriptor set layout binding holding exactly one descriptor of `type` at `binding`,
/// visible to the shader stage `stage`.
template <u32 binding, vk::DescriptorType type, vk::ShaderStageFlagBits stage>
inline constexpr vk::DescriptorSetLayoutBinding TEXTURE_DESC_LAYOUT{
.binding = binding,
.descriptorType = type,
.descriptorCount = 1,
.stageFlags = stage,
};
/// Descriptor update template entry for the single image descriptor at `binding`.
///
/// The raw data handed to updateDescriptorSetWithTemplate is a tightly packed array of
/// vk::DescriptorImageInfo with one element per binding, so the entry for binding N must
/// read from byte offset N * sizeof(vk::DescriptorImageInfo). A constant offset would make
/// every binding alias the same array element.
template <u32 binding, vk::DescriptorType type>
inline constexpr vk::DescriptorUpdateTemplateEntry TEXTURE_TEMPLATE{
    .dstBinding = binding,
    .dstArrayElement = 0,
    .descriptorCount = 1,
    .descriptorType = type,
    .offset = binding * sizeof(vk::DescriptorImageInfo),
    // Stride is irrelevant because each entry updates a single descriptor.
    .stride = 0,
};
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
.bindingCount = static_cast<u32>(compute_layout_bindings.size()),
.pBindings = compute_layout_bindings.data(),
};
/// Bindings of the compute descriptor set: two sampled images (bindings 0 and 1)
/// and one storage image (binding 2), all visible to the compute stage.
constexpr std::array COMPUTE_DESCRIPTOR_SET_BINDINGS = {
TEXTURE_DESC_LAYOUT<0, vk::DescriptorType::eSampledImage, vk::ShaderStageFlagBits::eCompute>,
TEXTURE_DESC_LAYOUT<1, vk::DescriptorType::eSampledImage, vk::ShaderStageFlagBits::eCompute>,
TEXTURE_DESC_LAYOUT<2, vk::DescriptorType::eStorageImage, vk::ShaderStageFlagBits::eCompute>,
};
/// Layout create info referencing the compute bindings above.
constexpr vk::DescriptorSetLayoutCreateInfo COMPUTE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.bindingCount = static_cast<u32>(COMPUTE_DESCRIPTOR_SET_BINDINGS.size()),
.pBindings = COMPUTE_DESCRIPTOR_SET_BINDINGS.data(),
};
/// Update template entries matching the compute bindings one-to-one.
const std::array COMPUTE_UPDATE_TEMPLATES = {
TEXTURE_TEMPLATE<0, vk::DescriptorType::eSampledImage>,
TEXTURE_TEMPLATE<1, vk::DescriptorType::eSampledImage>,
TEXTURE_TEMPLATE<2, vk::DescriptorType::eStorageImage>,
};
/// Push constant range of the compute pipeline layout: one Common::Vec2i at offset 0.
inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(Common::Vec2i),
};
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
/// Bindings of the graphics blit descriptor set: two combined image samplers
/// (bindings 0 and 1) visible to the fragment stage.
constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
TEXTURE_DESC_LAYOUT<0, vk::DescriptorType::eCombinedImageSampler,
vk::ShaderStageFlagBits::eFragment>,
TEXTURE_DESC_LAYOUT<1, vk::DescriptorType::eCombinedImageSampler,
vk::ShaderStageFlagBits::eFragment>,
};
/// Layout create info referencing the two-texture bindings above.
constexpr vk::DescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
};
/// Update template entries matching the two-texture bindings one-to-one.
const std::array TWO_TEXTURES_UPDATE_TEMPLATES = {
TEXTURE_TEMPLATE<0, vk::DescriptorType::eCombinedImageSampler>,
TEXTURE_TEMPLATE<1, vk::DescriptorType::eCombinedImageSampler>,
};
const std::array update_template_entries = {
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = 0,
.stride = sizeof(vk::DescriptorImageInfo),
},
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = sizeof(vk::DescriptorImageInfo),
.stride = 0,
},
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage,
.offset = 2 * sizeof(vk::DescriptorImageInfo),
.stride = 0,
},
};
/// Push constant range of the graphics blit pipeline layout: one PushConstants
/// struct at offset 0, consumed by the vertex stage only.
inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{
.stageFlags = vk::ShaderStageFlagBits::eVertex,
.offset = 0,
.size = sizeof(PushConstants),
};
/// Empty vertex input: the blit pipeline declares no vertex bindings or attributes.
constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
.vertexBindingDescriptionCount = 0,
.pVertexBindingDescriptions = nullptr,
.vertexAttributeDescriptionCount = 0,
.pVertexAttributeDescriptions = nullptr,
};
/// Plain triangle list without primitive restart.
constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
.topology = vk::PrimitiveTopology::eTriangleList,
.primitiveRestartEnable = VK_FALSE,
};
/// One viewport and one scissor; the pointers are null because both are listed
/// in DYNAMIC_STATES and set at record time.
constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
.viewportCount = 1,
.pViewports = nullptr,
.scissorCount = 1,
.pScissors = nullptr,
};
/// Filled polygons, back-face culling with clockwise front faces, no depth bias or clamp.
// NOTE(review): the blit triangle has to count as front-facing under these settings;
// verify its winding against the vertex shader if either value is changed.
constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
.depthClampEnable = VK_FALSE,
.rasterizerDiscardEnable = VK_FALSE,
.polygonMode = vk::PolygonMode::eFill,
.cullMode = vk::CullModeFlagBits::eBack,
.frontFace = vk::FrontFace::eClockwise,
.depthBiasEnable = VK_FALSE,
.depthBiasConstantFactor = 0.0f,
.depthBiasClamp = 0.0f,
.depthBiasSlopeFactor = 0.0f,
.lineWidth = 1.0f,
};
/// Single-sample rasterization with every multisample feature disabled.
constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = VK_FALSE,
.minSampleShading = 0.0f,
.pSampleMask = nullptr,
.alphaToCoverageEnable = VK_FALSE,
.alphaToOneEnable = VK_FALSE,
};
/// Pipeline state supplied at record time instead of being baked into the pipeline.
constexpr std::array DYNAMIC_STATES{
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
};
/// Dynamic state create info referencing DYNAMIC_STATES.
constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
.pDynamicStates = DYNAMIC_STATES.data(),
};
/// Color blend state for pipelines without any color attachment (attachmentCount = 0).
constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
.logicOpEnable = VK_FALSE,
.logicOp = vk::LogicOp::eClear,
.attachmentCount = 0,
.pAttachments = nullptr,
.blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f},
};
/// Attachment state with blending disabled and all four color channels writable.
constexpr vk::PipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
.blendEnable = VK_FALSE,
.srcColorBlendFactor = vk::BlendFactor::eZero,
.dstColorBlendFactor = vk::BlendFactor::eZero,
.colorBlendOp = vk::BlendOp::eAdd,
.srcAlphaBlendFactor = vk::BlendFactor::eZero,
.dstAlphaBlendFactor = vk::BlendFactor::eZero,
.alphaBlendOp = vk::BlendOp::eAdd,
.colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA,
};
/// Color blend state for pipelines with exactly one color attachment (attachmentCount = 1).
constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
.logicOpEnable = VK_FALSE,
.logicOp = vk::LogicOp::eClear,
.attachmentCount = 1,
.pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
.blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f},
};
/// Stencil face state that always passes and replaces the stored stencil value with the
/// reference value. The blit fragment shader supplies that reference per fragment through
/// gl_FragStencilRefARB, so the static `reference` below is only a placeholder.
constexpr vk::StencilOpState STENCIL_EXPORT_REPLACE_OP_STATE{
    .failOp = vk::StencilOp::eReplace,
    .passOp = vk::StencilOp::eReplace,
    .depthFailOp = vk::StencilOp::eReplace,
    .compareOp = vk::CompareOp::eAlways,
    .compareMask = 0xFF,
    .writeMask = 0xFF,
    .reference = 0,
};
/// Depth-stencil state of the depth-stencil blit pipeline.
///
/// Both tests are enabled with an "always" comparison so that every fragment overwrites the
/// destination depth (gl_FragDepth) and stencil (exported reference). The stencil test has to
/// be enabled: with it disabled the stencil attachment is never written and the value exported
/// by the fragment shader would be discarded.
constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
    .depthTestEnable = VK_TRUE,
    .depthWriteEnable = VK_TRUE,
    .depthCompareOp = vk::CompareOp::eAlways,
    .depthBoundsTestEnable = VK_FALSE,
    .stencilTestEnable = VK_TRUE,
    .front = STENCIL_EXPORT_REPLACE_OP_STATE,
    .back = STENCIL_EXPORT_REPLACE_OP_STATE,
    .minDepthBounds = 0.0f,
    .maxDepthBounds = 0.0f,
};
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
.pDescriptorUpdateEntries = update_template_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_layout,
};
/// Sampler description used by the blit helpers, parameterized on the min/mag filter.
/// It uses unnormalized coordinates, a single mip level (minLod = maxLod = 0) and
/// clamp-to-border addressing with an opaque white border.
// BindBlitState derives tex_scale/tex_offset from the source rectangle, which
// presumably is expressed in texels to match unnormalizedCoordinates — verify
// before changing either side.
template <vk::Filter filter>
inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{
.magFilter = filter,
.minFilter = filter,
.mipmapMode = vk::SamplerMipmapMode::eNearest,
.addressModeU = vk::SamplerAddressMode::eClampToBorder,
.addressModeV = vk::SamplerAddressMode::eClampToBorder,
.addressModeW = vk::SamplerAddressMode::eClampToBorder,
.mipLodBias = 0.0f,
.anisotropyEnable = VK_FALSE,
.maxAnisotropy = 0.0f,
.compareEnable = VK_FALSE,
.compareOp = vk::CompareOp::eNever,
.minLod = 0.0f,
.maxLod = 0.0f,
.borderColor = vk::BorderColor::eFloatOpaqueWhite,
.unnormalizedCoordinates = VK_TRUE,
};
update_template = device.createDescriptorUpdateTemplate(template_info);
const vk::PushConstantRange push_range = {
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(Common::Vec2i),
};
const vk::PipelineLayoutCreateInfo layout_info = {
/// Describes a pipeline layout with one descriptor set layout and one push constant range.
/// @param set_layout Pointer stored in the returned struct; it must stay valid until the
///                   layout has been created.
/// @param compute    Selects COMPUTE_PUSH_CONSTANT_RANGE when true, PUSH_CONSTANT_RANGE otherwise.
constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo(
const vk::DescriptorSetLayout* set_layout, bool compute = false) {
return vk::PipelineLayoutCreateInfo{
.setLayoutCount = 1,
.pSetLayouts = &descriptor_layout,
.pSetLayouts = set_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_range,
.pPushConstantRanges = (compute ? &COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE),
};
}
compute_pipeline_layout = device.createPipelineLayout(layout_info);
/// Describes a descriptor-set update template made of `entries` that targets `layout`.
/// The returned struct only points at `entries`; the span's storage must outlive its use.
constexpr vk::DescriptorUpdateTemplateCreateInfo DescriptorUpdateTemplateCreateInfo(
    std::span<const vk::DescriptorUpdateTemplateEntry> entries, vk::DescriptorSetLayout layout) {
    vk::DescriptorUpdateTemplateCreateInfo create_info{};
    create_info.descriptorUpdateEntryCount = static_cast<u32>(entries.size());
    create_info.pDescriptorUpdateEntries = entries.data();
    create_info.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet;
    create_info.descriptorSetLayout = layout;
    return create_info;
}
const vk::PipelineShaderStageCreateInfo compute_stage = {
/// Builds the vertex + fragment stage descriptions of a graphics pipeline.
/// Both stages use "main" as their entry point.
constexpr std::array<vk::PipelineShaderStageCreateInfo, 2> MakeStages(
    vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) {
    const vk::PipelineShaderStageCreateInfo vertex_stage{
        .stage = vk::ShaderStageFlagBits::eVertex,
        .module = vertex_shader,
        .pName = "main",
    };
    const vk::PipelineShaderStageCreateInfo fragment_stage{
        .stage = vk::ShaderStageFlagBits::eFragment,
        .module = fragment_shader,
        .pName = "main",
    };
    return std::array{vertex_stage, fragment_stage};
}
/// Builds the single stage description of a compute pipeline; the entry point is "main".
constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) {
    vk::PipelineShaderStageCreateInfo stage_info{};
    stage_info.stage = vk::ShaderStageFlagBits::eCompute;
    stage_info.module = compute_shader;
    stage_info.pName = "main";
    return stage_info;
}
const vk::ComputePipelineCreateInfo compute_info = {
.stage = compute_stage,
.layout = compute_pipeline_layout,
};
} // Anonymous namespace
if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) {
compute_pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
UNREACHABLE();
}
/// Creates every Vulkan object the blit helper needs: descriptor set layouts, descriptor
/// update templates, pipeline layouts, shader modules, the depth-stencil blit pipeline,
/// two samplers, and finally the compute pipeline.
// Several initializers consume members initialized before them (layouts -> templates and
// pipeline layouts -> pipelines), so the initializer order is significant.
BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_,
DescriptorManager& desc_manager_, RenderpassCache& renderpass_cache_)
: instance{instance_}, scheduler{scheduler_}, desc_manager{desc_manager_},
renderpass_cache{renderpass_cache_}, device{instance.GetDevice()},
compute_descriptor_layout{
device.createDescriptorSetLayout(COMPUTE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)},
two_textures_descriptor_layout{
device.createDescriptorSetLayout(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)},
compute_update_template{device.createDescriptorUpdateTemplate(
DescriptorUpdateTemplateCreateInfo(COMPUTE_UPDATE_TEMPLATES, compute_descriptor_layout))},
two_textures_update_template{
device.createDescriptorUpdateTemplate(DescriptorUpdateTemplateCreateInfo(
TWO_TEXTURES_UPDATE_TEMPLATES, two_textures_descriptor_layout))},
compute_pipeline_layout{
device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_descriptor_layout, true))},
two_textures_pipeline_layout{
device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_descriptor_layout))},
full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)},
copy_d24s8_to_r32_comp{CompileSPV(VULKAN_D32S8_TO_R32_COMP_SPV, device)},
blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)},
// Uses the shader modules and the two-texture pipeline layout initialized above.
depth_blit_pipeline{MakeDepthStencilBlitPipeline()},
linear_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eLinear>)},
nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO<vk::Filter::eNearest>)} {
// copy_d24s8_to_r32_pipeline is assigned here rather than in the initializer list.
MakeComputePipelines();
}
/// Destroys the Vulkan objects owned by the blit helper through the stored device handle.
BlitHelper::~BlitHelper() {
device.destroyPipeline(compute_pipeline);
device.destroyPipelineLayout(compute_pipeline_layout);
device.destroyDescriptorUpdateTemplate(update_template);
device.destroyDescriptorSetLayout(descriptor_layout);
device.destroyShaderModule(compute_shader);
device.destroyPipelineLayout(two_textures_pipeline_layout);
device.destroyDescriptorUpdateTemplate(compute_update_template);
device.destroyDescriptorUpdateTemplate(two_textures_update_template);
device.destroyDescriptorSetLayout(compute_descriptor_layout);
device.destroyDescriptorSetLayout(two_textures_descriptor_layout);
device.destroyShaderModule(full_screen_vert);
device.destroyShaderModule(copy_d24s8_to_r32_comp);
device.destroyShaderModule(blit_depth_stencil_frag);
device.destroyPipeline(copy_d24s8_to_r32_pipeline);
device.destroyPipeline(depth_blit_pipeline);
device.destroySampler(linear_sampler);
device.destroySampler(nearest_sampler);
}
/// Records the dynamic state shared by the graphics blits: viewport and scissor are set to
/// the destination rectangle of `blit`, and the vertex-stage push constants are filled from
/// its source rectangle (size as scale, left/bottom as offset).
void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout,
                   const VideoCore::TextureBlit& blit) {
    const auto& dst = blit.dst_rect;
    const auto& src = blit.src_rect;

    // Use the smaller coordinate on each axis so the origin is valid for either orientation.
    const vk::Offset2D origin{
        .x = std::min<s32>(dst.left, dst.right),
        .y = std::min<s32>(dst.bottom, dst.top),
    };
    const vk::Extent2D size{
        .width = dst.GetWidth(),
        .height = dst.GetHeight(),
    };

    const vk::Viewport dst_viewport{
        .x = static_cast<float>(origin.x),
        .y = static_cast<float>(origin.y),
        .width = static_cast<float>(size.width),
        .height = static_cast<float>(size.height),
        .minDepth = 0.0f,
        .maxDepth = 1.0f,
    };
    const vk::Rect2D dst_scissor{
        .offset = origin,
        .extent = size,
    };

    const float src_width = static_cast<float>(src.GetWidth());
    const float src_height = static_cast<float>(src.GetHeight());
    const float src_left = static_cast<float>(src.left);
    const float src_bottom = static_cast<float>(src.bottom);
    const PushConstants constants{
        .tex_scale = {src_width, src_height},
        .tex_offset = {src_left, src_bottom},
    };

    cmdbuf.setViewport(0, dst_viewport);
    cmdbuf.setScissor(0, dst_scissor);
    cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(constants),
                         &constants);
}
/// Blits depth and stencil from `source` to `dest` by drawing a full-screen triangle with
/// the depth-stencil blit pipeline.
/// @return false (after logging an error) when shader stencil export is not supported by
///         the instance, true once the draw has been recorded.
bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest,
                                  const VideoCore::TextureBlit& blit) {
    const bool can_export_stencil = instance.IsShaderStencilExportSupported();
    if (!can_export_stencil) {
        LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images");
        return false;
    }

    // The render area always spans the whole scaled destination surface.
    const vk::Rect2D full_dest_area = {
        .offset = {0, 0},
        .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()},
    };

    // Binding 0 receives the depth view, binding 1 the stencil view.
    const vk::DescriptorImageInfo depth_info{
        .sampler = nearest_sampler,
        .imageView = source.GetDepthView(),
        .imageLayout = vk::ImageLayout::eGeneral,
    };
    const vk::DescriptorImageInfo stencil_info{
        .sampler = nearest_sampler,
        .imageView = source.GetStencilView(),
        .imageLayout = vk::ImageLayout::eGeneral,
    };
    const std::array textures = {depth_info, stencil_info};

    vk::DescriptorSet descriptor_set = desc_manager.AllocateSet(two_textures_descriptor_layout);
    device.updateDescriptorSetWithTemplate(descriptor_set, two_textures_update_template,
                                           textures[0]);

    // Depth-only pass: no color surface is attached.
    renderpass_cache.EnterRenderpass(nullptr, &dest, full_dest_area);
    scheduler.Record([this, blit, descriptor_set](vk::CommandBuffer cmdbuf) {
        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline);
        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, two_textures_pipeline_layout,
                                  0, descriptor_set, {});
        BindBlitState(cmdbuf, two_textures_pipeline_layout, blit);
        cmdbuf.draw(3, 1, 0, 0);
    });

    // A different pipeline was bound behind the scheduler's back; flag it as dirty.
    scheduler.MakeDirty(StateFlags::Pipeline);
    return true;
}
void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
@ -164,8 +361,8 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
},
};
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
vk::DescriptorSet set = desc_manager.AllocateSet(compute_descriptor_layout);
device.updateDescriptorSetWithTemplate(set, compute_update_template, textures[0]);
renderpass_cache.ExitRenderpass();
scheduler.Record([this, set, blit, src_image = source.alloc.image,
@ -247,7 +444,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set,
{});
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, copy_d24s8_to_r32_pipeline);
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
@ -263,4 +460,67 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
});
}
void BlitHelper::MakeComputePipelines() {
const vk::ComputePipelineCreateInfo compute_info = {
.stage = MakeStages(copy_d24s8_to_r32_comp),
.layout = compute_pipeline_layout,
};
if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) {
copy_d24s8_to_r32_pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "D24S8->R32 compute pipeline creation failed!");
UNREACHABLE();
}
}
/// Builds the graphics pipeline used by BlitDepthStencil: the full-screen triangle vertex
/// shader plus the depth-stencil export fragment shader, rendering into a D24S8
/// depth-stencil attachment with no color attachments.
///
/// @return The created pipeline, or VK_NULL_HANDLE when shader stencil export is not
///         supported (BlitDepthStencil refuses to run in that case, so the handle is
///         never bound).
vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() {
    // The fragment shader writes gl_FragStencilRefARB; do not build a pipeline around it on
    // devices that lack the extension.
    if (!instance.IsShaderStencilExportSupported()) {
        return VK_NULL_HANDLE;
    }

    const std::array stages = MakeStages(full_screen_vert, blit_depth_stencil_frag);
    const VideoCore::PixelFormat depth_stencil = VideoCore::PixelFormat::D24S8;
    const vk::Format depth_stencil_format = instance.GetTraits(depth_stencil).native;
    const bool use_dynamic_rendering = instance.IsDynamicRenderingSupported();

    vk::GraphicsPipelineCreateInfo depth_stencil_info = {
        .stageCount = static_cast<u32>(stages.size()),
        .pStages = stages.data(),
        .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pTessellationState = nullptr,
        .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        // The pass has zero color attachments, so the blend state must declare zero
        // attachments as well; the one-attachment variant would not match.
        .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
        .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .layout = two_textures_pipeline_layout,
    };

    // Without dynamic rendering the pipeline is tied to a depth-only renderpass. This must
    // be set before the structure chain below copies the create info.
    if (!use_dynamic_rendering) {
        depth_stencil_info.renderPass =
            renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, depth_stencil, false);
    }

    vk::StructureChain depth_blit_chain = {
        depth_stencil_info,
        vk::PipelineRenderingCreateInfoKHR{
            .colorAttachmentCount = 0,
            .pColorAttachmentFormats = nullptr,
            .depthAttachmentFormat = depth_stencil_format,
            .stencilAttachmentFormat = depth_stencil_format,
        },
    };

    // The rendering info only applies to dynamic rendering; drop it from the pNext chain
    // when a renderpass is used instead.
    if (!use_dynamic_rendering) {
        depth_blit_chain.unlink<vk::PipelineRenderingCreateInfoKHR>();
    }

    if (const auto result = device.createGraphicsPipeline({}, depth_blit_chain.get());
        result.result == vk::Result::eSuccess) {
        return result.value;
    } else {
        LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!");
        UNREACHABLE();
    }

    return VK_NULL_HANDLE;
}
} // namespace Vulkan

View File

@ -4,6 +4,7 @@
#pragma once
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore {
@ -24,20 +25,44 @@ public:
RenderpassCache& renderpass_cache);
~BlitHelper();
/// Blits depth-stencil textures using helper shader
bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Blits D24S8 pixel data to the provided buffer
void BlitD24S8ToR32(Surface& depth_surface, Surface& r32_surface,
const VideoCore::TextureBlit& blit);
private:
/// Creates compute pipelines used for blit
void MakeComputePipelines();
/// Creates graphics pipelines used for blit
vk::Pipeline MakeDepthStencilBlitPipeline();
private:
const Instance& instance;
Scheduler& scheduler;
DescriptorManager& desc_manager;
RenderpassCache& renderpass_cache;
vk::Device device;
vk::Pipeline compute_pipeline;
vk::RenderPass r32_renderpass;
vk::DescriptorSetLayout compute_descriptor_layout;
vk::DescriptorSetLayout two_textures_descriptor_layout;
vk::DescriptorUpdateTemplate compute_update_template;
vk::DescriptorUpdateTemplate two_textures_update_template;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
vk::PipelineLayout two_textures_pipeline_layout;
vk::ShaderModule full_screen_vert;
vk::ShaderModule copy_d24s8_to_r32_comp;
vk::ShaderModule blit_depth_stencil_frag;
vk::Pipeline copy_d24s8_to_r32_pipeline;
vk::Pipeline depth_blit_pipeline;
vk::Sampler linear_sampler;
vk::Sampler nearest_sampler;
};
} // namespace Vulkan

View File

@ -568,6 +568,7 @@ bool Instance::CreateDevice() {
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
image_format_list = AddExtension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
pipeline_creation_feedback = AddExtension(VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME);
shader_stencil_export = AddExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME);
bool has_portability_subset = AddExtension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME);
bool has_dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME);
bool has_extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);

View File

@ -183,6 +183,11 @@ public:
return pipeline_creation_feedback;
}
/// Returns true when VK_EXT_shader_stencil_export is supported.
/// BlitHelper::BlitDepthStencil checks this before emulating a depth-stencil blit.
bool IsShaderStencilExportSupported() const {
return shader_stencil_export;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
@ -316,6 +321,7 @@ private:
bool image_format_list{};
bool pipeline_creation_cache_control{};
bool pipeline_creation_feedback{};
bool shader_stencil_export{};
bool enable_validation{};
bool dump_command_buffers{};
};

View File

@ -57,8 +57,8 @@ public:
void CreatePresentRenderpass(vk::Format format);
/// Returns the renderpass associated with the color-depth format pair
[[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color,
VideoCore::PixelFormat depth, bool is_clear);
vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth,
bool is_clear);
/// Returns the swapchain clear renderpass
[[nodiscard]] vk::RenderPass GetPresentRenderpass() const {

View File

@ -69,6 +69,11 @@ public:
state |= flag;
}
/// Marks the provided state as dirty by clearing its bit(s) in `state`.
/// A cleared bit means "dirty": IsStateDirty() returns true while the bit is unset.
void MakeDirty(StateFlags flag) noexcept {
state &= ~flag;
}
/// Returns true if the state is dirty
[[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept {
return False(state & flag);

View File

@ -645,6 +645,12 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
const bool is_depth_stencil = source.type == VideoCore::SurfaceType::DepthStencil;
const auto& depth_traits = instance.GetTraits(source.pixel_format);
if (is_depth_stencil && !depth_traits.blit_support) {
return blit_helper.BlitDepthStencil(source, dest, blit);
}
renderpass_cache.ExitRenderpass();
const RecordParams params = {
@ -826,11 +832,6 @@ MICROPROFILE_DEFINE(Vulkan_Upload, "Vulkan", "Texture Upload", MP_RGB(128, 192,
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_SCOPE(Vulkan_Upload);
if (type == VideoCore::SurfaceType::DepthStencil && !traits.blit_support) {
LOG_ERROR(Render_Vulkan, "Depth blit unsupported by hardware, ignoring");
return;
}
runtime.renderpass_cache.ExitRenderpass();
const bool is_scaled = res_scale != 1;