diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 83e811c9c..100d9ee24 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -12,14 +12,17 @@ set(SHADER_FILES texture_filtering/xbrz_freescale.vert texture_filtering/x_gradient.frag texture_filtering/y_gradient.frag + full_screen_triangle.vert opengl_present.frag opengl_present.vert opengl_present_anaglyph.frag opengl_present_interlaced.frag + vulkan_d32s8_to_r32.comp vulkan_present.frag vulkan_present.vert vulkan_present_anaglyph.frag vulkan_present_interlaced.frag + vulkan_blit_depth_stencil.frag ) find_program(GLSLANGVALIDATOR "glslangValidator") diff --git a/src/video_core/host_shaders/full_screen_triangle.vert b/src/video_core/host_shaders/full_screen_triangle.vert new file mode 100644 index 000000000..f4ace615f --- /dev/null +++ b/src/video_core/host_shaders/full_screen_triangle.vert @@ -0,0 +1,29 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core + +#ifdef VULKAN +#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants { +#define END_PUSH_CONSTANTS }; +#define UNIFORM(n) +#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv +#define BEGIN_PUSH_CONSTANTS +#define END_PUSH_CONSTANTS +#define UNIFORM(n) layout (location = n) uniform +#endif + +BEGIN_PUSH_CONSTANTS +UNIFORM(0) vec2 tex_scale; +UNIFORM(1) vec2 tex_offset; +END_PUSH_CONSTANTS + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset); +} diff --git a/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag new file mode 100644 index 000000000..eb327361c --- /dev/null +++ b/src/video_core/host_shaders/vulkan_blit_depth_stencil.frag @@ -0,0 +1,16 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 core +#extension GL_ARB_shader_stencil_export : require + +layout(binding = 0) uniform sampler2D depth_tex; +layout(binding = 1) uniform usampler2D stencil_tex; + +layout(location = 0) in vec2 texcoord; + +void main() { + gl_FragDepth = textureLod(depth_tex, texcoord, 0).r; + gl_FragStencilRefARB = int(textureLod(stencil_tex, texcoord, 0).r); +} diff --git a/src/video_core/host_shaders/vulkan_d32s8_to_r32.comp b/src/video_core/host_shaders/vulkan_d32s8_to_r32.comp new file mode 100644 index 000000000..e08a44737 --- /dev/null +++ b/src/video_core/host_shaders/vulkan_d32s8_to_r32.comp @@ -0,0 +1,24 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 450 core +#extension GL_EXT_samplerless_texture_functions : require + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +layout(binding = 0) uniform highp texture2D depth; +layout(binding = 1) uniform lowp utexture2D stencil; +layout(binding = 2, r32ui) uniform highp writeonly uimage2D color; + +layout(push_constant, std140) uniform ComputeInfo { + mediump ivec2 src_offset; +}; + +void main() { + ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); + ivec2 tex_coord = src_offset + dst_coord; + highp uint depth_val = uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + highp uint value = stencil_val | (depth_val << 8); + imageStore(color, dst_coord, uvec4(value)); +} diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.cpp b/src/video_core/renderer_vulkan/vk_blit_helper.cpp index 087882fa9..6e8033ed3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_helper.cpp @@ -11,140 +11,337 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" +#include "video_core/host_shaders/full_screen_triangle_vert_spv.h" +#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" +#include "video_core/host_shaders/vulkan_d32s8_to_r32_comp_spv.h" + namespace Vulkan { -BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, - DescriptorManager& desc_manager, RenderpassCache& renderpass_cache) - : scheduler{scheduler}, desc_manager{desc_manager}, - renderpass_cache{renderpass_cache}, device{instance.GetDevice()} { - constexpr std::string_view cs_source = R"( -#version 450 core -#extension GL_EXT_samplerless_texture_functions : require -layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; -layout(set = 0, binding = 0) uniform highp texture2D depth; -layout(set = 0, binding = 1) uniform lowp utexture2D stencil; -layout(set = 0, binding = 2, r32ui) uniform highp writeonly uimage2D color; -layout(push_constant, std140) uniform ComputeInfo { -mediump ivec2 src_offset; +namespace { +struct PushConstants { + std::array tex_scale; + std::array tex_offset; }; -void main() { -ivec2 dst_coord = ivec2(gl_GlobalInvocationID.xy); -ivec2 tex_coord = src_offset + dst_coord; -highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(24.0) - 1.0)); -lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; -highp uint value = stencil_val | (depth_val << 8); -imageStore(color, dst_coord, uvec4(value)); -} -)"; - compute_shader = - Compile(cs_source, vk::ShaderStageFlagBits::eCompute, device, ShaderOptimization::High); - const std::array compute_layout_bindings = { - vk::DescriptorSetLayoutBinding{ - .binding = 0, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute, - }, - vk::DescriptorSetLayoutBinding{ - .binding = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute, - }, - vk::DescriptorSetLayoutBinding{ - .binding = 2, - .descriptorType = vk::DescriptorType::eStorageImage, - .descriptorCount = 1, - .stageFlags = vk::ShaderStageFlagBits::eCompute, - }, - }; +template +inline constexpr vk::DescriptorSetLayoutBinding TEXTURE_DESC_LAYOUT{ + .binding = binding, + .descriptorType = type, + .descriptorCount = 1, + .stageFlags = stage, +}; +template +inline constexpr vk::DescriptorUpdateTemplateEntry TEXTURE_TEMPLATE{ + .dstBinding = binding, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = type, + .offset = sizeof(vk::DescriptorImageInfo), + .stride = 0, +}; - const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { - .bindingCount = static_cast(compute_layout_bindings.size()), - .pBindings = compute_layout_bindings.data(), - }; +constexpr std::array COMPUTE_DESCRIPTOR_SET_BINDINGS = { + TEXTURE_DESC_LAYOUT<0, vk::DescriptorType::eSampledImage, vk::ShaderStageFlagBits::eCompute>, + TEXTURE_DESC_LAYOUT<1, vk::DescriptorType::eSampledImage, vk::ShaderStageFlagBits::eCompute>, + TEXTURE_DESC_LAYOUT<2, vk::DescriptorType::eStorageImage, vk::ShaderStageFlagBits::eCompute>, +}; +constexpr vk::DescriptorSetLayoutCreateInfo COMPUTE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .bindingCount = static_cast(COMPUTE_DESCRIPTOR_SET_BINDINGS.size()), + .pBindings = COMPUTE_DESCRIPTOR_SET_BINDINGS.data(), +}; +const std::array COMPUTE_UPDATE_TEMPLATES = { + TEXTURE_TEMPLATE<0, vk::DescriptorType::eSampledImage>, + TEXTURE_TEMPLATE<1, vk::DescriptorType::eSampledImage>, + TEXTURE_TEMPLATE<2, vk::DescriptorType::eStorageImage>, +}; +inline constexpr vk::PushConstantRange COMPUTE_PUSH_CONSTANT_RANGE{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Common::Vec2i), +}; - descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); +constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{ + TEXTURE_DESC_LAYOUT<0, vk::DescriptorType::eCombinedImageSampler, + vk::ShaderStageFlagBits::eFragment>, + TEXTURE_DESC_LAYOUT<1, vk::DescriptorType::eCombinedImageSampler, + vk::ShaderStageFlagBits::eFragment>, +}; +constexpr vk::DescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{ + .bindingCount = static_cast(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()), + .pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(), +}; +const std::array TWO_TEXTURES_UPDATE_TEMPLATES = { + TEXTURE_TEMPLATE<0, vk::DescriptorType::eCombinedImageSampler>, + TEXTURE_TEMPLATE<1, vk::DescriptorType::eCombinedImageSampler>, +}; - const std::array update_template_entries = { - vk::DescriptorUpdateTemplateEntry{ - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = 0, - .stride = sizeof(vk::DescriptorImageInfo), - }, - vk::DescriptorUpdateTemplateEntry{ - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eSampledImage, - .offset = sizeof(vk::DescriptorImageInfo), - .stride = 0, - }, - vk::DescriptorUpdateTemplateEntry{ - .dstBinding = 2, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = vk::DescriptorType::eStorageImage, - .offset = 2 * sizeof(vk::DescriptorImageInfo), - .stride = 0, - }, - }; +inline constexpr vk::PushConstantRange PUSH_CONSTANT_RANGE{ + .stageFlags = vk::ShaderStageFlagBits::eVertex, + .offset = 0, + .size = sizeof(PushConstants), +}; +constexpr vk::PipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{ + .vertexBindingDescriptionCount = 0, + .pVertexBindingDescriptions = nullptr, + .vertexAttributeDescriptionCount = 0, + .pVertexAttributeDescriptions = nullptr, +}; +constexpr vk::PipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{ + .topology = vk::PrimitiveTopology::eTriangleList, + .primitiveRestartEnable = VK_FALSE, +}; +constexpr vk::PipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{ + .viewportCount = 1, + .pViewports = nullptr, + .scissorCount = 1, + .pScissors = nullptr, +}; +constexpr vk::PipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{ + .depthClampEnable = VK_FALSE, + .rasterizerDiscardEnable = VK_FALSE, + .polygonMode = vk::PolygonMode::eFill, + .cullMode = vk::CullModeFlagBits::eBack, + .frontFace = vk::FrontFace::eClockwise, + .depthBiasEnable = VK_FALSE, + .depthBiasConstantFactor = 0.0f, + .depthBiasClamp = 0.0f, + .depthBiasSlopeFactor = 0.0f, + .lineWidth = 1.0f, +}; +constexpr vk::PipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{ + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = VK_FALSE, + .minSampleShading = 0.0f, + .pSampleMask = nullptr, + .alphaToCoverageEnable = VK_FALSE, + .alphaToOneEnable = VK_FALSE, +}; +constexpr std::array DYNAMIC_STATES{ + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, +}; +constexpr vk::PipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{ + .dynamicStateCount = static_cast(DYNAMIC_STATES.size()), + .pDynamicStates = DYNAMIC_STATES.data(), +}; +constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{ + .logicOpEnable = VK_FALSE, + .logicOp = vk::LogicOp::eClear, + .attachmentCount = 0, + .pAttachments = nullptr, + .blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr vk::PipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = vk::BlendFactor::eZero, + .dstColorBlendFactor = vk::BlendFactor::eZero, + .colorBlendOp = vk::BlendOp::eAdd, + .srcAlphaBlendFactor = vk::BlendFactor::eZero, + .dstAlphaBlendFactor = vk::BlendFactor::eZero, + .alphaBlendOp = vk::BlendOp::eAdd, + .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, +}; +constexpr vk::PipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{ + .logicOpEnable = VK_FALSE, + .logicOp = vk::LogicOp::eClear, + .attachmentCount = 1, + .pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE, + .blendConstants = std::array{0.0f, 0.0f, 0.0f, 0.0f}, +}; +constexpr vk::PipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{ + .depthTestEnable = VK_TRUE, + .depthWriteEnable = VK_TRUE, + .depthCompareOp = vk::CompareOp::eAlways, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_FALSE, + .front = vk::StencilOpState{}, + .back = vk::StencilOpState{}, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, +}; - const vk::DescriptorUpdateTemplateCreateInfo template_info = { - .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), - .pDescriptorUpdateEntries = update_template_entries.data(), - .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, - .descriptorSetLayout = descriptor_layout, - }; +template +inline constexpr vk::SamplerCreateInfo SAMPLER_CREATE_INFO{ + .magFilter = filter, + .minFilter = filter, + .mipmapMode = vk::SamplerMipmapMode::eNearest, + .addressModeU = vk::SamplerAddressMode::eClampToBorder, + .addressModeV = vk::SamplerAddressMode::eClampToBorder, + .addressModeW = vk::SamplerAddressMode::eClampToBorder, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = vk::CompareOp::eNever, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = vk::BorderColor::eFloatOpaqueWhite, + .unnormalizedCoordinates = VK_TRUE, +}; - update_template = device.createDescriptorUpdateTemplate(template_info); - - const vk::PushConstantRange push_range = { - .stageFlags = vk::ShaderStageFlagBits::eCompute, - .offset = 0, - .size = sizeof(Common::Vec2i), - }; - - const vk::PipelineLayoutCreateInfo layout_info = { +constexpr vk::PipelineLayoutCreateInfo PipelineLayoutCreateInfo( + const vk::DescriptorSetLayout* set_layout, bool compute = false) { + return vk::PipelineLayoutCreateInfo{ .setLayoutCount = 1, - .pSetLayouts = &descriptor_layout, + .pSetLayouts = set_layout, .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_range, + .pPushConstantRanges = (compute ? &COMPUTE_PUSH_CONSTANT_RANGE : &PUSH_CONSTANT_RANGE), }; +} - compute_pipeline_layout = device.createPipelineLayout(layout_info); +constexpr vk::DescriptorUpdateTemplateCreateInfo DescriptorUpdateTemplateCreateInfo( + std::span entries, vk::DescriptorSetLayout layout) { + return vk::DescriptorUpdateTemplateCreateInfo{ + .descriptorUpdateEntryCount = static_cast(entries.size()), + .pDescriptorUpdateEntries = entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = layout, + }; +} - const vk::PipelineShaderStageCreateInfo compute_stage = { +constexpr std::array MakeStages( + vk::ShaderModule vertex_shader, vk::ShaderModule fragment_shader) { + return std::array{ + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eVertex, + .module = vertex_shader, + .pName = "main", + }, + vk::PipelineShaderStageCreateInfo{ + .stage = vk::ShaderStageFlagBits::eFragment, + .module = fragment_shader, + .pName = "main", + }, + }; +} + +constexpr vk::PipelineShaderStageCreateInfo MakeStages(vk::ShaderModule compute_shader) { + return vk::PipelineShaderStageCreateInfo{ .stage = vk::ShaderStageFlagBits::eCompute, .module = compute_shader, .pName = "main", }; +} - const vk::ComputePipelineCreateInfo compute_info = { - .stage = compute_stage, - .layout = compute_pipeline_layout, - }; +} // Anonymous namespace - if (const auto result = device.createComputePipeline({}, compute_info); - result.result == vk::Result::eSuccess) { - compute_pipeline = result.value; - } else { - LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); - UNREACHABLE(); - } +BlitHelper::BlitHelper(const Instance& instance_, Scheduler& scheduler_, + DescriptorManager& desc_manager_, RenderpassCache& renderpass_cache_) + : instance{instance_}, scheduler{scheduler_}, desc_manager{desc_manager_}, + renderpass_cache{renderpass_cache_}, device{instance.GetDevice()}, + compute_descriptor_layout{ + device.createDescriptorSetLayout(COMPUTE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)}, + two_textures_descriptor_layout{ + device.createDescriptorSetLayout(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)}, + compute_update_template{device.createDescriptorUpdateTemplate( + DescriptorUpdateTemplateCreateInfo(COMPUTE_UPDATE_TEMPLATES, compute_descriptor_layout))}, + two_textures_update_template{ + device.createDescriptorUpdateTemplate(DescriptorUpdateTemplateCreateInfo( + TWO_TEXTURES_UPDATE_TEMPLATES, two_textures_descriptor_layout))}, + compute_pipeline_layout{ + device.createPipelineLayout(PipelineLayoutCreateInfo(&compute_descriptor_layout, true))}, + two_textures_pipeline_layout{ + device.createPipelineLayout(PipelineLayoutCreateInfo(&two_textures_descriptor_layout))}, + full_screen_vert{CompileSPV(FULL_SCREEN_TRIANGLE_VERT_SPV, device)}, + copy_d24s8_to_r32_comp{CompileSPV(VULKAN_D32S8_TO_R32_COMP_SPV, device)}, + blit_depth_stencil_frag{CompileSPV(VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV, device)}, + depth_blit_pipeline{MakeDepthStencilBlitPipeline()}, + linear_sampler{device.createSampler(SAMPLER_CREATE_INFO)}, + nearest_sampler{device.createSampler(SAMPLER_CREATE_INFO)} { + MakeComputePipelines(); } BlitHelper::~BlitHelper() { - device.destroyPipeline(compute_pipeline); device.destroyPipelineLayout(compute_pipeline_layout); - device.destroyDescriptorUpdateTemplate(update_template); - device.destroyDescriptorSetLayout(descriptor_layout); - device.destroyShaderModule(compute_shader); + device.destroyPipelineLayout(two_textures_pipeline_layout); + device.destroyDescriptorUpdateTemplate(compute_update_template); + device.destroyDescriptorUpdateTemplate(two_textures_update_template); + device.destroyDescriptorSetLayout(compute_descriptor_layout); + device.destroyDescriptorSetLayout(two_textures_descriptor_layout); + device.destroyShaderModule(full_screen_vert); + device.destroyShaderModule(copy_d24s8_to_r32_comp); + device.destroyShaderModule(blit_depth_stencil_frag); + device.destroyPipeline(copy_d24s8_to_r32_pipeline); + device.destroyPipeline(depth_blit_pipeline); + device.destroySampler(linear_sampler); + device.destroySampler(nearest_sampler); +} + +void BindBlitState(vk::CommandBuffer cmdbuf, vk::PipelineLayout layout, + const VideoCore::TextureBlit& blit) { + const vk::Offset2D offset{ + .x = std::min(blit.dst_rect.left, blit.dst_rect.right), + .y = std::min(blit.dst_rect.bottom, blit.dst_rect.top), + }; + const vk::Extent2D extent{ + .width = blit.dst_rect.GetWidth(), + .height = blit.dst_rect.GetHeight(), + }; + const vk::Viewport viewport{ + .x = static_cast(offset.x), + .y = static_cast(offset.y), + .width = static_cast(extent.width), + .height = static_cast(extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + const vk::Rect2D scissor{ + .offset = offset, + .extent = extent, + }; + const float scale_x = static_cast(blit.src_rect.GetWidth()); + const float scale_y = static_cast(blit.src_rect.GetHeight()); + const PushConstants push_constants{ + .tex_scale = {scale_x, scale_y}, + .tex_offset = {static_cast(blit.src_rect.left), + static_cast(blit.src_rect.bottom)}, + }; + cmdbuf.setViewport(0, viewport); + cmdbuf.setScissor(0, scissor); + cmdbuf.pushConstants(layout, vk::ShaderStageFlagBits::eVertex, 0, sizeof(push_constants), + &push_constants); +} + +bool BlitHelper::BlitDepthStencil(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + if (!instance.IsShaderStencilExportSupported()) { + LOG_ERROR(Render_Vulkan, "Unable to emulate depth stencil images"); + return false; + } + + const vk::Rect2D dst_render_area = { + .offset = {0, 0}, + .extent = {dest.GetScaledWidth(), dest.GetScaledHeight()}, + }; + + const std::array textures = { + vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.GetDepthView(), + .imageLayout = vk::ImageLayout::eGeneral, + }, + vk::DescriptorImageInfo{ + .sampler = nearest_sampler, + .imageView = source.GetStencilView(), + .imageLayout = vk::ImageLayout::eGeneral, + }, + }; + + vk::DescriptorSet set = desc_manager.AllocateSet(two_textures_descriptor_layout); + device.updateDescriptorSetWithTemplate(set, two_textures_update_template, textures[0]); + + renderpass_cache.EnterRenderpass(nullptr, &dest, dst_render_area); + scheduler.Record([blit, set, this](vk::CommandBuffer cmdbuf) { + const vk::PipelineLayout layout = two_textures_pipeline_layout; + + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, depth_blit_pipeline); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, set, {}); + BindBlitState(cmdbuf, layout, blit); + cmdbuf.draw(3, 1, 0, 0); + }); + scheduler.MakeDirty(StateFlags::Pipeline); + return true; } void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, @@ -164,8 +361,8 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, }, }; - vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout); - device.updateDescriptorSetWithTemplate(set, update_template, textures[0]); + vk::DescriptorSet set = desc_manager.AllocateSet(compute_descriptor_layout); + device.updateDescriptorSetWithTemplate(set, compute_update_template, textures[0]); renderpass_cache.ExitRenderpass(); scheduler.Record([this, set, blit, src_image = source.alloc.image, @@ -247,7 +444,7 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {}); - cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, copy_d24s8_to_r32_pipeline); const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom); cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0, @@ -263,4 +460,67 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest, }); } +void BlitHelper::MakeComputePipelines() { + const vk::ComputePipelineCreateInfo compute_info = { + .stage = MakeStages(copy_d24s8_to_r32_comp), + .layout = compute_pipeline_layout, + }; + + if (const auto result = device.createComputePipeline({}, compute_info); + result.result == vk::Result::eSuccess) { + copy_d24s8_to_r32_pipeline = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "D24S8->R32 compute pipeline creation failed!"); + UNREACHABLE(); + } +} + +vk::Pipeline BlitHelper::MakeDepthStencilBlitPipeline() { + const std::array stages = MakeStages(full_screen_vert, blit_depth_stencil_frag); + const VideoCore::PixelFormat depth_stencil = VideoCore::PixelFormat::D24S8; + const vk::Format depth_stencil_format = instance.GetTraits(depth_stencil).native; + vk::GraphicsPipelineCreateInfo depth_stencil_info = { + .stageCount = static_cast(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = two_textures_pipeline_layout, + }; + + if (!instance.IsDynamicRenderingSupported()) { + depth_stencil_info.renderPass = + renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid, depth_stencil, false); + } + + vk::StructureChain depth_blit_chain = { + depth_stencil_info, + vk::PipelineRenderingCreateInfoKHR{ + .colorAttachmentCount = 0, + .pColorAttachmentFormats = nullptr, + .depthAttachmentFormat = depth_stencil_format, + .stencilAttachmentFormat = depth_stencil_format, + }, + }; + + if (!instance.IsDynamicRenderingSupported()) { + depth_blit_chain.unlink(); + } + + if (const auto result = device.createGraphicsPipeline({}, depth_blit_chain.get()); + result.result == vk::Result::eSuccess) { + return result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "Depth stencil blit pipeline creation failed!"); + UNREACHABLE(); + } + return VK_NULL_HANDLE; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_helper.h b/src/video_core/renderer_vulkan/vk_blit_helper.h index 064fdb42a..20bb3fdca 100644 --- a/src/video_core/renderer_vulkan/vk_blit_helper.h +++ b/src/video_core/renderer_vulkan/vk_blit_helper.h @@ -4,6 +4,7 @@ #pragma once +#include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_vulkan/vk_common.h" namespace VideoCore { @@ -24,20 +25,44 @@ public: RenderpassCache& renderpass_cache); ~BlitHelper(); + /// Blits depth-stencil textures using helper shader + bool BlitDepthStencil(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + /// Blits D24S8 pixel data to the provided buffer void BlitD24S8ToR32(Surface& depth_surface, Surface& r32_surface, const VideoCore::TextureBlit& blit); private: + /// Creates compute pipelines used for blit + void MakeComputePipelines(); + + /// Creates graphics pipelines used for blit + vk::Pipeline MakeDepthStencilBlitPipeline(); + +private: + const Instance& instance; Scheduler& scheduler; DescriptorManager& desc_manager; RenderpassCache& renderpass_cache; + vk::Device device; - vk::Pipeline compute_pipeline; + vk::RenderPass r32_renderpass; + + vk::DescriptorSetLayout compute_descriptor_layout; + vk::DescriptorSetLayout two_textures_descriptor_layout; + vk::DescriptorUpdateTemplate compute_update_template; + vk::DescriptorUpdateTemplate two_textures_update_template; vk::PipelineLayout compute_pipeline_layout; - vk::DescriptorSetLayout descriptor_layout; - vk::DescriptorUpdateTemplate update_template; - vk::ShaderModule compute_shader; + vk::PipelineLayout two_textures_pipeline_layout; + + vk::ShaderModule full_screen_vert; + vk::ShaderModule copy_d24s8_to_r32_comp; + vk::ShaderModule blit_depth_stencil_frag; + + vk::Pipeline copy_d24s8_to_r32_pipeline; + vk::Pipeline depth_blit_pipeline; + vk::Sampler linear_sampler; + vk::Sampler nearest_sampler; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 6cb5f32fd..8bf20dc97 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -568,6 +568,7 @@ bool Instance::CreateDevice() { timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); image_format_list = AddExtension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); pipeline_creation_feedback = AddExtension(VK_EXT_PIPELINE_CREATION_FEEDBACK_EXTENSION_NAME); + shader_stencil_export = AddExtension(VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME); bool has_portability_subset = AddExtension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); bool has_dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME); bool has_extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 0334f0594..121bebd87 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -183,6 +183,11 @@ public: return pipeline_creation_feedback; } + /// Returns true when VK_EXT_shader_stencil_export is supported + bool IsShaderStencilExportSupported() const { + return shader_stencil_export; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -316,6 +321,7 @@ private: bool image_format_list{}; bool pipeline_creation_cache_control{}; bool pipeline_creation_feedback{}; + bool shader_stencil_export{}; bool enable_validation{}; bool dump_command_buffers{}; }; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index a18830133..7d66b5cfe 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -57,8 +57,8 @@ public: void CreatePresentRenderpass(vk::Format format); /// Returns the renderpass associated with the color-depth format pair - [[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, - VideoCore::PixelFormat depth, bool is_clear); + vk::RenderPass GetRenderpass(VideoCore::PixelFormat color, VideoCore::PixelFormat depth, + bool is_clear); /// Returns the swapchain clear renderpass [[nodiscard]] vk::RenderPass GetPresentRenderpass() const { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index fd6152f4e..9db045acc 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -69,6 +69,11 @@ public: state |= flag; } + /// Marks the provided state as dirty + void MakeDirty(StateFlags flag) noexcept { + state &= ~flag; + } + /// Returns true if the state is dirty [[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept { return False(state & flag); diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index a8ec5cccc..6f0e42262 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -645,6 +645,12 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit) { + const bool is_depth_stencil = source.type == VideoCore::SurfaceType::DepthStencil; + const auto& depth_traits = instance.GetTraits(source.pixel_format); + if (is_depth_stencil && !depth_traits.blit_support) { + return blit_helper.BlitDepthStencil(source, dest, blit); + } + renderpass_cache.ExitRenderpass(); const RecordParams params = { @@ -826,11 +832,6 @@ MICROPROFILE_DEFINE(Vulkan_Upload, "Vulkan", "Texture Upload", MP_RGB(128, 192, void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { MICROPROFILE_SCOPE(Vulkan_Upload); - if (type == VideoCore::SurfaceType::DepthStencil && !traits.blit_support) { - LOG_ERROR(Render_Vulkan, "Depth blit unsupported by hardware, ignoring"); - return; - } - runtime.renderpass_cache.ExitRenderpass(); const bool is_scaled = res_scale != 1;