diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 3891393fb..01debd898 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -373,6 +373,7 @@ bool Instance::CreateDevice() { vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT, vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR, vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, + vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>(); const vk::StructureChain properties_chain = physical_device.getProperties2(); } + if (has_fragment_shader_interlock) { + FEAT_SET(vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, fragmentShaderPixelInterlock, + fragment_shader_interlock) + } else { + device_chain.unlink(); + } + if (has_extended_dynamic_state) { FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState, extended_dynamic_state) @@ -639,9 +651,6 @@ bool Instance::SetMoltenVkConfig() { mvk_config.resumeLostDevice = true; // Maximize concurrency to improve shader compilation performance. mvk_config.shouldMaximizeConcurrentCompilation = true; - // Use Metal argument buffers as otherwise we run into issues with shadow rendering - // image atomics. - mvk_config.useMetalArgumentBuffers = MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS; if (_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size) != VK_SUCCESS) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 1601f67d7..4d7d7f55e 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -140,6 +140,11 @@ public: return index_type_uint8; } + /// Returns true when VK_EXT_fragment_shader_interlock is supported + bool IsFragmentShaderInterlockSupported() const { + return fragment_shader_interlock; + } + /// Returns true when VK_KHR_image_format_list is supported bool IsImageFormatListSupported() const { return image_format_list; @@ -291,6 +296,7 @@ private: bool extended_dynamic_state{}; bool custom_border_color{}; bool index_type_uint8{}; + bool fragment_shader_interlock{}; bool image_format_list{}; bool pipeline_creation_cache_control{}; bool shader_stencil_export{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index bec4d316e..8286e1068 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -236,6 +236,10 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode == FramebufferRegs::FragmentOperationMode::Shadow); state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); + + // We only need fragment shader interlock when shadow rendering. + state.use_fragment_shader_interlock.Assign(state.shadow_rendering && + instance.IsFragmentShaderInterlockSupported()); } void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, @@ -1194,8 +1198,31 @@ float ProcTexNoiseCoef(vec2 x) { std::string GenerateFragmentShader(const PicaFSConfig& config) { const auto& state = config.state; - std::string out = "#version 450 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n\n"; + std::string out = R"( +#version 450 core +#extension GL_ARB_separate_shader_objects : enable +)"; + + if (state.use_fragment_shader_interlock) { + out += R"( +#if defined(GL_ARB_fragment_shader_interlock) +#extension GL_ARB_fragment_shader_interlock : enable +#define beginInvocationInterlock beginInvocationInterlockARB +#define endInvocationInterlock endInvocationInterlockARB +#elif defined(GL_NV_fragment_shader_interlock) +#extension GL_NV_fragment_shader_interlock : enable +#define beginInvocationInterlock beginInvocationInterlockNV +#define endInvocationInterlock endInvocationInterlockNV +#elif defined(GL_INTEL_fragment_shader_ordering) +#extension GL_INTEL_fragment_shader_ordering : enable +#define beginInvocationInterlock beginFragmentShaderOrderingINTEL +#define endInvocationInterlock +#endif + +layout(pixel_interlock_ordered) in; +)"; + } + out += GetVertexInterfaceDeclaration(false); out += R"( @@ -1280,6 +1307,19 @@ uint EncodeShadow(uvec2 pixel) { return (pixel.x << 8) | pixel.y; } +uint UpdateShadow(uint pixel, uint d, uint s) { + uvec2 ref = DecodeShadow(pixel); + if (d < ref.x) { + if (s == 0u) { + ref.x = d; + } else { + s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); + ref.y = min(s, ref.y); + } + } + return EncodeShadow(ref); +} + float CompareShadow(uint pixel, uint z) { uvec2 p = DecodeShadow(pixel); return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z); @@ -1578,26 +1618,26 @@ vec4 secondary_fragment_color = vec4(0.0); uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF)); uint s = uint(last_tex_env_out.g * float(0xFF)); ivec2 image_coord = ivec2(gl_FragCoord.xy); - +)"; + if (state.use_fragment_shader_interlock) { + out += R"( +beginInvocationInterlock(); +uint old_shadow = imageLoad(shadow_buffer, image_coord).x; +uint new_shadow = UpdateShadow(old_shadow, d, s); +imageStore(shadow_buffer, image_coord, uvec4(new_shadow)); +endInvocationInterlock(); +)"; + } else { + out += R"( uint old = imageLoad(shadow_buffer, image_coord).x; uint new1; uint old2; do { old2 = old; - - uvec2 ref = DecodeShadow(old); - if (d < ref.x) { - if (s == 0u) { - ref.x = d; - } else { - s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x))); - ref.y = min(s, ref.y); - } - } - new1 = EncodeShadow(ref); - + new1 = UpdateShadow(old, d, s); } while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2); )"; + } } else { out += "gl_FragDepth = depth;\n"; // Round the final fragment color to maintain the PICA's 8 bits of precision diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 52fa832ff..7ccc01969 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -55,6 +55,7 @@ struct PicaFSConfigState { BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op; BitField<27, 1, u32> shadow_rendering; BitField<28, 1, u32> shadow_texture_orthographic; + BitField<29, 1, u32> use_fragment_shader_interlock; }; union {