renderer/vulkan: Support VK_EXT_fragment_shader_interlock for shadow rendering. (#51)

This commit is contained in:
Steveice10
2023-09-07 14:09:09 -07:00
committed by GPUCode
parent ba89673d77
commit da891af113
4 changed files with 74 additions and 18 deletions

View File

@@ -373,6 +373,7 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR,
vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT,
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
const vk::StructureChain properties_chain =
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
@@ -424,6 +425,9 @@ bool Instance::CreateDevice() {
add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm,
"it is broken on most Qualcomm driver versions");
const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
const bool has_fragment_shader_interlock =
add_extension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, is_nvidia,
"it is broken on Nvidia drivers");
const bool has_pipeline_creation_cache_control =
add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, is_nvidia,
"it is broken on Nvidia drivers");
@@ -480,6 +484,7 @@ bool Instance::CreateDevice() {
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{},
vk::PhysicalDeviceCustomBorderColorFeaturesEXT{},
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{},
vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT{},
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT{},
};
@@ -518,6 +523,13 @@ bool Instance::CreateDevice() {
device_chain.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
}
if (has_fragment_shader_interlock) {
FEAT_SET(vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, fragmentShaderPixelInterlock,
fragment_shader_interlock)
} else {
device_chain.unlink<vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT>();
}
if (has_extended_dynamic_state) {
FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState,
extended_dynamic_state)
@@ -639,9 +651,6 @@ bool Instance::SetMoltenVkConfig() {
mvk_config.resumeLostDevice = true;
// Maximize concurrency to improve shader compilation performance.
mvk_config.shouldMaximizeConcurrentCompilation = true;
// Use Metal argument buffers as otherwise we run into issues with shadow rendering
// image atomics.
mvk_config.useMetalArgumentBuffers = MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS;
if (_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size) !=
VK_SUCCESS) {

View File

@@ -140,6 +140,11 @@ public:
return index_type_uint8;
}
/// Returns true when VK_EXT_fragment_shader_interlock is supported.
/// This is a plain accessor for the fragment_shader_interlock member and performs no
/// device query of its own.
/// NOTE(review): the flag appears to track the fragmentShaderPixelInterlock feature
/// specifically (not the sample/shading-rate interlock variants) - confirm against the
/// device creation code.
bool IsFragmentShaderInterlockSupported() const {
return fragment_shader_interlock;
}
/// Returns true when VK_KHR_image_format_list is supported
bool IsImageFormatListSupported() const {
return image_format_list;
@@ -291,6 +296,7 @@ private:
bool extended_dynamic_state{};
bool custom_border_color{};
bool index_type_uint8{};
bool fragment_shader_interlock{};
bool image_format_list{};
bool pipeline_creation_cache_control{};
bool shader_stencil_export{};

View File

@@ -236,6 +236,10 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow);
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
// We only need fragment shader interlock when shadow rendering.
state.use_fragment_shader_interlock.Assign(state.shadow_rendering &&
instance.IsFragmentShaderInterlockSupported());
}
void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer,
@@ -1194,8 +1198,31 @@ float ProcTexNoiseCoef(vec2 x) {
std::string GenerateFragmentShader(const PicaFSConfig& config) {
const auto& state = config.state;
std::string out = "#version 450 core\n"
"#extension GL_ARB_separate_shader_objects : enable\n\n";
std::string out = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
)";
if (state.use_fragment_shader_interlock) {
out += R"(
#if defined(GL_ARB_fragment_shader_interlock)
#extension GL_ARB_fragment_shader_interlock : enable
#define beginInvocationInterlock beginInvocationInterlockARB
#define endInvocationInterlock endInvocationInterlockARB
#elif defined(GL_NV_fragment_shader_interlock)
#extension GL_NV_fragment_shader_interlock : enable
#define beginInvocationInterlock beginInvocationInterlockNV
#define endInvocationInterlock endInvocationInterlockNV
#elif defined(GL_INTEL_fragment_shader_ordering)
#extension GL_INTEL_fragment_shader_ordering : enable
#define beginInvocationInterlock beginFragmentShaderOrderingINTEL
#define endInvocationInterlock
#endif
layout(pixel_interlock_ordered) in;
)";
}
out += GetVertexInterfaceDeclaration(false);
out += R"(
@@ -1280,6 +1307,19 @@ uint EncodeShadow(uvec2 pixel) {
return (pixel.x << 8) | pixel.y;
}
uint UpdateShadow(uint pixel, uint d, uint s) {
uvec2 ref = DecodeShadow(pixel);
if (d < ref.x) {
if (s == 0u) {
ref.x = d;
} else {
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
ref.y = min(s, ref.y);
}
}
return EncodeShadow(ref);
}
float CompareShadow(uint pixel, uint z) {
uvec2 p = DecodeShadow(pixel);
return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
@@ -1578,26 +1618,26 @@ vec4 secondary_fragment_color = vec4(0.0);
uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
uint s = uint(last_tex_env_out.g * float(0xFF));
ivec2 image_coord = ivec2(gl_FragCoord.xy);
)";
if (state.use_fragment_shader_interlock) {
out += R"(
beginInvocationInterlock();
uint old_shadow = imageLoad(shadow_buffer, image_coord).x;
uint new_shadow = UpdateShadow(old_shadow, d, s);
imageStore(shadow_buffer, image_coord, uvec4(new_shadow));
endInvocationInterlock();
)";
} else {
out += R"(
uint old = imageLoad(shadow_buffer, image_coord).x;
uint new1;
uint old2;
do {
old2 = old;
uvec2 ref = DecodeShadow(old);
if (d < ref.x) {
if (s == 0u) {
ref.x = d;
} else {
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
ref.y = min(s, ref.y);
}
}
new1 = EncodeShadow(ref);
new1 = UpdateShadow(old, d, s);
} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2);
)";
}
} else {
out += "gl_FragDepth = depth;\n";
// Round the final fragment color to maintain the PICA's 8 bits of precision

View File

@@ -55,6 +55,7 @@ struct PicaFSConfigState {
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
BitField<27, 1, u32> shadow_rendering;
BitField<28, 1, u32> shadow_texture_orthographic;
BitField<29, 1, u32> use_fragment_shader_interlock;
};
union {