renderer/vulkan: Support VK_EXT_fragment_shader_interlock for shadow rendering. (#51)
This commit is contained in:
@@ -373,6 +373,7 @@ bool Instance::CreateDevice() {
|
||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT,
|
||||
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR,
|
||||
vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
|
||||
vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT,
|
||||
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
|
||||
const vk::StructureChain properties_chain =
|
||||
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
|
||||
@@ -424,6 +425,9 @@ bool Instance::CreateDevice() {
|
||||
add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, is_qualcomm,
|
||||
"it is broken on most Qualcomm driver versions");
|
||||
const bool has_index_type_uint8 = add_extension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
const bool has_fragment_shader_interlock =
|
||||
add_extension(VK_EXT_FRAGMENT_SHADER_INTERLOCK_EXTENSION_NAME, is_nvidia,
|
||||
"it is broken on Nvidia drivers");
|
||||
const bool has_pipeline_creation_cache_control =
|
||||
add_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, is_nvidia,
|
||||
"it is broken on Nvidia drivers");
|
||||
@@ -480,6 +484,7 @@ bool Instance::CreateDevice() {
|
||||
vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{},
|
||||
vk::PhysicalDeviceCustomBorderColorFeaturesEXT{},
|
||||
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{},
|
||||
vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT{},
|
||||
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT{},
|
||||
};
|
||||
|
||||
@@ -518,6 +523,13 @@ bool Instance::CreateDevice() {
|
||||
device_chain.unlink<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
|
||||
}
|
||||
|
||||
if (has_fragment_shader_interlock) {
|
||||
FEAT_SET(vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT, fragmentShaderPixelInterlock,
|
||||
fragment_shader_interlock)
|
||||
} else {
|
||||
device_chain.unlink<vk::PhysicalDeviceFragmentShaderInterlockFeaturesEXT>();
|
||||
}
|
||||
|
||||
if (has_extended_dynamic_state) {
|
||||
FEAT_SET(vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, extendedDynamicState,
|
||||
extended_dynamic_state)
|
||||
@@ -639,9 +651,6 @@ bool Instance::SetMoltenVkConfig() {
|
||||
mvk_config.resumeLostDevice = true;
|
||||
// Maximize concurrency to improve shader compilation performance.
|
||||
mvk_config.shouldMaximizeConcurrentCompilation = true;
|
||||
// Use Metal argument buffers as otherwise we run into issues with shadow rendering
|
||||
// image atomics.
|
||||
mvk_config.useMetalArgumentBuffers = MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_ALWAYS;
|
||||
|
||||
if (_vkSetMoltenVKConfigurationMVK(VK_NULL_HANDLE, &mvk_config, &mvk_config_size) !=
|
||||
VK_SUCCESS) {
|
||||
|
@@ -140,6 +140,11 @@ public:
|
||||
return index_type_uint8;
|
||||
}
|
||||
|
||||
/// Returns true when VK_EXT_fragment_shader_interlock is supported.
/// This is a plain read of the cached `fragment_shader_interlock` member; no device query
/// is performed here.
/// NOTE(review): the member appears to be filled in by Instance::CreateDevice() through
/// FEAT_SET using the `fragmentShaderPixelInterlock` feature bit, and the extension is not
/// requested on Nvidia drivers, so this presumably reports false there - confirm against
/// the FEAT_SET and add_extension definitions.
|
||||
bool IsFragmentShaderInterlockSupported() const {
|
||||
return fragment_shader_interlock;
|
||||
}
|
||||
|
||||
/// Returns true when VK_KHR_image_format_list is supported
|
||||
bool IsImageFormatListSupported() const {
|
||||
return image_format_list;
|
||||
@@ -291,6 +296,7 @@ private:
|
||||
bool extended_dynamic_state{};
|
||||
bool custom_border_color{};
|
||||
bool index_type_uint8{};
|
||||
bool fragment_shader_interlock{};
|
||||
bool image_format_list{};
|
||||
bool pipeline_creation_cache_control{};
|
||||
bool shader_stencil_export{};
|
||||
|
@@ -236,6 +236,10 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode ==
|
||||
FramebufferRegs::FragmentOperationMode::Shadow);
|
||||
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
|
||||
|
||||
// We only need fragment shader interlock when shadow rendering.
|
||||
state.use_fragment_shader_interlock.Assign(state.shadow_rendering &&
|
||||
instance.IsFragmentShaderInterlockSupported());
|
||||
}
|
||||
|
||||
void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer,
|
||||
@@ -1194,8 +1198,31 @@ float ProcTexNoiseCoef(vec2 x) {
|
||||
|
||||
std::string GenerateFragmentShader(const PicaFSConfig& config) {
|
||||
const auto& state = config.state;
|
||||
std::string out = "#version 450 core\n"
|
||||
"#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||
std::string out = R"(
|
||||
#version 450 core
|
||||
#extension GL_ARB_separate_shader_objects : enable
|
||||
)";
|
||||
|
||||
if (state.use_fragment_shader_interlock) {
|
||||
out += R"(
|
||||
#if defined(GL_ARB_fragment_shader_interlock)
|
||||
#extension GL_ARB_fragment_shader_interlock : enable
|
||||
#define beginInvocationInterlock beginInvocationInterlockARB
|
||||
#define endInvocationInterlock endInvocationInterlockARB
|
||||
#elif defined(GL_NV_fragment_shader_interlock)
|
||||
#extension GL_NV_fragment_shader_interlock : enable
|
||||
#define beginInvocationInterlock beginInvocationInterlockNV
|
||||
#define endInvocationInterlock endInvocationInterlockNV
|
||||
#elif defined(GL_INTEL_fragment_shader_ordering)
|
||||
#extension GL_INTEL_fragment_shader_ordering : enable
|
||||
#define beginInvocationInterlock beginFragmentShaderOrderingINTEL
|
||||
#define endInvocationInterlock
|
||||
#endif
|
||||
|
||||
layout(pixel_interlock_ordered) in;
|
||||
)";
|
||||
}
|
||||
|
||||
out += GetVertexInterfaceDeclaration(false);
|
||||
|
||||
out += R"(
|
||||
@@ -1280,6 +1307,19 @@ uint EncodeShadow(uvec2 pixel) {
|
||||
return (pixel.x << 8) | pixel.y;
|
||||
}
|
||||
|
||||
uint UpdateShadow(uint pixel, uint d, uint s) {
|
||||
uvec2 ref = DecodeShadow(pixel);
|
||||
if (d < ref.x) {
|
||||
if (s == 0u) {
|
||||
ref.x = d;
|
||||
} else {
|
||||
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
|
||||
ref.y = min(s, ref.y);
|
||||
}
|
||||
}
|
||||
return EncodeShadow(ref);
|
||||
}
|
||||
|
||||
float CompareShadow(uint pixel, uint z) {
|
||||
uvec2 p = DecodeShadow(pixel);
|
||||
return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
|
||||
@@ -1578,26 +1618,26 @@ vec4 secondary_fragment_color = vec4(0.0);
|
||||
uint d = uint(clamp(depth, 0.0, 1.0) * float(0xFFFFFF));
|
||||
uint s = uint(last_tex_env_out.g * float(0xFF));
|
||||
ivec2 image_coord = ivec2(gl_FragCoord.xy);
|
||||
|
||||
)";
|
||||
if (state.use_fragment_shader_interlock) {
|
||||
out += R"(
|
||||
beginInvocationInterlock();
|
||||
uint old_shadow = imageLoad(shadow_buffer, image_coord).x;
|
||||
uint new_shadow = UpdateShadow(old_shadow, d, s);
|
||||
imageStore(shadow_buffer, image_coord, uvec4(new_shadow));
|
||||
endInvocationInterlock();
|
||||
)";
|
||||
} else {
|
||||
out += R"(
|
||||
uint old = imageLoad(shadow_buffer, image_coord).x;
|
||||
uint new1;
|
||||
uint old2;
|
||||
do {
|
||||
old2 = old;
|
||||
|
||||
uvec2 ref = DecodeShadow(old);
|
||||
if (d < ref.x) {
|
||||
if (s == 0u) {
|
||||
ref.x = d;
|
||||
} else {
|
||||
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
|
||||
ref.y = min(s, ref.y);
|
||||
}
|
||||
}
|
||||
new1 = EncodeShadow(ref);
|
||||
|
||||
new1 = UpdateShadow(old, d, s);
|
||||
} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new1)) != old2);
|
||||
)";
|
||||
}
|
||||
} else {
|
||||
out += "gl_FragDepth = depth;\n";
|
||||
// Round the final fragment color to maintain the PICA's 8 bits of precision
|
||||
|
@@ -55,6 +55,7 @@ struct PicaFSConfigState {
|
||||
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
|
||||
BitField<27, 1, u32> shadow_rendering;
|
||||
BitField<28, 1, u32> shadow_texture_orthographic;
|
||||
BitField<29, 1, u32> use_fragment_shader_interlock;
|
||||
};
|
||||
|
||||
union {
|
||||
|
Reference in New Issue
Block a user