glsl: Implement VOTE for subgroup size potentially larger than guest
This commit is contained in:
		| @@ -122,9 +122,11 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | |||||||
|  |  | ||||||
| void EmitContext::SetupExtensions(std::string&) { | void EmitContext::SetupExtensions(std::string&) { | ||||||
|     header += "#extension GL_ARB_separate_shader_objects : enable\n"; |     header += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||||
|     header += "#extension GL_ARB_sparse_texture2 : enable\n"; |     if (stage != Stage::Compute) { | ||||||
|     header += "#extension GL_EXT_texture_shadow_lod : enable\n"; |         // TODO: track this usage | ||||||
|     // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; |         header += "#extension GL_ARB_sparse_texture2 : enable\n"; | ||||||
|  |         header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||||||
|  |     } | ||||||
|     if (info.uses_int64) { |     if (info.uses_int64) { | ||||||
|         header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; |         header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||||||
|     } |     } | ||||||
| @@ -149,6 +151,10 @@ void EmitContext::SetupExtensions(std::string&) { | |||||||
|         info.uses_subgroup_shuffles || info.uses_fswzadd) { |         info.uses_subgroup_shuffles || info.uses_fswzadd) { | ||||||
|         header += "#extension GL_ARB_shader_ballot : enable\n"; |         header += "#extension GL_ARB_shader_ballot : enable\n"; | ||||||
|         header += "#extension GL_ARB_shader_group_vote : enable\n"; |         header += "#extension GL_ARB_shader_group_vote : enable\n"; | ||||||
|  |         header += "#extension GL_KHR_shader_subgroup_basic : enable\n"; | ||||||
|  |         if (!info.uses_int64) { | ||||||
|  |             header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -42,31 +42,42 @@ void EmitLaneId([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& in | |||||||
| } | } | ||||||
|  |  | ||||||
| void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | void EmitVoteAll(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||||||
|     ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|     // TODO: |         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||||||
|     // if (ctx.profile.warp_size_potentially_larger_than_guest) { |     } else { | ||||||
|     // } |         const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; | ||||||
|  |         const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||||||
|  |         ctx.AddU1("{}=({}&{})=={};", inst, ballot, active_mask, active_mask); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | void EmitVoteAny(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||||||
|     ctx.AddU1("{}=anyInvocationARB({});", inst, pred); |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|     // TODO: |         ctx.AddU1("{}=anyInvocationARB({});", inst, pred); | ||||||
|     // if (ctx.profile.warp_size_potentially_larger_than_guest) { |     } else { | ||||||
|     // } |         const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; | ||||||
|  |         const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||||||
|  |         ctx.AddU1("{}=({}&{})!=0u;", inst, ballot, active_mask, active_mask); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | void EmitVoteEqual(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||||||
|     ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|     // TODO: |         ctx.AddU1("{}=allInvocationsEqualARB({});", inst, pred); | ||||||
|     // if (ctx.profile.warp_size_potentially_larger_than_guest) { |     } else { | ||||||
|     // } |         const auto active_mask{fmt::format("uvec2(ballotARB(true))[gl_SubgroupInvocationID]")}; | ||||||
|  |         const auto ballot{fmt::format("uvec2(ballotARB({}))[gl_SubgroupInvocationID]", pred)}; | ||||||
|  |         const auto value{fmt::format("({}^{})", ballot, active_mask)}; | ||||||
|  |         ctx.AddU1("{}=({}==0)||({}=={});", inst, value, value, active_mask); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | void EmitSubgroupBallot(EmitContext& ctx, IR::Inst& inst, std::string_view pred) { | ||||||
|     ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); |     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||||
|     // TODO: |         ctx.AddU32("{}=uvec2(ballotARB({})).x;", inst, pred); | ||||||
|     // if (ctx.profile.warp_size_potentially_larger_than_guest) { |     } else { | ||||||
|     // } |         ctx.AddU32("{}=uvec2(ballotARB({}))[gl_SubgroupInvocationID];", inst, pred); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | void EmitSubgroupEqMask(EmitContext& ctx, IR::Inst& inst) { | ||||||
|   | |||||||
| @@ -160,6 +160,7 @@ Device::Device() { | |||||||
|     has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); |     has_depth_buffer_float = HasExtension(extensions, "GL_NV_depth_buffer_float"); | ||||||
|     has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; |     has_nv_gpu_shader_5 = GLAD_GL_NV_gpu_shader5; | ||||||
|     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; |     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; | ||||||
|  |     warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | ||||||
|  |  | ||||||
|     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive |     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | ||||||
|     // uniform buffers as "push constants" |     // uniform buffers as "push constants" | ||||||
|   | |||||||
| @@ -128,6 +128,10 @@ public: | |||||||
|         return has_amd_shader_half_float; |         return has_amd_shader_half_float; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     bool IsWarpSizePotentiallyLargerThanGuest() const { | ||||||
|  |         return warp_size_potentially_larger_than_guest; | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     static bool TestVariableAoffi(); |     static bool TestVariableAoffi(); | ||||||
|     static bool TestPreciseBug(); |     static bool TestPreciseBug(); | ||||||
| @@ -161,6 +165,7 @@ private: | |||||||
|     bool has_depth_buffer_float{}; |     bool has_depth_buffer_float{}; | ||||||
|     bool has_nv_gpu_shader_5{}; |     bool has_nv_gpu_shader_5{}; | ||||||
|     bool has_amd_shader_half_float{}; |     bool has_amd_shader_half_float{}; | ||||||
|  |     bool warp_size_potentially_larger_than_guest{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } // namespace OpenGL | } // namespace OpenGL | ||||||
|   | |||||||
| @@ -220,7 +220,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||||||
|           .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), |           .support_gl_nv_gpu_shader_5 = device.HasNvGpuShader5(), | ||||||
|           .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), |           .support_gl_amd_gpu_shader_half_float = device.HasAmdShaderHalfFloat(), | ||||||
|  |  | ||||||
|           .warp_size_potentially_larger_than_guest = true, |           .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyLargerThanGuest(), | ||||||
|  |  | ||||||
|           .lower_left_origin_mode = true, |           .lower_left_origin_mode = true, | ||||||
|           .need_declared_frag_colors = true, |           .need_declared_frag_colors = true, | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user