shader: Implement SHFL
This commit is contained in:
		| @@ -274,7 +274,8 @@ void EmitContext::DefineInputs(const Info& info) { | ||||
|     if (info.uses_local_invocation_id) { | ||||
|         local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId); | ||||
|     } | ||||
|     if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) { | ||||
|     if (info.uses_subgroup_invocation_id || | ||||
|         (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote)) { | ||||
|         subgroup_local_invocation_id = | ||||
|             DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId); | ||||
|     } | ||||
|   | ||||
| @@ -224,7 +224,7 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | ||||
|         ctx.AddExtension("SPV_KHR_shader_draw_parameters"); | ||||
|         ctx.AddCapability(spv::Capability::DrawParameters); | ||||
|     } | ||||
|     if (info.uses_subgroup_vote && profile.support_vote) { | ||||
|     if ((info.uses_subgroup_vote || info.uses_subgroup_invocation_id) && profile.support_vote) { | ||||
|         ctx.AddExtension("SPV_KHR_shader_ballot"); | ||||
|         ctx.AddCapability(spv::Capability::SubgroupBallotKHR); | ||||
|         if (!profile.warp_size_potentially_larger_than_guest) { | ||||
| @@ -315,4 +315,8 @@ void EmitGetSparseFromOp(EmitContext&) { | ||||
|     throw LogicError("Unreachable instruction"); | ||||
| } | ||||
|  | ||||
| void EmitGetInBoundsFromOp(EmitContext&) { | ||||
|     throw LogicError("Unreachable instruction"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -158,6 +158,7 @@ void EmitGetSignFromOp(EmitContext& ctx); | ||||
| void EmitGetCarryFromOp(EmitContext& ctx); | ||||
| void EmitGetOverflowFromOp(EmitContext& ctx); | ||||
| void EmitGetSparseFromOp(EmitContext& ctx); | ||||
| void EmitGetInBoundsFromOp(EmitContext& ctx); | ||||
| Id EmitFPAbs16(EmitContext& ctx, Id value); | ||||
| Id EmitFPAbs32(EmitContext& ctx, Id value); | ||||
| Id EmitFPAbs64(EmitContext& ctx, Id value); | ||||
| @@ -355,5 +356,13 @@ Id EmitVoteAll(EmitContext& ctx, Id pred); | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred); | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred); | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred); | ||||
| Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                     Id segmentation_mask); | ||||
| Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                  Id segmentation_mask); | ||||
| Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                    Id segmentation_mask); | ||||
| Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                         Id segmentation_mask); | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -1,58 +0,0 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||||
|     const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||||
|     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||||
| } | ||||
|  | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||||
| } | ||||
|  | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||||
|                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||||
| } | ||||
|  | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||||
|     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||||
|     } | ||||
|     return LargeWarpBallot(ctx, ballot); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
							
								
								
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								src/shader_recompiler/backend/spirv/emit_spirv_warp.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,135 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| Id LargeWarpBallot(EmitContext& ctx, Id ballot) { | ||||
|     const Id shift{ctx.Constant(ctx.U32[1], 5)}; | ||||
|     const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index); | ||||
| } | ||||
|  | ||||
| void SetInBoundsFlag(IR::Inst* inst, Id result) { | ||||
|     IR::Inst* const in_bounds{inst->GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; | ||||
|     if (!in_bounds) { | ||||
|         return; | ||||
|     } | ||||
|     in_bounds->SetDefinition(result); | ||||
|     in_bounds->Invalidate(); | ||||
| } | ||||
|  | ||||
| Id ComputeMinThreadId(EmitContext& ctx, Id thread_id, Id segmentation_mask) { | ||||
|     return ctx.OpBitwiseAnd(ctx.U32[1], thread_id, segmentation_mask); | ||||
| } | ||||
|  | ||||
| Id ComputeMaxThreadId(EmitContext& ctx, Id min_thread_id, Id clamp, Id not_seg_mask) { | ||||
|     return ctx.OpBitwiseOr(ctx.U32[1], min_thread_id, | ||||
|                            ctx.OpBitwiseAnd(ctx.U32[1], clamp, not_seg_mask)); | ||||
| } | ||||
|  | ||||
| Id GetMaxThreadId(EmitContext& ctx, Id thread_id, Id clamp, Id segmentation_mask) { | ||||
|     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||
|     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||
|     return ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask); | ||||
| } | ||||
|  | ||||
| Id SelectValue(EmitContext& ctx, Id in_range, Id value, Id src_thread_id) { | ||||
|     return ctx.OpSelect(ctx.U32[1], in_range, | ||||
|                         ctx.OpSubgroupReadInvocationKHR(ctx.U32[1], value, src_thread_id), value); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| Id EmitVoteAll(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpIEqual(ctx.U1, lhs, active_mask); | ||||
| } | ||||
|  | ||||
| Id EmitVoteAny(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAnyKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value); | ||||
| } | ||||
|  | ||||
| Id EmitVoteEqual(EmitContext& ctx, Id pred) { | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred); | ||||
|     } | ||||
|     const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)}; | ||||
|     const Id active_mask{LargeWarpBallot(ctx, mask_ballot)}; | ||||
|     const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))}; | ||||
|     const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)}; | ||||
|     return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value), | ||||
|                            ctx.OpIEqual(ctx.U1, lhs, active_mask)); | ||||
| } | ||||
|  | ||||
| Id EmitSubgroupBallot(EmitContext& ctx, Id pred) { | ||||
|     const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)}; | ||||
|     if (!ctx.profile.warp_size_potentially_larger_than_guest) { | ||||
|         return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U); | ||||
|     } | ||||
|     return LargeWarpBallot(ctx, ballot); | ||||
| } | ||||
|  | ||||
| Id EmitShuffleIndex(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                     Id segmentation_mask) { | ||||
|     const Id not_seg_mask{ctx.OpNot(ctx.U32[1], segmentation_mask)}; | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id min_thread_id{ComputeMinThreadId(ctx, thread_id, segmentation_mask)}; | ||||
|     const Id max_thread_id{ComputeMaxThreadId(ctx, min_thread_id, clamp, not_seg_mask)}; | ||||
|  | ||||
|     const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], index, not_seg_mask)}; | ||||
|     const Id src_thread_id{ctx.OpBitwiseOr(ctx.U32[1], lhs, min_thread_id)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
|  | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
|  | ||||
| Id EmitShuffleUp(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                  Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpISub(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSGreaterThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
|  | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
|  | ||||
| Id EmitShuffleDown(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                    Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpIAdd(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
|  | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
|  | ||||
| Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id clamp, | ||||
|                         Id segmentation_mask) { | ||||
|     const Id thread_id{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)}; | ||||
|     const Id max_thread_id{GetMaxThreadId(ctx, thread_id, clamp, segmentation_mask)}; | ||||
|     const Id src_thread_id{ctx.OpBitwiseXor(ctx.U32[1], thread_id, index)}; | ||||
|     const Id in_range{ctx.OpSLessThanEqual(ctx.U1, src_thread_id, max_thread_id)}; | ||||
|  | ||||
|     SetInBoundsFlag(inst, in_range); | ||||
|     return SelectValue(ctx, in_range, value, src_thread_id); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
		Reference in New Issue
	
	Block a user