shader: Implement fine derivatives constant propagation
This commit is contained in:
		| @@ -341,6 +341,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | ||||
|     if (!ctx.profile.xfb_varyings.empty()) { | ||||
|         ctx.AddCapability(spv::Capability::TransformFeedback); | ||||
|     } | ||||
|     if (info.uses_derivates) { | ||||
|         ctx.AddCapability(spv::Capability::DerivativeControl); | ||||
|     } | ||||
|     // TODO: Track this usage | ||||
|     ctx.AddCapability(spv::Capability::ImageGatherExtended); | ||||
|     ctx.AddCapability(spv::Capability::ImageQuery); | ||||
|   | ||||
| @@ -529,4 +529,8 @@ Id EmitShuffleButterfly(EmitContext& ctx, IR::Inst* inst, Id value, Id index, Id | ||||
|                         Id segmentation_mask); | ||||
| Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle); | ||||
|  | ||||
| Id EmitDPdxFine(EmitContext& ctx, Id op_a); | ||||
|  | ||||
| Id EmitDPdyFine(EmitContext& ctx, Id op_a); | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -183,4 +183,12 @@ Id EmitFSwizzleAdd(EmitContext& ctx, Id op_a, Id op_b, Id swizzle) { | ||||
|     return ctx.OpFAdd(ctx.F32[1], result_a, result_b); | ||||
| } | ||||
|  | ||||
| // Emits OpDPdxFine on op_a, with the result typed as ctx.F32[1]. | ||||
| Id EmitDPdxFine(EmitContext& ctx, Id op_a) { | ||||
|     const Id result_type{ctx.F32[1]}; | ||||
|     return ctx.OpDPdxFine(result_type, op_a); | ||||
| } | ||||
|  | ||||
| // Emits OpDPdyFine on op_a, with the result typed as ctx.F32[1]. | ||||
| Id EmitDPdyFine(EmitContext& ctx, Id op_a) { | ||||
|     const Id result_type{ctx.F32[1]}; | ||||
|     return ctx.OpDPdyFine(result_type, op_a); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -1925,4 +1925,12 @@ F32 IREmitter::FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, FpCon | ||||
|     return Inst<F32>(Opcode::FSwizzleAdd, Flags{control}, a, b, swizzle); | ||||
| } | ||||
|  | ||||
| // Appends a DPdxFine IR instruction taking `a` as its only operand. | ||||
| F32 IREmitter::DPdxFine(const F32& a) { | ||||
|     constexpr Opcode opcode{Opcode::DPdxFine}; | ||||
|     return Inst<F32>(opcode, a); | ||||
| } | ||||
|  | ||||
| // Appends a DPdyFine IR instruction taking `a` as its only operand. | ||||
| F32 IREmitter::DPdyFine(const F32& a) { | ||||
|     constexpr Opcode opcode{Opcode::DPdyFine}; | ||||
|     return Inst<F32>(opcode, a); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::IR | ||||
|   | ||||
| @@ -353,6 +353,10 @@ public: | ||||
|     [[nodiscard]] F32 FSwizzleAdd(const F32& a, const F32& b, const U32& swizzle, | ||||
|                                   FpControl control = {}); | ||||
|  | ||||
|     [[nodiscard]] F32 DPdxFine(const F32& a); | ||||
|  | ||||
|     [[nodiscard]] F32 DPdyFine(const F32& a); | ||||
|  | ||||
| private: | ||||
|     IR::Block::iterator insertion_point; | ||||
|  | ||||
|   | ||||
| @@ -511,3 +511,5 @@ OPCODE(ShuffleUp,                                           U32,            U32, | ||||
| OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(FSwizzleAdd,                                         F32,            F32,            F32,            U32,                                            ) | ||||
| OPCODE(DPdxFine,                                            F32,            F32,                                                                            ) | ||||
| OPCODE(DPdyFine,                                            F32,            F32,                                                                            ) | ||||
|   | ||||
| @@ -530,6 +530,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::FSwizzleAdd: | ||||
|         info.uses_fswzadd = true; | ||||
|         break; | ||||
|     case IR::Opcode::DPdxFine: | ||||
|     case IR::Opcode::DPdyFine: | ||||
|         info.uses_derivates = true; | ||||
|         break; | ||||
|     case IR::Opcode::LoadStorageU8: | ||||
|     case IR::Opcode::LoadStorageS8: | ||||
|     case IR::Opcode::WriteStorageU8: | ||||
|   | ||||
| @@ -412,6 +412,71 @@ void FoldCompositeExtract(IR::Inst& inst, IR::Opcode construct, IR::Opcode inser | ||||
|     inst.ReplaceUsesWith(*result); | ||||
| } | ||||
|  | ||||
| // Looks through a single cast: when `value` is produced by an instruction whose | ||||
| // opcode is `expected_cast`, returns that instruction's resolved first argument. | ||||
| // Immediates and values produced by any other opcode are returned unchanged. | ||||
| IR::Value GetThroughCast(IR::Value value, IR::Opcode expected_cast) { | ||||
|     if (!value.IsImmediate()) { | ||||
|         IR::Inst* const producer{value.InstRecursive()}; | ||||
|         if (producer->GetOpcode() == expected_cast) { | ||||
|             return producer->Arg(0).Resolve(); | ||||
|         } | ||||
|     } | ||||
|     return value; | ||||
| } | ||||
|  | ||||
| // Pattern-matches an FSwizzleAdd whose first operand is a ShuffleButterfly of its | ||||
| // second operand and, on a match, replaces its uses with DPdxFine or DPdyFine. | ||||
| // Every mismatch leaves the instruction untouched. | ||||
| void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||||
|     // The swizzle selector has to be known at compile time | ||||
|     const IR::Value swizzle_arg{inst.Arg(2)}; | ||||
|     if (!swizzle_arg.IsImmediate()) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Look through U32 -> F32 bitcasts on both addends | ||||
|     const IR::Value shuffled{GetThroughCast(inst.Arg(0).Resolve(), IR::Opcode::BitCastF32U32)}; | ||||
|     const IR::Value source{GetThroughCast(inst.Arg(1).Resolve(), IR::Opcode::BitCastF32U32)}; | ||||
|     if (shuffled.IsImmediate()) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Only these two swizzle patterns are folded (0x99 -> DPdxFine, 0xA5 -> DPdyFine) | ||||
|     const u32 pattern{swizzle_arg.U32()}; | ||||
|     const bool wants_dx{pattern == 0x99}; | ||||
|     const bool wants_dy{pattern == 0xA5}; | ||||
|     if (!wants_dx && !wants_dy) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // The first addend must come from a butterfly shuffle of the second addend | ||||
|     IR::Inst* const shuffle{shuffled.InstRecursive()}; | ||||
|     if (shuffle->GetOpcode() != IR::Opcode::ShuffleButterfly) { | ||||
|         return; | ||||
|     } | ||||
|     const IR::Value shuffle_source{ | ||||
|         GetThroughCast(shuffle->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)}; | ||||
|     if (source != shuffle_source) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // All shuffle parameters must be immediates with the expected clamp and mask | ||||
|     const IR::Value index{shuffle->Arg(1)}; | ||||
|     const IR::Value clamp{shuffle->Arg(2)}; | ||||
|     const IR::Value segmentation_mask{shuffle->Arg(3)}; | ||||
|     const bool all_immediate{index.IsImmediate() && clamp.IsImmediate() && | ||||
|                              segmentation_mask.IsImmediate()}; | ||||
|     if (!all_immediate) { | ||||
|         return; | ||||
|     } | ||||
|     if (!(clamp.U32() == 3 && segmentation_mask.U32() == 28)) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // The new instruction is emitted at the position of the FSwizzleAdd | ||||
|     const u32 lane_index{index.U32()}; | ||||
|     if (wants_dx && lane_index == 1) { | ||||
|         // DPdxFine | ||||
|         IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|         inst.ReplaceUsesWith(ir.DPdxFine(IR::F32{source})); | ||||
|     } else if (wants_dy && lane_index == 2) { | ||||
|         // DPdyFine | ||||
|         IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|         inst.ReplaceUsesWith(ir.DPdyFine(IR::F32{source})); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||
|     switch (inst.GetOpcode()) { | ||||
|     case IR::Opcode::GetRegister: | ||||
| @@ -532,6 +597,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||
|     case IR::Opcode::CompositeExtractF16x4: | ||||
|         return FoldCompositeExtract(inst, IR::Opcode::CompositeConstructF16x4, | ||||
|                                     IR::Opcode::CompositeInsertF16x4); | ||||
|     case IR::Opcode::FSwizzleAdd: | ||||
|         return FoldFSwizzleAdd(block, inst); | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|   | ||||
| @@ -147,6 +147,7 @@ struct Info { | ||||
|     bool uses_subgroup_vote{}; | ||||
|     bool uses_subgroup_mask{}; | ||||
|     bool uses_fswzadd{}; | ||||
|     bool uses_derivates{}; | ||||
|     bool uses_typeless_image_reads{}; | ||||
|     bool uses_typeless_image_writes{}; | ||||
|     bool uses_shared_increment{}; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user