glasm: Implement FSWZADD
This commit is contained in:
		| @@ -281,7 +281,8 @@ void SetupOptions(const IR::Program& program, const Profile& profile, std::strin | |||||||
|     if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { |     if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||||||
|         header += "OPTION NV_shader_atomic_fp16_vector;"; |         header += "OPTION NV_shader_atomic_fp16_vector;"; | ||||||
|     } |     } | ||||||
|     if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote) { |     if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || | ||||||
|  |         info.uses_fswzadd) { | ||||||
|         header += "OPTION NV_shader_thread_group;"; |         header += "OPTION NV_shader_thread_group;"; | ||||||
|     } |     } | ||||||
|     if (info.uses_subgroup_shuffles) { |     if (info.uses_subgroup_shuffles) { | ||||||
| @@ -416,12 +417,25 @@ std::string EmitGLASM(const Profile& profile, IR::Program& program, Bindings& bi | |||||||
|     if (program.local_memory_size > 0) { |     if (program.local_memory_size > 0) { | ||||||
|         header += fmt::format("lmem[{}],", program.local_memory_size); |         header += fmt::format("lmem[{}],", program.local_memory_size); | ||||||
|     } |     } | ||||||
|  |     if (program.info.uses_fswzadd) { | ||||||
|  |         header += "FSWZA[4],FSWZB[4],"; | ||||||
|  |     } | ||||||
|     header += "RC;" |     header += "RC;" | ||||||
|               "LONG TEMP "; |               "LONG TEMP "; | ||||||
|     for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { |     for (size_t index = 0; index < ctx.reg_alloc.NumUsedLongRegisters(); ++index) { | ||||||
|         header += fmt::format("D{},", index); |         header += fmt::format("D{},", index); | ||||||
|     } |     } | ||||||
|     header += "DC;"; |     header += "DC;"; | ||||||
|  |     if (program.info.uses_fswzadd) { | ||||||
|  |         header += "MOV.F FSWZA[0],-1;" | ||||||
|  |                   "MOV.F FSWZA[1],1;" | ||||||
|  |                   "MOV.F FSWZA[2],-1;" | ||||||
|  |                   "MOV.F FSWZA[3],0;" | ||||||
|  |                   "MOV.F FSWZB[0],-1;" | ||||||
|  |                   "MOV.F FSWZB[1],-1;" | ||||||
|  |                   "MOV.F FSWZB[2],1;" | ||||||
|  |                   "MOV.F FSWZB[3],-1;"; | ||||||
|  |     } | ||||||
|     ctx.code.insert(0, header); |     ctx.code.insert(0, header); | ||||||
|     ctx.code += "END"; |     ctx.code += "END"; | ||||||
|     return ctx.code; |     return ctx.code; | ||||||
|   | |||||||
| @@ -616,7 +616,8 @@ void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU3 | |||||||
|                      const IR::Value& clamp, const IR::Value& segmentation_mask); |                      const IR::Value& clamp, const IR::Value& segmentation_mask); | ||||||
| void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, | ||||||
|                           const IR::Value& clamp, const IR::Value& segmentation_mask); |                           const IR::Value& clamp, const IR::Value& segmentation_mask); | ||||||
| void EmitFSwizzleAdd(EmitContext& ctx, ScalarF32 op_a, ScalarF32 op_b, ScalarU32 swizzle); | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||||||
|  |                      ScalarU32 swizzle); | ||||||
| void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||||||
| void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | void EmitDPdyFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||||||
| void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | void EmitDPdxCoarse(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a); | ||||||
|   | |||||||
| @@ -95,8 +95,17 @@ void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, Sca | |||||||
|     Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); |     Shuffle(ctx, inst, value, index, clamp, segmentation_mask, "XOR"); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitFSwizzleAdd(EmitContext&, ScalarF32, ScalarF32, ScalarU32) { | void EmitFSwizzleAdd(EmitContext& ctx, IR::Inst& inst, ScalarF32 op_a, ScalarF32 op_b, | ||||||
|     throw NotImplementedException("GLASM instruction"); |                      ScalarU32 swizzle) { | ||||||
|  |     const auto ret{ctx.reg_alloc.Define(inst)}; | ||||||
|  |     ctx.Add("AND.U RC.z,{}.threadid,3;" | ||||||
|  |             "SHL.U RC.z,RC.z,1;" | ||||||
|  |             "SHR.U RC.z,{},RC.z;" | ||||||
|  |             "AND.U RC.z,RC.z,3;" | ||||||
|  |             "MUL.F RC.x,{},FSWZA[RC.z];" | ||||||
|  |             "MUL.F RC.y,{},FSWZB[RC.z];" | ||||||
|  |             "ADD.F {}.x,RC.x,RC.y;", | ||||||
|  |             ctx.stage_name, swizzle, op_a, op_b, ret); | ||||||
| } | } | ||||||
|  |  | ||||||
| void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | void EmitDPdxFine(EmitContext& ctx, IR::Inst& inst, ScalarF32 p) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user