shader: Fix floating point comparison for FP16
This commit is contained in:
		@@ -72,7 +72,7 @@ bool IsCompareOpOrdered(FPCompareOp op) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2,
 | 
			
		||||
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2,
 | 
			
		||||
                            FPCompareOp compare_op, IR::FpControl control) {
 | 
			
		||||
    const bool ordered{IsCompareOpOrdered(compare_op)};
 | 
			
		||||
    switch (compare_op) {
 | 
			
		||||
 
 | 
			
		||||
@@ -18,7 +18,7 @@ namespace Shader::Maxwell {
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
 | 
			
		||||
 | 
			
		||||
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1,
 | 
			
		||||
                                          const IR::F32& operand_2, FPCompareOp compare_op,
 | 
			
		||||
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1,
 | 
			
		||||
                                          const IR::F16F32F64& operand_2, FPCompareOp compare_op,
 | 
			
		||||
                                          IR::FpControl control = {});
 | 
			
		||||
} // namespace Shader::Maxwell
 | 
			
		||||
 
 | 
			
		||||
@@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
 | 
			
		||||
 | 
			
		||||
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)};
 | 
			
		||||
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
 | 
			
		||||
    // TODO: Implement FP16 FloatingPointCompare
 | 
			
		||||
    //if (lhs_a.Type() != lhs_b.Type()) {
 | 
			
		||||
 | 
			
		||||
    if (lhs_a.Type() != lhs_b.Type()) {
 | 
			
		||||
        if (lhs_a.Type() == IR::Type::F16) {
 | 
			
		||||
            lhs_a = v.ir.FPConvert(32, lhs_a);
 | 
			
		||||
            rhs_a = v.ir.FPConvert(32, rhs_a);
 | 
			
		||||
@@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f
 | 
			
		||||
            lhs_b = v.ir.FPConvert(32, lhs_b);
 | 
			
		||||
            rhs_b = v.ir.FPConvert(32, rhs_b);
 | 
			
		||||
        }
 | 
			
		||||
    //}
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
 | 
			
		||||
    rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0);
 | 
			
		||||
@@ -94,22 +94,22 @@ void TranslatorVisitor::HSET2_cbuf(u64 insn) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void TranslatorVisitor::HSET2_imm(u64 insn) {
 | 
			
		||||
  union {
 | 
			
		||||
      u64 insn;
 | 
			
		||||
      BitField<53, 1, u64> bf;
 | 
			
		||||
      BitField<54, 1, u64> ftz;
 | 
			
		||||
      BitField<49, 4, FPCompareOp> compare_op;
 | 
			
		||||
      BitField<56, 1, u64> neg_high;
 | 
			
		||||
      BitField<30, 9, u64> high;
 | 
			
		||||
      BitField<29, 1, u64> neg_low;
 | 
			
		||||
      BitField<20, 9, u64> low;
 | 
			
		||||
  } const hset2{insn};
 | 
			
		||||
    union {
 | 
			
		||||
        u64 insn;
 | 
			
		||||
        BitField<53, 1, u64> bf;
 | 
			
		||||
        BitField<54, 1, u64> ftz;
 | 
			
		||||
        BitField<49, 4, FPCompareOp> compare_op;
 | 
			
		||||
        BitField<56, 1, u64> neg_high;
 | 
			
		||||
        BitField<30, 9, u64> high;
 | 
			
		||||
        BitField<29, 1, u64> neg_low;
 | 
			
		||||
        BitField<20, 9, u64> low;
 | 
			
		||||
    } const hset2{insn};
 | 
			
		||||
 | 
			
		||||
  const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
 | 
			
		||||
                static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
 | 
			
		||||
    const u32 imm{static_cast<u32>(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) |
 | 
			
		||||
                  static_cast<u32>(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)};
 | 
			
		||||
 | 
			
		||||
  HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false,
 | 
			
		||||
        hset2.compare_op, Swizzle::H1_H0);
 | 
			
		||||
    HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op,
 | 
			
		||||
          Swizzle::H1_H0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
} // namespace Shader::Maxwell
 | 
			
		||||
 
 | 
			
		||||
@@ -24,17 +24,17 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo
 | 
			
		||||
 | 
			
		||||
    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)};
 | 
			
		||||
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
 | 
			
		||||
    // TODO: Implement FP16 FloatingPointCompare
 | 
			
		||||
    // if (lhs_a.Type() != lhs_b.Type()) {
 | 
			
		||||
    if (lhs_a.Type() == IR::Type::F16) {
 | 
			
		||||
        lhs_a = v.ir.FPConvert(32, lhs_a);
 | 
			
		||||
        rhs_a = v.ir.FPConvert(32, rhs_a);
 | 
			
		||||
 | 
			
		||||
    if (lhs_a.Type() != lhs_b.Type()) {
 | 
			
		||||
        if (lhs_a.Type() == IR::Type::F16) {
 | 
			
		||||
            lhs_a = v.ir.FPConvert(32, lhs_a);
 | 
			
		||||
            rhs_a = v.ir.FPConvert(32, rhs_a);
 | 
			
		||||
        }
 | 
			
		||||
        if (lhs_b.Type() == IR::Type::F16) {
 | 
			
		||||
            lhs_b = v.ir.FPConvert(32, lhs_b);
 | 
			
		||||
            rhs_b = v.ir.FPConvert(32, rhs_b);
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    if (lhs_b.Type() == IR::Type::F16) {
 | 
			
		||||
        lhs_b = v.ir.FPConvert(32, lhs_b);
 | 
			
		||||
        rhs_b = v.ir.FPConvert(32, rhs_b);
 | 
			
		||||
    }
 | 
			
		||||
    //}
 | 
			
		||||
 | 
			
		||||
    lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
 | 
			
		||||
    rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0);
 | 
			
		||||
 
 | 
			
		||||
@@ -50,6 +50,30 @@ IR::Opcode Replace(IR::Opcode op) {
 | 
			
		||||
        return IR::Opcode::CompositeInsertF32x3;
 | 
			
		||||
    case IR::Opcode::CompositeInsertF16x4:
 | 
			
		||||
        return IR::Opcode::CompositeInsertF32x4;
 | 
			
		||||
    case IR::Opcode::FPOrdEqual16:
 | 
			
		||||
        return IR::Opcode::FPOrdEqual32;
 | 
			
		||||
    case IR::Opcode::FPUnordEqual16:
 | 
			
		||||
        return IR::Opcode::FPUnordEqual32;
 | 
			
		||||
    case IR::Opcode::FPOrdNotEqual16:
 | 
			
		||||
        return IR::Opcode::FPOrdNotEqual32;
 | 
			
		||||
    case IR::Opcode::FPUnordNotEqual16:
 | 
			
		||||
        return IR::Opcode::FPUnordNotEqual32;
 | 
			
		||||
    case IR::Opcode::FPOrdLessThan16:
 | 
			
		||||
        return IR::Opcode::FPOrdLessThan32;
 | 
			
		||||
    case IR::Opcode::FPUnordLessThan16:
 | 
			
		||||
        return IR::Opcode::FPUnordLessThan32;
 | 
			
		||||
    case IR::Opcode::FPOrdGreaterThan16:
 | 
			
		||||
        return IR::Opcode::FPOrdGreaterThan32;
 | 
			
		||||
    case IR::Opcode::FPUnordGreaterThan16:
 | 
			
		||||
        return IR::Opcode::FPUnordGreaterThan32;
 | 
			
		||||
    case IR::Opcode::FPOrdLessThanEqual16:
 | 
			
		||||
        return IR::Opcode::FPOrdLessThanEqual32;
 | 
			
		||||
    case IR::Opcode::FPUnordLessThanEqual16:
 | 
			
		||||
        return IR::Opcode::FPUnordLessThanEqual32;
 | 
			
		||||
    case IR::Opcode::FPOrdGreaterThanEqual16:
 | 
			
		||||
        return IR::Opcode::FPOrdGreaterThanEqual32;
 | 
			
		||||
    case IR::Opcode::FPUnordGreaterThanEqual16:
 | 
			
		||||
        return IR::Opcode::FPUnordGreaterThanEqual32;
 | 
			
		||||
    case IR::Opcode::ConvertS16F16:
 | 
			
		||||
        return IR::Opcode::ConvertS16F32;
 | 
			
		||||
    case IR::Opcode::ConvertS32F16:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user