From 08d751d88278542dcbd03e5f9cbf76588475e49d Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 01:58:30 -0300
Subject: [PATCH 1/6] gl_shader_decompiler: Setup base for half float unpacking
 and setting

---
 src/video_core/engines/shader_bytecode.h      | 20 +++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 78 +++++++++++++++++++
 2 files changed, 98 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 9a59b65b3..d6d46d277 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -335,6 +335,26 @@ enum class IsberdMode : u64 {
 
 enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 };
 
+enum class HalfType : u64 {
+    H0_H1 = 0, // Unpack both halves of the operand (unpackHalf2x16)
+    F32 = 1,   // Reinterpret the operand as a 32-bit float and broadcast it
+    H0_H0 = 2, // Broadcast the first unpacked half (.x) to both components
+    H1_H1 = 3, // Broadcast the second unpacked half (.y) to both components
+};
+
+enum class HalfMerge : u64 {
+    H0_H1 = 0,  // Write both halves, packed with packHalf2x16
+    F32 = 1,    // Write the first component of the result as a 32-bit float
+    Mrg_H0 = 2, // Replace only the low 16 bits of the destination register
+    Mrg_H1 = 3, // Replace only the high 16 bits of the destination register
+};
+
+enum class HalfPrecision : u64 {
+    None = 0, // Only mode the HMUL2_IMM/HFMA2 handlers accept; others assert
+    FTZ = 1,  // NOTE(review): presumably flush-to-zero - confirm
+    FMZ = 2,  // NOTE(review): semantics not visible here - confirm
+};
+
 enum class IpaInterpMode : u64 {
     Linear = 0,
     Perspective = 1,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8dfb49507..c6ae8c3b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -375,6 +375,49 @@ public:
         }
     }
 
+    /**
+     * Writes code that does a register assignment to a half float value operation.
+     * @param reg The destination register to use.
+     * @param elem The element to use for the operation.
+     * @param value The code representing the value to assign. Has to be a half float pair.
+     * @param merge How the half float pair is merged into the destination register.
+     * @param dest_num_components Number of components in the destination.
+     * @param value_num_components Number of components in the value.
+     * @param is_saturated Optional, saturation is unimplemented and asserts when true.
+     * @param dest_elem Optional, the destination element to use for the operation.
+     */
+    void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value,
+                                Tegra::Shader::HalfMerge merge, u64 dest_num_components,
+                                u64 value_num_components, bool is_saturated = false,
+                                u64 dest_elem = 0) {
+        ASSERT_MSG(!is_saturated, "Unimplemented");
+        // Every branch yields a float expression; raw bits are cast with uintBitsToFloat.
+        const std::string result = [&]() {
+            switch (merge) {
+            case Tegra::Shader::HalfMerge::H0_H1:
+                return "uintBitsToFloat(packHalf2x16(" + value + "))";
+            case Tegra::Shader::HalfMerge::F32:
+                // Half float instructions take the first component when doing a float cast.
+                return "float(" + value + ".x)";
+            case Tegra::Shader::HalfMerge::Mrg_H0:
+                // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the
+                // pack. I couldn't test this on hardware but it shouldn't really matter since most
+                // of the time when a Mrg_* flag is used both components will be mirrored. That
+                // being said, it deserves a test.
+                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
+                       " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))";
+            case Tegra::Shader::HalfMerge::Mrg_H1:
+                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
+                       " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))";
+            default:
+                UNREACHABLE();
+                return std::string("0");
+            }
+        }();
+
+        SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem);
+    }
+
     /**
      * Writes code that does a register assignment to input attribute operation. Input attributes
      * are stored as floats, so this may require conversion.
@@ -1012,6 +1055,41 @@ private:
         return result;
     }
 
+    /*
+     * Transforms the input string GLSL operand into an unpacked half float pair.
+     * @note This function returns a float type pair instead of a half float pair. This is because
+     * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is.
+     * @param operand Input operand. It has to be an unsigned integer.
+     * @param type How to unpack the unsigned integer to a half float pair.
+     * @param abs Get the absolute value of unpacked half floats.
+     * @param neg Get the negative value of unpacked half floats.
+     * @returns String corresponding to a half float pair.
+     */
+    static std::string GetHalfFloat(const std::string& operand,
+                                    Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1,
+                                    bool abs = false, bool neg = false) {
+        // "vec2" calls emitted in this function are intended to alias components.
+        const std::string value = [&]() {
+            switch (type) {
+            case Tegra::Shader::HalfType::H0_H1:
+                return "unpackHalf2x16(" + operand + ')';
+            case Tegra::Shader::HalfType::F32:
+                return "vec2(uintBitsToFloat(" + operand + "))";
+            case Tegra::Shader::HalfType::H0_H0:
+            case Tegra::Shader::HalfType::H1_H1: {
+                const bool high = type == Tegra::Shader::HalfType::H1_H1;
+                const char unpack_index = "xy"[high ? 1 : 0];
+                return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')';
+            }
+            default:
+                UNREACHABLE();
+                return std::string("vec2(0)");
+            }
+        }();
+
+        return GetOperandAbsNeg(value, abs, neg);
+    }
+
     /*
      * Returns whether the instruction at the specified offset is a 'sched' instruction.
      * Sched instructions always appear before a sequence of 3 instructions.

From d46e2a6e7ac22661d6debe090c8c6b25d565613a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 02:04:31 -0300
Subject: [PATCH 2/6] gl_shader_decompiler: Implement non-immediate HADD2 and
 HMUL2 instructions

---
 src/video_core/engines/shader_bytecode.h      | 25 ++++++++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 50 +++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d6d46d277..f84b9883c 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -573,6 +573,22 @@ union Instruction {
         BitField<49, 1, u64> negate_a;
     } alu_integer;
 
+    union {
+        BitField<39, 1, u64> ftz;
+        BitField<32, 1, u64> saturate;
+        BitField<49, 2, HalfMerge> merge;
+
+        BitField<43, 1, u64> negate_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, HalfType> type_a;
+
+        BitField<31, 1, u64> negate_b;
+        BitField<30, 1, u64> abs_b;
+        BitField<47, 2, HalfType> type_b; // NOTE(review): aliases type_a (bit 47); confirm
+
+        BitField<35, 2, HalfType> type_c;
+    } alu_half;
+
     union {
         BitField<40, 1, u64> invert;
     } popc;
@@ -1165,6 +1181,10 @@ public:
         LEA_RZ,
         LEA_IMM,
         LEA_HI,
+        HADD2_C,
+        HADD2_R,
+        HMUL2_C,
+        HMUL2_R,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -1238,6 +1258,7 @@ public:
         ArithmeticImmediate,
         ArithmeticInteger,
         ArithmeticIntegerImmediate,
+        ArithmeticHalf,
         Bfe,
         Shift,
         Ffma,
@@ -1409,6 +1430,10 @@ private:
             INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
             INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
             INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
+            INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
+            INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
+            INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
+            INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index c6ae8c3b4..a1a0babe8 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1827,6 +1827,56 @@ private:
 
             break;
         }
+        case OpCode::Type::ArithmeticHalf: {
+            if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) {
+                ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented");
+            }
+            const bool negate_a =
+                opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0;
+            const bool negate_b =
+                opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0;
+
+            const std::string op_a =
+                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a,
+                             instr.alu_half.abs_a != 0, negate_a);
+
+            std::string op_b;
+            switch (opcode->GetId()) {
+            case OpCode::Id::HADD2_C:
+            case OpCode::Id::HMUL2_C:
+                op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                       GLSLRegister::Type::UnsignedInteger);
+                break;
+            case OpCode::Id::HADD2_R:
+            case OpCode::Id::HMUL2_R:
+                op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false);
+                break;
+            default:
+                UNREACHABLE();
+                op_b = "0";
+                break;
+            }
+            op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b);
+
+            const std::string result = [&]() {
+                switch (opcode->GetId()) {
+                case OpCode::Id::HADD2_C:
+                case OpCode::Id::HADD2_R:
+                    return '(' + op_a + " + " + op_b + ')';
+                case OpCode::Id::HMUL2_C:
+                case OpCode::Id::HMUL2_R:
+                    return '(' + op_a + " * " + op_b + ')';
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName());
+                    UNREACHABLE();
+                    return std::string("0");
+                }
+            }();
+
+            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1,
+                                        instr.alu_half.saturate != 0);
+            break;
+        }
         case OpCode::Type::Ffma: {
             const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";

From d93cdc27505f86633d3a947fb48a6bcb92d790d0 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 02:07:16 -0300
Subject: [PATCH 3/6] gl_shader_decompiler: Implement HADD2_IMM and HMUL2_IMM

---
 src/video_core/engines/shader_bytecode.h      | 30 +++++++++++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 43 +++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index f84b9883c..3fbdd20b8 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -589,6 +589,31 @@ union Instruction {
         BitField<35, 2, HalfType> type_c;
     } alu_half;
 
+    union {
+        BitField<39, 2, HalfPrecision> precision;
+        BitField<39, 1, u64> ftz; // Overlaps the low bit of `precision`
+        BitField<52, 1, u64> saturate;
+        BitField<49, 2, HalfMerge> merge;
+
+        BitField<43, 1, u64> negate_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, HalfType> type_a;
+    } alu_half_imm;
+
+    union {
+        BitField<29, 1, u64> first_negate;
+        BitField<20, 9, u64> first;
+
+        BitField<56, 1, u64> second_negate;
+        BitField<30, 9, u64> second;
+
+        u32 PackImmediates() const {
+            // Each 9-bit immediate is shifted to bits 6-14 of its 16-bit half of the result.
+            constexpr u32 imm_shift = 6;
+            return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift)));
+        }
+    } half_imm;
+
     union {
         BitField<40, 1, u64> invert;
     } popc;
@@ -1183,8 +1208,10 @@ public:
         LEA_HI,
         HADD2_C,
         HADD2_R,
+        HADD2_IMM,
         HMUL2_C,
         HMUL2_R,
+        HMUL2_IMM,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -1259,6 +1286,7 @@ public:
         ArithmeticInteger,
         ArithmeticIntegerImmediate,
         ArithmeticHalf,
+        ArithmeticHalfImmediate,
         Bfe,
         Shift,
         Ffma,
@@ -1432,8 +1460,10 @@ private:
             INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
             INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"),
             INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"),
+            INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"),
             INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
             INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
+            INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index a1a0babe8..ab30aafc3 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -920,6 +920,19 @@ private:
         return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32());
     }
 
+    /// Generates a vec2 unpacked from the half float immediates; `negate` applies their signs
+    static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) {
+        const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates()));
+        if (!negate) {
+            return immediate;
+        }
+        const std::string negate_first = instr.half_imm.first_negate != 0 ? "-" : "";
+        const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : "";
+        const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)";
+
+        return '(' + immediate + " * " + negate_vec + ')';
+    }
+
     /// Generates code representing a texture sampler.
     std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array,
                            bool is_shadow) {
@@ -1877,6 +1890,36 @@ private:
                                         instr.alu_half.saturate != 0);
             break;
         }
+        case OpCode::Type::ArithmeticHalfImmediate: {
+            if (opcode->GetId() == OpCode::Id::HADD2_IMM) {
+                ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented");
+            } else {
+                ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None,
+                           "Unimplemented");
+            }
+
+            const std::string op_a = GetHalfFloat(
+                regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a,
+                instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0);
+
+            const std::string op_b = UnpackHalfImmediate(instr, true);
+            // Parenthesized so SetRegisterToHalfFloat can append a swizzle such as ".x" safely.
+            const std::string result = [&]() {
+                switch (opcode->GetId()) {
+                case OpCode::Id::HADD2_IMM:
+                    return '(' + op_a + " + " + op_b + ')';
+                case OpCode::Id::HMUL2_IMM:
+                    return '(' + op_a + " * " + op_b + ')';
+                default:
+                    UNREACHABLE();
+                    return std::string("0");
+                }
+            }();
+
+            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1,
+                                        instr.alu_half_imm.saturate != 0);
+            break;
+        }
         case OpCode::Type::Ffma: {
             const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";

From 3d65aa4caf88a440eeaf2082b1f5ca3e2c41317c Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 02:08:31 -0300
Subject: [PATCH 4/6] gl_shader_decompiler: Implement HFMA2 instructions

---
 src/video_core/engines/shader_bytecode.h      | 32 +++++++++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 53 +++++++++++++++++++
 2 files changed, 85 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 3fbdd20b8..23bfd8988 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -614,6 +614,29 @@ union Instruction {
         }
     } half_imm;
 
+    union {
+        union {
+            BitField<37, 2, HalfPrecision> precision;
+            BitField<32, 1, u64> saturate;
+
+            BitField<30, 1, u64> negate_c;
+            BitField<35, 2, HalfType> type_c;
+        } rr; // Field layout read for HFMA2_RR (precision, saturate and operand c)
+
+        BitField<57, 2, HalfPrecision> precision;
+        BitField<52, 1, u64> saturate;
+
+        BitField<49, 2, HalfMerge> merge;
+
+        BitField<47, 2, HalfType> type_a;
+
+        BitField<56, 1, u64> negate_b;
+        BitField<28, 2, HalfType> type_b;
+
+        BitField<51, 1, u64> negate_c;
+        BitField<53, 2, HalfType> type_reg39; // Unpack mode for the gpr39 operand
+    } hfma2;
+
     union {
         BitField<40, 1, u64> invert;
     } popc;
@@ -1212,6 +1235,10 @@ public:
         HMUL2_C,
         HMUL2_R,
         HMUL2_IMM,
+        HFMA2_CR,
+        HFMA2_RC,
+        HFMA2_RR,
+        HFMA2_IMM_R,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -1290,6 +1317,7 @@ public:
         Bfe,
         Shift,
         Ffma,
+        Hfma2,
         Flow,
         Synch,
         Memory,
@@ -1464,6 +1492,10 @@ private:
             INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"),
             INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"),
             INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"),
+            INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"),
+            INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
+            INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
+            INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ab30aafc3..ca2030e97 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1964,6 +1964,59 @@ private:
                                     instr.alu.saturate_d);
             break;
         }
+        case OpCode::Type::Hfma2: {
+            if (opcode->GetId() == OpCode::Id::HFMA2_RR) {
+                ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None,
+                           "Unimplemented");
+            } else {
+                ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None,
+                           "Unimplemented");
+            }
+            const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR
+                                      ? instr.hfma2.rr.saturate != 0
+                                      : instr.hfma2.saturate != 0;
+
+            const std::string op_a =
+                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a);
+            std::string op_b, op_c;
+
+            switch (opcode->GetId()) {
+            case OpCode::Id::HFMA2_CR:
+                op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                                    GLSLRegister::Type::UnsignedInteger),
+                                    instr.hfma2.type_b, false, instr.hfma2.negate_b);
+                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
+                break;
+            case OpCode::Id::HFMA2_RC:
+                op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_b);
+                op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset,
+                                                    GLSLRegister::Type::UnsignedInteger),
+                                    instr.hfma2.type_b, false, instr.hfma2.negate_c);
+                break;
+            case OpCode::Id::HFMA2_RR:
+                op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                                    instr.hfma2.type_b, false, instr.hfma2.negate_b);
+                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+                                    instr.hfma2.rr.type_c, false, instr.hfma2.rr.negate_c);
+                break;
+            case OpCode::Id::HFMA2_IMM_R:
+                op_b = UnpackHalfImmediate(instr, true);
+                op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+                                    instr.hfma2.type_reg39, false, instr.hfma2.negate_c);
+                break;
+            default:
+                UNREACHABLE();
+                op_c = op_b = "vec2(0)";
+                break;
+            }
+
+            const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')';
+
+            regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate);
+            break;
+        }
         case OpCode::Type::Conversion: {
             switch (opcode->GetId()) {
             case OpCode::Id::I2I_R: {

From 4fc8ad67bfc99d1c80c95c0df54360a55a6be011 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 02:09:40 -0300
Subject: [PATCH 5/6] gl_shader_decompiler: Implement HSETP2_R

---
 src/video_core/engines/shader_bytecode.h      | 20 +++++++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 45 +++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 23bfd8988..a6e764ea4 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -800,6 +800,23 @@ union Instruction {
         BitField<45, 4, PredOperation> op; // op with pred39
     } csetp;
 
+    union {
+        BitField<35, 4, PredCondition> cond;
+        BitField<49, 1, u64> h_and; // Combine both half comparisons with && instead of ||
+        BitField<6, 1, u64> ftz;
+        BitField<45, 2, PredOperation> op; // Combiner applied with pred39
+        BitField<3, 3, u64> pred3; // Destination predicate
+        BitField<0, 3, u64> pred0; // Optional destination for the negated comparison
+        BitField<43, 1, u64> negate_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, HalfType> type_a;
+        BitField<31, 1, u64> negate_b;
+        BitField<30, 1, u64> abs_b;
+        BitField<28, 2, HalfType> type_b;
+        BitField<42, 1, u64> neg_pred; // Negates pred39 before combining
+        BitField<39, 3, u64> pred39;
+    } hsetp2;
+
     union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
@@ -1239,6 +1256,7 @@ public:
         HFMA2_RC,
         HFMA2_RR,
         HFMA2_IMM_R,
+        HSETP2_R,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -1325,6 +1343,7 @@ public:
         FloatSetPredicate,
         IntegerSet,
         IntegerSetPredicate,
+        HalfSetPredicate,
         PredicateSetPredicate,
         PredicateSetRegister,
         Conversion,
@@ -1496,6 +1515,7 @@ private:
             INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"),
             INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
             INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
+            INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index ca2030e97..06f85fad2 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2791,6 +2791,51 @@ private:
             }
             break;
         }
+        case OpCode::Type::HalfSetPredicate: {
+            ASSERT_MSG(instr.hsetp2.ftz == 0, "Unimplemented");
+
+            const std::string op_a =
+                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a,
+                             instr.hsetp2.abs_a != 0, instr.hsetp2.negate_a != 0);
+
+            const std::string op_b = [&]() {
+                switch (opcode->GetId()) {
+                case OpCode::Id::HSETP2_R:
+                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                                        instr.hsetp2.type_b, instr.hsetp2.abs_b != 0,
+                                        instr.hsetp2.negate_b != 0);
+                default:
+                    UNREACHABLE();
+                    return std::string("vec2(0)");
+                }
+            }();
+
+            // We can't use the constant predicate as destination.
+            ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex));
+
+            const std::string second_pred =
+                GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0);
+
+            const std::string combiner = GetPredicateCombiner(instr.hsetp2.op);
+
+            const std::string component_combiner = instr.hsetp2.h_and != 0 ? "&&" : "||";
+            const std::string predicate =
+                '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' +
+                component_combiner + ' ' +
+                GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')';
+
+            // Set the primary predicate to the result of Predicate OP SecondPredicate
+            SetPredicate(instr.hsetp2.pred3,
+                         '(' + predicate + ") " + combiner + " (" + second_pred + ')');
+
+            if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
+                // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
+                // if enabled
+                SetPredicate(instr.hsetp2.pred0,
+                             "!(" + predicate + ") " + combiner + " (" + second_pred + ')');
+            }
+            break;
+        }
         case OpCode::Type::PredicateSetRegister: {
             const std::string op_a =
                 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);

From 6312eec5ef650ca5363ef4cfa08c2d38ffb6a0fe Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Oct 2018 02:10:15 -0300
Subject: [PATCH 6/6] gl_shader_decompiler: Implement HSET2_R

---
 src/video_core/engines/shader_bytecode.h      | 18 ++++++++
 .../renderer_opengl/gl_shader_decompiler.cpp  | 44 +++++++++++++++++++
 2 files changed, 62 insertions(+)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index a6e764ea4..39ae065de 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -831,6 +831,21 @@ union Instruction {
         BitField<56, 1, u64> neg_imm;
     } fset;
 
+    union {
+        BitField<49, 1, u64> bf; // When set, true halves become 0x3c00 instead of 0xffff
+        BitField<35, 3, PredCondition> cond; // NOTE(review): 3 bits here, 4 in hsetp2 - confirm
+        BitField<50, 1, u64> ftz;
+        BitField<45, 2, PredOperation> op; // Combiner applied with pred39
+        BitField<43, 1, u64> negate_a;
+        BitField<44, 1, u64> abs_a;
+        BitField<47, 2, HalfType> type_a;
+        BitField<31, 1, u64> negate_b;
+        BitField<30, 1, u64> abs_b;
+        BitField<28, 2, HalfType> type_b;
+        BitField<42, 1, u64> neg_pred; // Negates pred39 before combining
+        BitField<39, 3, u64> pred39;
+    } hset2;
+
     union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
@@ -1257,6 +1272,7 @@ public:
         HFMA2_RR,
         HFMA2_IMM_R,
         HSETP2_R,
+        HSET2_R,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -1343,6 +1359,7 @@ public:
         FloatSetPredicate,
         IntegerSet,
         IntegerSetPredicate,
+        HalfSet,
         HalfSetPredicate,
         PredicateSetPredicate,
         PredicateSetRegister,
@@ -1516,6 +1533,7 @@ private:
             INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"),
             INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"),
             INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"),
+            INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 06f85fad2..23349b1a1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2996,6 +2996,50 @@ private:
             }
             break;
         }
+        case OpCode::Type::HalfSet: {
+            ASSERT_MSG(instr.hset2.ftz == 0, "Unimplemented");
+
+            const std::string op_a =
+                GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a,
+                             instr.hset2.abs_a != 0, instr.hset2.negate_a != 0);
+
+            const std::string op_b = [&]() {
+                switch (opcode->GetId()) {
+                case OpCode::Id::HSET2_R:
+                    return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false),
+                                        instr.hset2.type_b, instr.hset2.abs_b != 0,
+                                        instr.hset2.negate_b != 0);
+                default:
+                    UNREACHABLE();
+                    return std::string("vec2(0)");
+                }
+            }();
+
+            const std::string second_pred =
+                GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0);
+
+            const std::string combiner = GetPredicateCombiner(instr.hset2.op);
+
+            // HSET2 operates on each half float in the pack.
+            std::string result;
+            for (int i = 0; i < 2; ++i) {
+                const std::string float_value = i == 0 ? "0x00003c00" : "0x3c000000";
+                const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000";
+                const std::string value = instr.hset2.bf == 1 ? float_value : integer_value;
+
+                const std::string comp = std::string(".") + "xy"[i];
+                const std::string predicate =
+                    "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) +
+                    ") " + combiner + " (" + second_pred + "))";
+
+                result += '(' + predicate + " ? " + value + " : 0)";
+                if (i == 0) {
+                    result += " | ";
+                }
+            }
+            regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1);
+            break;
+        }
         case OpCode::Type::Xmad: {
             ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented");
             ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");