From 08d751d88278542dcbd03e5f9cbf76588475e49d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 01:58:30 -0300 Subject: [PATCH 1/6] gl_shader_decompiler: Setup base for half float unpacking and setting --- src/video_core/engines/shader_bytecode.h | 20 +++++ .../renderer_opengl/gl_shader_decompiler.cpp | 78 +++++++++++++++++++ 2 files changed, 98 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 9a59b65b3..d6d46d277 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -335,6 +335,26 @@ enum class IsberdMode : u64 { enum class IsberdShift : u64 { None = 0, U16 = 1, B32 = 2 }; +enum class HalfType : u64 { + H0_H1 = 0, + F32 = 1, + H0_H0 = 2, + H1_H1 = 3, +}; + +enum class HalfMerge : u64 { + H0_H1 = 0, + F32 = 1, + Mrg_H0 = 2, + Mrg_H1 = 3, +}; + +enum class HalfPrecision : u64 { + None = 0, + FTZ = 1, + FMZ = 2, +}; + enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 8dfb49507..c6ae8c3b4 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -375,6 +375,49 @@ public: } } + /** + * Writes code that does a register assignment to a half float value operation. + * @param reg The destination register to use. + * @param elem The element to use for the operation. + * @param value The code representing the value to assign. Type has to be half float. + * @param merge Half float kind of assignment. + * @param dest_num_components Number of components in the destination. + * @param value_num_components Number of components in the value. + * @param is_saturated Optional, when True, saturates the provided value. + * @param dest_elem Optional, the destination element to use for the operation. 
+ */ + void SetRegisterToHalfFloat(const Register& reg, u64 elem, const std::string& value, + Tegra::Shader::HalfMerge merge, u64 dest_num_components, + u64 value_num_components, bool is_saturated = false, + u64 dest_elem = 0) { + ASSERT_MSG(!is_saturated, "Unimplemented"); + + const std::string result = [&]() { + switch (merge) { + case Tegra::Shader::HalfMerge::H0_H1: + return "uintBitsToFloat(packHalf2x16(" + value + "))"; + case Tegra::Shader::HalfMerge::F32: + // Half float instructions take the first component when doing a float cast. + return "float(" + value + ".x)"; + case Tegra::Shader::HalfMerge::Mrg_H0: + // TODO(Rodrigo): I guess Mrg_H0 and Mrg_H1 take their respective component from the + // pack. I couldn't test this on hardware but it shouldn't really matter since most + // of the time when a Mrg_* flag is used both components will be mirrored. That + // being said, it deserves a test. + return "((" + GetRegisterAsInteger(reg, 0, false) + + " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))"; + case Tegra::Shader::HalfMerge::Mrg_H1: + return "((" + GetRegisterAsInteger(reg, 0, false) + + " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))"; + default: + UNREACHABLE(); + return std::string("0"); + } + }(); + + SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem); + } + /** * Writes code that does a register assignment to input attribute operation. Input attributes * are stored as floats, so this may require conversion. @@ -1012,6 +1055,41 @@ private: return result; } + /* + * Transforms the input string GLSL operand into an unpacked half float pair. + * @note This function returns a float type pair instead of a half float pair. This is because + * real half floats are not standardized in GLSL but unpackHalf2x16 (which returns a vec2) is. + * @param operand Input operand. It has to be an unsigned integer. + * @param type How to unpack the unsigned integer to a half float pair. 
+ * @param abs Get the absolute value of unpacked half floats. + * @param neg Get the negative value of unpacked half floats. + * @returns String corresponding to a half float pair. + */ + static std::string GetHalfFloat(const std::string& operand, + Tegra::Shader::HalfType type = Tegra::Shader::HalfType::H0_H1, + bool abs = false, bool neg = false) { + // "vec2" calls emitted in this function are intended to alias components. + const std::string value = [&]() { + switch (type) { + case Tegra::Shader::HalfType::H0_H1: + return "unpackHalf2x16(" + operand + ')'; + case Tegra::Shader::HalfType::F32: + return "vec2(uintBitsToFloat(" + operand + "))"; + case Tegra::Shader::HalfType::H0_H0: + case Tegra::Shader::HalfType::H1_H1: { + const bool high = type == Tegra::Shader::HalfType::H1_H1; + const char unpack_index = "xy"[high ? 1 : 0]; + return "vec2(unpackHalf2x16(" + operand + ")." + unpack_index + ')'; + } + default: + UNREACHABLE(); + return std::string("vec2(0)"); + } + }(); + + return GetOperandAbsNeg(value, abs, neg); + } + /* * Returns whether the instruction at the specified offset is a 'sched' instruction. * Sched instructions always appear before a sequence of 3 instructions. 
From d46e2a6e7ac22661d6debe090c8c6b25d565613a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:04:31 -0300 Subject: [PATCH 2/6] gl_shader_decompiler: Implement non-immediate HADD2 and HMUL2 instructions --- src/video_core/engines/shader_bytecode.h | 25 ++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 50 +++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index d6d46d277..f84b9883c 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -573,6 +573,22 @@ union Instruction { BitField<49, 1, u64> negate_a; } alu_integer; + union { + BitField<39, 1, u64> ftz; + BitField<32, 1, u64> saturate; + BitField<49, 2, HalfMerge> merge; + + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, HalfType> type_a; + + BitField<31, 1, u64> negate_b; + BitField<30, 1, u64> abs_b; + BitField<47, 2, HalfType> type_b; + + BitField<35, 2, HalfType> type_c; + } alu_half; + union { BitField<40, 1, u64> invert; } popc; @@ -1165,6 +1181,10 @@ public: LEA_RZ, LEA_IMM, LEA_HI, + HADD2_C, + HADD2_R, + HMUL2_C, + HMUL2_R, POPC_C, POPC_R, POPC_IMM, @@ -1238,6 +1258,7 @@ public: ArithmeticImmediate, ArithmeticInteger, ArithmeticIntegerImmediate, + ArithmeticHalf, Bfe, Shift, Ffma, @@ -1409,6 +1430,10 @@ private: INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"), INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"), INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), + INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), + INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), + INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), + INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), 
INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c6ae8c3b4..a1a0babe8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1827,6 +1827,56 @@ private: break; } + case OpCode::Type::ArithmeticHalf: { + if (opcode->GetId() == OpCode::Id::HADD2_C || opcode->GetId() == OpCode::Id::HADD2_R) { + ASSERT_MSG(instr.alu_half.ftz == 0, "Unimplemented"); + } + const bool negate_a = + opcode->GetId() != OpCode::Id::HMUL2_R && instr.alu_half.negate_a != 0; + const bool negate_b = + opcode->GetId() != OpCode::Id::HMUL2_C && instr.alu_half.negate_b != 0; + + const std::string op_a = + GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half.type_a, + instr.alu_half.abs_a != 0, negate_a); + + std::string op_b; + switch (opcode->GetId()) { + case OpCode::Id::HADD2_C: + case OpCode::Id::HMUL2_C: + op_b = regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::UnsignedInteger); + break; + case OpCode::Id::HADD2_R: + case OpCode::Id::HMUL2_R: + op_b = regs.GetRegisterAsInteger(instr.gpr20, 0, false); + break; + default: + UNREACHABLE(); + op_b = "0"; + break; + } + op_b = GetHalfFloat(op_b, instr.alu_half.type_b, instr.alu_half.abs_b != 0, negate_b); + + const std::string result = [&]() { + switch (opcode->GetId()) { + case OpCode::Id::HADD2_C: + case OpCode::Id::HADD2_R: + return '(' + op_a + " + " + op_b + ')'; + case OpCode::Id::HMUL2_C: + case OpCode::Id::HMUL2_R: + return '(' + op_a + " * " + op_b + ')'; + default: + LOG_CRITICAL(HW_GPU, "Unhandled half float instruction: {}", opcode->GetName()); + UNREACHABLE(); + return std::string("0"); + } + }(); + + regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half.merge, 1, 1, + 
instr.alu_half.saturate != 0); + break; + } case OpCode::Type::Ffma: { const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = instr.ffma.negate_b ? "-" : ""; From d93cdc27505f86633d3a947fb48a6bcb92d790d0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:07:16 -0300 Subject: [PATCH 3/6] gl_shader_decompiler: Implement HADD2_IMM and HMUL2_IMM --- src/video_core/engines/shader_bytecode.h | 30 +++++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 43 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index f84b9883c..3fbdd20b8 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -589,6 +589,31 @@ union Instruction { BitField<35, 2, HalfType> type_c; } alu_half; + union { + BitField<39, 2, HalfPrecision> precision; + BitField<39, 1, u64> ftz; + BitField<52, 1, u64> saturate; + BitField<49, 2, HalfMerge> merge; + + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, HalfType> type_a; + } alu_half_imm; + + union { + BitField<29, 1, u64> first_negate; + BitField<20, 9, u64> first; + + BitField<56, 1, u64> second_negate; + BitField<30, 9, u64> second; + + u32 PackImmediates() const { + // Immediates are half floats shifted. 
+ constexpr u32 imm_shift = 6; + return static_cast<u32>((first << imm_shift) | (second << (16 + imm_shift))); + } + } half_imm; + union { BitField<40, 1, u64> invert; } popc; @@ -1183,8 +1208,10 @@ public: LEA_HI, HADD2_C, HADD2_R, + HADD2_IMM, HMUL2_C, HMUL2_R, + HMUL2_IMM, POPC_C, POPC_R, POPC_IMM, @@ -1259,6 +1286,7 @@ public: ArithmeticInteger, ArithmeticIntegerImmediate, ArithmeticHalf, + ArithmeticHalfImmediate, Bfe, Shift, Ffma, @@ -1432,8 +1460,10 @@ private: INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"), INST("0111101-1-------", Id::HADD2_C, Type::ArithmeticHalf, "HADD2_C"), INST("0101110100010---", Id::HADD2_R, Type::ArithmeticHalf, "HADD2_R"), + INST("0111101-0-------", Id::HADD2_IMM, Type::ArithmeticHalfImmediate, "HADD2_IMM"), INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), + INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index a1a0babe8..ab30aafc3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -920,6 +920,19 @@ private: return fmt::format("uintBitsToFloat({})", instr.alu.GetImm20_32()); } + /// Generates code representing a vec2 pair unpacked from a half float immediate + static std::string UnpackHalfImmediate(const Instruction& instr, bool negate) { + const std::string immediate = GetHalfFloat(std::to_string(instr.half_imm.PackImmediates())); + if (!negate) { + return immediate; + } + const std::string negate_first = instr.half_imm.first_negate != 0 ? 
"-" : ""; + const std::string negate_second = instr.half_imm.second_negate != 0 ? "-" : ""; + const std::string negate_vec = "vec2(" + negate_first + "1, " + negate_second + "1)"; + + return '(' + immediate + " * " + negate_vec + ')'; + } + /// Generates code representing a texture sampler. std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array, bool is_shadow) { @@ -1877,6 +1890,36 @@ private: instr.alu_half.saturate != 0); break; } + case OpCode::Type::ArithmeticHalfImmediate: { + if (opcode->GetId() == OpCode::Id::HADD2_IMM) { + ASSERT_MSG(instr.alu_half_imm.ftz == 0, "Unimplemented"); + } else { + ASSERT_MSG(instr.alu_half_imm.precision == Tegra::Shader::HalfPrecision::None, + "Unimplemented"); + } + + const std::string op_a = GetHalfFloat( + regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.alu_half_imm.type_a, + instr.alu_half_imm.abs_a != 0, instr.alu_half_imm.negate_a != 0); + + const std::string op_b = UnpackHalfImmediate(instr, true); + + const std::string result = [&]() { + switch (opcode->GetId()) { + case OpCode::Id::HADD2_IMM: + return op_a + " + " + op_b; + case OpCode::Id::HMUL2_IMM: + return op_a + " * " + op_b; + default: + UNREACHABLE(); + return std::string("0"); + } + }(); + + regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.alu_half_imm.merge, 1, 1, + instr.alu_half_imm.saturate != 0); + break; + } case OpCode::Type::Ffma: { const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8); std::string op_b = instr.ffma.negate_b ? 
"-" : ""; From 3d65aa4caf88a440eeaf2082b1f5ca3e2c41317c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:08:31 -0300 Subject: [PATCH 4/6] gl_shader_decompiler: Implement HFMA2 instructions --- src/video_core/engines/shader_bytecode.h | 32 +++++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 53 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 3fbdd20b8..23bfd8988 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -614,6 +614,29 @@ union Instruction { } } half_imm; + union { + union { + BitField<37, 2, HalfPrecision> precision; + BitField<32, 1, u64> saturate; + + BitField<30, 1, u64> negate_c; + BitField<35, 2, HalfType> type_c; + } rr; + + BitField<57, 2, HalfPrecision> precision; + BitField<52, 1, u64> saturate; + + BitField<49, 2, HalfMerge> merge; + + BitField<47, 2, HalfType> type_a; + + BitField<56, 1, u64> negate_b; + BitField<28, 2, HalfType> type_b; + + BitField<51, 1, u64> negate_c; + BitField<53, 2, HalfType> type_reg39; + } hfma2; + union { BitField<40, 1, u64> invert; } popc; @@ -1212,6 +1235,10 @@ public: HMUL2_C, HMUL2_R, HMUL2_IMM, + HFMA2_CR, + HFMA2_RC, + HFMA2_RR, + HFMA2_IMM_R, POPC_C, POPC_R, POPC_IMM, @@ -1290,6 +1317,7 @@ public: Bfe, Shift, Ffma, + Hfma2, Flow, Synch, Memory, @@ -1464,6 +1492,10 @@ private: INST("0111100-1-------", Id::HMUL2_C, Type::ArithmeticHalf, "HMUL2_C"), INST("0101110100001---", Id::HMUL2_R, Type::ArithmeticHalf, "HMUL2_R"), INST("0111100-0-------", Id::HMUL2_IMM, Type::ArithmeticHalfImmediate, "HMUL2_IMM"), + INST("01110---1-------", Id::HFMA2_CR, Type::Hfma2, "HFMA2_CR"), + INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), + INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), + INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, 
"MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ab30aafc3..ca2030e97 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1964,6 +1964,59 @@ private: instr.alu.saturate_d); break; } + case OpCode::Type::Hfma2: { + if (opcode->GetId() == OpCode::Id::HFMA2_RR) { + ASSERT_MSG(instr.hfma2.rr.precision == Tegra::Shader::HalfPrecision::None, + "Unimplemented"); + } else { + ASSERT_MSG(instr.hfma2.precision == Tegra::Shader::HalfPrecision::None, + "Unimplemented"); + } + const bool saturate = opcode->GetId() == OpCode::Id::HFMA2_RR + ? instr.hfma2.rr.saturate != 0 + : instr.hfma2.saturate != 0; + + const std::string op_a = + GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hfma2.type_a); + std::string op_b, op_c; + + switch (opcode->GetId()) { + case OpCode::Id::HFMA2_CR: + op_b = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::UnsignedInteger), + instr.hfma2.type_b, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_c); + break; + case OpCode::Id::HFMA2_RC: + op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetUniform(instr.cbuf34.index, instr.cbuf34.offset, + GLSLRegister::Type::UnsignedInteger), + instr.hfma2.type_b, false, instr.hfma2.negate_c); + break; + case OpCode::Id::HFMA2_RR: + op_b = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), + instr.hfma2.type_b, false, instr.hfma2.negate_b); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.rr.type_c, 
false, instr.hfma2.rr.negate_c); + break; + case OpCode::Id::HFMA2_IMM_R: + op_b = UnpackHalfImmediate(instr, true); + op_c = GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr39, 0, false), + instr.hfma2.type_reg39, false, instr.hfma2.negate_c); + break; + default: + UNREACHABLE(); + op_c = op_b = "vec2(0)"; + break; + } + + const std::string result = '(' + op_a + " * " + op_b + " + " + op_c + ')'; + + regs.SetRegisterToHalfFloat(instr.gpr0, 0, result, instr.hfma2.merge, 1, 1, saturate); + break; + } case OpCode::Type::Conversion: { switch (opcode->GetId()) { case OpCode::Id::I2I_R: { From 4fc8ad67bfc99d1c80c95c0df54360a55a6be011 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:09:40 -0300 Subject: [PATCH 5/6] gl_shader_decompiler: Implement HSETP2_R --- src/video_core/engines/shader_bytecode.h | 20 +++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 45 +++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 23bfd8988..a6e764ea4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -800,6 +800,23 @@ union Instruction { BitField<45, 4, PredOperation> op; // op with pred39 } csetp; + union { + BitField<35, 4, PredCondition> cond; + BitField<49, 1, u64> h_and; + BitField<6, 1, u64> ftz; + BitField<45, 2, PredOperation> op; + BitField<3, 3, u64> pred3; + BitField<0, 3, u64> pred0; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, HalfType> type_a; + BitField<31, 1, u64> negate_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, HalfType> type_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, u64> pred39; + } hsetp2; + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; @@ -1239,6 +1256,7 @@ public: HFMA2_RC, HFMA2_RR, HFMA2_IMM_R, + HSETP2_R, POPC_C, POPC_R, POPC_IMM, @@ -1325,6 +1343,7 @@ public: FloatSetPredicate, IntegerSet, 
IntegerSetPredicate, + HalfSetPredicate, PredicateSetPredicate, PredicateSetRegister, Conversion, @@ -1496,6 +1515,7 @@ private: INST("01100---1-------", Id::HFMA2_RC, Type::Hfma2, "HFMA2_RC"), INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), + INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ca2030e97..06f85fad2 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2791,6 +2791,51 @@ private: } break; } + case OpCode::Type::HalfSetPredicate: { + ASSERT_MSG(instr.hsetp2.ftz == 0, "Unimplemented"); + + const std::string op_a = + GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hsetp2.type_a, + instr.hsetp2.abs_a, instr.hsetp2.negate_a); + + const std::string op_b = [&]() { + switch (opcode->GetId()) { + case OpCode::Id::HSETP2_R: + return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), + instr.hsetp2.type_b, instr.hsetp2.abs_b, + instr.hsetp2.negate_b); + default: + UNREACHABLE(); + return std::string("vec2(0)"); + } + }(); + + // We can't use the constant predicate as destination. + ASSERT(instr.hsetp2.pred3 != static_cast<u64>(Pred::UnusedIndex)); + + const std::string second_pred = + GetPredicateCondition(instr.hsetp2.pred39, instr.hsetp2.neg_pred != 0); + + const std::string combiner = GetPredicateCombiner(instr.hsetp2.op); + + const std::string component_combiner = instr.hsetp2.h_and ? 
"&&" : "||"; + const std::string predicate = + '(' + GetPredicateComparison(instr.hsetp2.cond, op_a + ".x", op_b + ".x") + ' ' + + component_combiner + ' ' + + GetPredicateComparison(instr.hsetp2.cond, op_a + ".y", op_b + ".y") + ')'; + + // Set the primary predicate to the result of Predicate OP SecondPredicate + SetPredicate(instr.hsetp2.pred3, + '(' + predicate + ") " + combiner + " (" + second_pred + ')'); + + if (instr.hsetp2.pred0 != static_cast<u64>(Pred::UnusedIndex)) { + // Set the secondary predicate to the result of !Predicate OP SecondPredicate, + // if enabled + SetPredicate(instr.hsetp2.pred0, + "!(" + predicate + ") " + combiner + " (" + second_pred + ')'); + } + break; + } case OpCode::Type::PredicateSetRegister: { const std::string op_a = GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0); From 6312eec5ef650ca5363ef4cfa08c2d38ffb6a0fe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 15 Oct 2018 02:10:15 -0300 Subject: [PATCH 6/6] gl_shader_decompiler: Implement HSET2_R --- src/video_core/engines/shader_bytecode.h | 18 ++++++++ .../renderer_opengl/gl_shader_decompiler.cpp | 44 +++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index a6e764ea4..39ae065de 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -831,6 +831,21 @@ union Instruction { BitField<56, 1, u64> neg_imm; } fset; + union { + BitField<49, 1, u64> bf; + BitField<35, 3, PredCondition> cond; + BitField<50, 1, u64> ftz; + BitField<45, 2, PredOperation> op; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_a; + BitField<47, 2, HalfType> type_a; + BitField<31, 1, u64> negate_b; + BitField<30, 1, u64> abs_b; + BitField<28, 2, HalfType> type_b; + BitField<42, 1, u64> neg_pred; + BitField<39, 3, u64> pred39; + } hset2; + union { BitField<39, 3, u64> pred39; BitField<42, 1, u64> neg_pred; @@ -1257,6 +1272,7 @@ 
public: HFMA2_RR, HFMA2_IMM_R, HSETP2_R, + HSET2_R, POPC_C, POPC_R, POPC_IMM, @@ -1343,6 +1359,7 @@ public: FloatSetPredicate, IntegerSet, IntegerSetPredicate, + HalfSet, HalfSetPredicate, PredicateSetPredicate, PredicateSetRegister, @@ -1516,6 +1533,7 @@ private: INST("0101110100000---", Id::HFMA2_RR, Type::Hfma2, "HFMA2_RR"), INST("01110---0-------", Id::HFMA2_IMM_R, Type::Hfma2, "HFMA2_R_IMM"), INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP_R"), + INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"), INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 06f85fad2..23349b1a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2996,6 +2996,50 @@ private: } break; } + case OpCode::Type::HalfSet: { + ASSERT_MSG(instr.hset2.ftz == 0, "Unimplemented"); + + const std::string op_a = + GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr8, 0, false), instr.hset2.type_a, + instr.hset2.abs_a != 0, instr.hset2.negate_a != 0); + + const std::string op_b = [&]() { + switch (opcode->GetId()) { + case OpCode::Id::HSET2_R: + return GetHalfFloat(regs.GetRegisterAsInteger(instr.gpr20, 0, false), + instr.hset2.type_b, instr.hset2.abs_b != 0, + instr.hset2.negate_b != 0); + default: + UNREACHABLE(); + return std::string("vec2(0)"); + } + }(); + + const std::string second_pred = + GetPredicateCondition(instr.hset2.pred39, instr.hset2.neg_pred != 0); + + const std::string combiner = GetPredicateCombiner(instr.hset2.op); + + // HSET2 operates on each half float in the pack. + std::string result; + for (int i = 0; i < 2; ++i) { + const std::string float_value = i == 0 ? 
"0x00003c00" : "0x3c000000"; + const std::string integer_value = i == 0 ? "0x0000ffff" : "0xffff0000"; + const std::string value = instr.hset2.bf == 1 ? float_value : integer_value; + + const std::string comp = std::string(".") + "xy"[i]; + const std::string predicate = + "((" + GetPredicateComparison(instr.hset2.cond, op_a + comp, op_b + comp) + + ") " + combiner + " (" + second_pred + "))"; + + result += '(' + predicate + " ? " + value + " : 0)"; + if (i == 0) { + result += " | "; + } + } + regs.SetRegisterToInteger(instr.gpr0, false, 0, '(' + result + ')', 1, 1); + break; + } case OpCode::Type::Xmad: { ASSERT_MSG(!instr.xmad.sign_a, "Unimplemented"); ASSERT_MSG(!instr.xmad.sign_b, "Unimplemented");