shader: Initial instruction support
This commit is contained in:
		| @@ -39,18 +39,27 @@ add_executable(shader_recompiler | ||||
|     frontend/maxwell/program.h | ||||
|     frontend/maxwell/termination_code.cpp | ||||
|     frontend/maxwell/termination_code.h | ||||
|     frontend/maxwell/translate/impl/common_encoding.h | ||||
|     frontend/maxwell/translate/impl/floating_point_add.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_multi_function.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_multiply.cpp | ||||
|     frontend/maxwell/translate/impl/impl.cpp | ||||
|     frontend/maxwell/translate/impl/impl.h | ||||
|     frontend/maxwell/translate/impl/integer_add.cpp | ||||
|     frontend/maxwell/translate/impl/integer_scaled_add.cpp | ||||
|     frontend/maxwell/translate/impl/integer_set_predicate.cpp | ||||
|     frontend/maxwell/translate/impl/integer_shift_left.cpp | ||||
|     frontend/maxwell/translate/impl/integer_short_multiply_add.cpp | ||||
|     frontend/maxwell/translate/impl/load_store_attribute.cpp | ||||
|     frontend/maxwell/translate/impl/load_store_memory.cpp | ||||
|     frontend/maxwell/translate/impl/not_implemented.cpp | ||||
|     frontend/maxwell/translate/impl/register_move.cpp | ||||
|     frontend/maxwell/translate/impl/move_register.cpp | ||||
|     frontend/maxwell/translate/impl/move_special_register.cpp | ||||
|     frontend/maxwell/translate/translate.cpp | ||||
|     frontend/maxwell/translate/translate.h | ||||
|     ir_opt/dead_code_elimination_pass.cpp | ||||
|     ir_opt/get_set_elimination_pass.cpp | ||||
|     ir_opt/identity_removal_pass.cpp | ||||
|     ir_opt/passes.h | ||||
|     ir_opt/ssa_rewrite_pass.cpp | ||||
|   | ||||
| @@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) { | ||||
| } | ||||
|  | ||||
| Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, | ||||
|                                       std::initializer_list<Value> args) { | ||||
|     Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; | ||||
|                                       std::initializer_list<Value> args, u64 flags) { | ||||
|     Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)}; | ||||
|     const auto result_it{instructions.insert(insertion_point, *inst)}; | ||||
|  | ||||
|     if (inst->NumArgs() != args.size()) { | ||||
|   | ||||
| @@ -39,7 +39,7 @@ public: | ||||
|  | ||||
|     /// Prepends a new instruction to this basic block before the insertion point. | ||||
|     iterator PrependNewInst(iterator insertion_point, Opcode op, | ||||
|                             std::initializer_list<Value> args = {}); | ||||
|                             std::initializer_list<Value> args = {}, u64 flags = 0); | ||||
|  | ||||
|     /// Adds a new immediate predecessor to the basic block. | ||||
|     void AddImmediatePredecessor(IR::Block* immediate_predecessor); | ||||
|   | ||||
| @@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { | ||||
|     Inst(Opcode::SetAttribute, attribute, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::WorkgroupIdX() { | ||||
|     return Inst<U32>(Opcode::WorkgroupIdX); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::WorkgroupIdY() { | ||||
|     return Inst<U32>(Opcode::WorkgroupIdY); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::WorkgroupIdZ() { | ||||
|     return Inst<U32>(Opcode::WorkgroupIdZ); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LocalInvocationIdX() { | ||||
|     return Inst<U32>(Opcode::LocalInvocationIdX); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LocalInvocationIdY() { | ||||
|     return Inst<U32>(Opcode::LocalInvocationIdY); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LocalInvocationIdZ() { | ||||
|     return Inst<U32>(Opcode::LocalInvocationIdZ); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LoadGlobalU8(const U64& address) { | ||||
|     return Inst<U32>(Opcode::LoadGlobalU8, address); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LoadGlobalS8(const U64& address) { | ||||
|     return Inst<U32>(Opcode::LoadGlobalS8, address); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LoadGlobalU16(const U64& address) { | ||||
|     return Inst<U32>(Opcode::LoadGlobalU16, address); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LoadGlobalS16(const U64& address) { | ||||
|     return Inst<U32>(Opcode::LoadGlobalS16, address); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::LoadGlobal32(const U64& address) { | ||||
|     return Inst<U32>(Opcode::LoadGlobal32, address); | ||||
| } | ||||
|  | ||||
| Value IREmitter::LoadGlobal64(const U64& address) { | ||||
|     return Inst<Value>(Opcode::LoadGlobal64, address); | ||||
| } | ||||
|  | ||||
| Value IREmitter::LoadGlobal128(const U64& address) { | ||||
|     return Inst<Value>(Opcode::LoadGlobal128, address); | ||||
| } | ||||
|  | ||||
| void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { | ||||
|     Inst(Opcode::WriteGlobalU8, address, value); | ||||
| } | ||||
| @@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) { | ||||
|     return Inst<U1>(Opcode::GetOverflowFromOp, op); | ||||
| } | ||||
|  | ||||
| U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { | ||||
| U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { | ||||
|     if (a.Type() != a.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||
|     } | ||||
|     switch (a.Type()) { | ||||
|     case Type::U16: | ||||
|         return Inst<U16>(Opcode::FPAdd16, a, b); | ||||
|         return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b); | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::FPAdd32, a, b); | ||||
|         return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::FPAdd64, a, b); | ||||
|         return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b); | ||||
|     default: | ||||
|         ThrowInvalidType(a.Type()); | ||||
|     } | ||||
| @@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) { | ||||
|  | ||||
| Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { | ||||
|     if (e1.Type() != e2.Type()) { | ||||
|         throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); | ||||
|         throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); | ||||
|     } | ||||
|     return Inst(Opcode::CompositeConstruct2, e1, e2); | ||||
| } | ||||
|  | ||||
| Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { | ||||
|     if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { | ||||
|         throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); | ||||
|         throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); | ||||
|     } | ||||
|     return Inst(Opcode::CompositeConstruct3, e1, e2, e3); | ||||
| } | ||||
| @@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& | ||||
| Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, | ||||
|                                     const UAny& e4) { | ||||
|     if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { | ||||
|         throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), | ||||
|                               e4.Type()); | ||||
|         throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), | ||||
|                               e3.Type(), e4.Type()); | ||||
|     } | ||||
|     return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); | ||||
| } | ||||
| @@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { | ||||
|     return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); | ||||
| } | ||||
|  | ||||
| UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { | ||||
|     if (true_value.Type() != false_value.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); | ||||
|     } | ||||
|     switch (true_value.Type()) { | ||||
|     case Type::U8: | ||||
|         return Inst<UAny>(Opcode::Select8, condition, true_value, false_value); | ||||
|     case Type::U16: | ||||
|         return Inst<UAny>(Opcode::Select16, condition, true_value, false_value); | ||||
|     case Type::U32: | ||||
|         return Inst<UAny>(Opcode::Select32, condition, true_value, false_value); | ||||
|     case Type::U64: | ||||
|         return Inst<UAny>(Opcode::Select64, condition, true_value, false_value); | ||||
|     default: | ||||
|         throw InvalidArgument("Invalid type {}", true_value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U64 IREmitter::PackUint2x32(const Value& vector) { | ||||
|     return Inst<U64>(Opcode::PackUint2x32, vector); | ||||
| } | ||||
| @@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) { | ||||
|     return Inst<Value>(Opcode::UnpackDouble2x32, value); | ||||
| } | ||||
|  | ||||
| U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { | ||||
| U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { | ||||
|     if (a.Type() != b.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||
|     } | ||||
|     switch (a.Type()) { | ||||
|     case Type::U16: | ||||
|         return Inst<U16>(Opcode::FPMul16, a, b); | ||||
|         return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b); | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::FPMul32, a, b); | ||||
|         return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::FPMul64, a, b); | ||||
|         return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b); | ||||
|     default: | ||||
|         ThrowInvalidType(a.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, | ||||
|                            FpControl control) { | ||||
|     if (a.Type() != b.Type() || a.Type() != c.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); | ||||
|     } | ||||
|     switch (a.Type()) { | ||||
|     case Type::U16: | ||||
|         return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c); | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c); | ||||
|     default: | ||||
|         ThrowInvalidType(a.Type()); | ||||
|     } | ||||
| @@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) { | ||||
|     if (a.Type() != b.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||
|     } | ||||
|     switch (a.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::IAdd32, a, b); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::IAdd64, a, b); | ||||
|     default: | ||||
|         ThrowInvalidType(a.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32 IREmitter::IMul(const U32& a, const U32& b) { | ||||
|     return Inst<U32>(Opcode::IMul32, a, b); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::INeg(const U32& value) { | ||||
|     return Inst<U32>(Opcode::INeg32, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::IAbs(const U32& value) { | ||||
|     return Inst<U32>(Opcode::IAbs32, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) { | ||||
|     return Inst<U32>(Opcode::ShiftLeftLogical32, base, shift); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) { | ||||
|     return Inst<U32>(Opcode::ShiftRightLogical32, base, shift); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) { | ||||
|     return Inst<U32>(Opcode::ShiftRightArithmetic32, base, shift); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) { | ||||
|     return Inst<U32>(Opcode::BitwiseAnd32, a, b); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::BitwiseOr(const U32& a, const U32& b) { | ||||
|     return Inst<U32>(Opcode::BitwiseOr32, a, b); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::BitwiseXor(const U32& a, const U32& b) { | ||||
|     return Inst<U32>(Opcode::BitwiseXor32, a, b); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||||
|                               const U32& count) { | ||||
|     return Inst<U32>(Opcode::BitFieldInsert, base, insert, offset, count); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||||
|                                bool is_signed) { | ||||
|     return Inst<U32>(is_signed ? Opcode::BitFieldSExtract : Opcode::BitFieldUExtract, base, offset, | ||||
|                      count); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||||
|     return Inst<U1>(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) { | ||||
|     return Inst<U1>(Opcode::IEqual, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||||
|     return Inst<U1>(is_signed ? Opcode::SLessThanEqual : Opcode::ULessThanEqual, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) { | ||||
|     return Inst<U1>(is_signed ? Opcode::SGreaterThan : Opcode::UGreaterThan, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) { | ||||
|     return Inst<U1>(Opcode::INotEqual, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) { | ||||
|     return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | ||||
|     return Inst<U1>(Opcode::LogicalOr, a, b); | ||||
| } | ||||
| @@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) { | ||||
|     return Inst<U1>(Opcode::LogicalAnd, a, b); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::LogicalXor(const U1& a, const U1& b) { | ||||
|     return Inst<U1>(Opcode::LogicalXor, a, b); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::LogicalNot(const U1& value) { | ||||
|     return Inst<U1>(Opcode::LogicalNot, value); | ||||
| } | ||||
|   | ||||
| @@ -4,8 +4,12 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <cstring> | ||||
| #include <type_traits> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/attribute.h" | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/modifiers.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
|  | ||||
| namespace Shader::IR { | ||||
| @@ -52,6 +56,22 @@ public: | ||||
|     [[nodiscard]] U32 GetAttribute(IR::Attribute attribute); | ||||
|     void SetAttribute(IR::Attribute attribute, const U32& value); | ||||
|  | ||||
|     [[nodiscard]] U32 WorkgroupIdX(); | ||||
|     [[nodiscard]] U32 WorkgroupIdY(); | ||||
|     [[nodiscard]] U32 WorkgroupIdZ(); | ||||
|  | ||||
|     [[nodiscard]] U32 LocalInvocationIdX(); | ||||
|     [[nodiscard]] U32 LocalInvocationIdY(); | ||||
|     [[nodiscard]] U32 LocalInvocationIdZ(); | ||||
|  | ||||
|     [[nodiscard]] U32 LoadGlobalU8(const U64& address); | ||||
|     [[nodiscard]] U32 LoadGlobalS8(const U64& address); | ||||
|     [[nodiscard]] U32 LoadGlobalU16(const U64& address); | ||||
|     [[nodiscard]] U32 LoadGlobalS16(const U64& address); | ||||
|     [[nodiscard]] U32 LoadGlobal32(const U64& address); | ||||
|     [[nodiscard]] Value LoadGlobal64(const U64& address); | ||||
|     [[nodiscard]] Value LoadGlobal128(const U64& address); | ||||
|  | ||||
|     void WriteGlobalU8(const U64& address, const U32& value); | ||||
|     void WriteGlobalS8(const U64& address, const U32& value); | ||||
|     void WriteGlobalU16(const U64& address, const U32& value); | ||||
| @@ -71,6 +91,8 @@ public: | ||||
|                                            const UAny& e4); | ||||
|     [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); | ||||
|  | ||||
|     [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); | ||||
|  | ||||
|     [[nodiscard]] U64 PackUint2x32(const Value& vector); | ||||
|     [[nodiscard]] Value UnpackUint2x32(const U64& value); | ||||
|  | ||||
| @@ -80,8 +102,10 @@ public: | ||||
|     [[nodiscard]] U64 PackDouble2x32(const Value& vector); | ||||
|     [[nodiscard]] Value UnpackDouble2x32(const U64& value); | ||||
|  | ||||
|     [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); | ||||
|     [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); | ||||
|     [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); | ||||
|     [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); | ||||
|     [[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, | ||||
|                                   FpControl control = {}); | ||||
|  | ||||
|     [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); | ||||
|     [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); | ||||
| @@ -100,8 +124,31 @@ public: | ||||
|     [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); | ||||
|     [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); | ||||
|  | ||||
|     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | ||||
|     [[nodiscard]] U32 IMul(const U32& a, const U32& b); | ||||
|     [[nodiscard]] U32 INeg(const U32& value); | ||||
|     [[nodiscard]] U32 IAbs(const U32& value); | ||||
|     [[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift); | ||||
|     [[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift); | ||||
|     [[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift); | ||||
|     [[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b); | ||||
|     [[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b); | ||||
|     [[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b); | ||||
|     [[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset, | ||||
|                                      const U32& count); | ||||
|     [[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count, | ||||
|                                       bool is_signed); | ||||
|  | ||||
|     [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); | ||||
|     [[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs); | ||||
|     [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||||
|     [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); | ||||
|     [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | ||||
|     [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||||
|  | ||||
|     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | ||||
|     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | ||||
|     [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | ||||
|     [[nodiscard]] U1 LogicalNot(const U1& value); | ||||
|  | ||||
|     [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); | ||||
| @@ -118,6 +165,22 @@ private: | ||||
|         auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; | ||||
|         return T{Value{&*it}}; | ||||
|     } | ||||
|  | ||||
|     template <typename T> | ||||
|     requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags { | ||||
|         Flags() = default; | ||||
|         Flags(T proxy_) : proxy{proxy_} {} | ||||
|  | ||||
|         T proxy; | ||||
|     }; | ||||
|  | ||||
|     template <typename T = Value, typename FlagType, typename... Args> | ||||
|     T Inst(Opcode op, Flags<FlagType> flags, Args... args) { | ||||
|         u64 raw_flags{}; | ||||
|         std::memcpy(&raw_flags, &flags.proxy, sizeof(flags.proxy)); | ||||
|         auto it{block.PrependNewInst(insertion_point, op, {Value{args}...}, raw_flags)}; | ||||
|         return T{Value{&*it}}; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| } // namespace Shader::IR | ||||
|   | ||||
| @@ -5,7 +5,9 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <span> | ||||
| #include <type_traits> | ||||
| #include <vector> | ||||
|  | ||||
| #include <boost/intrusive/list.hpp> | ||||
| @@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4; | ||||
|  | ||||
| class Inst : public boost::intrusive::list_base_hook<> { | ||||
| public: | ||||
|     explicit Inst(Opcode op_) noexcept : op(op_) {} | ||||
|     explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {} | ||||
|  | ||||
|     /// Get the number of uses this instruction has. | ||||
|     [[nodiscard]] int UseCount() const noexcept { | ||||
| @@ -73,6 +75,14 @@ public: | ||||
|  | ||||
|     void ReplaceUsesWith(Value replacement); | ||||
|  | ||||
|     template <typename FlagsType> | ||||
|     requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>) | ||||
|         [[nodiscard]] FlagsType Flags() const noexcept { | ||||
|         FlagsType ret; | ||||
|         std::memcpy(&ret, &flags, sizeof(ret)); | ||||
|         return ret; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     void Use(const Value& value); | ||||
|     void UndoUse(const Value& value); | ||||
|   | ||||
							
								
								
									
src/shader_recompiler/frontend/ir/modifiers.h (new file, 28 lines)
									
								
							| @@ -0,0 +1,28 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
#pragma once

#include <cstdint>
#include <type_traits>

namespace Shader::IR {

/// Denormal and NaN handling mode for a floating-point instruction.
enum class FmzMode {
    None, // Denorms are not flushed, NAN is propagated (nouveau)
    FTZ,  // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
    FMZ,  // Flush denorms to zero, x * 0 == 0 (D3D9)
};

/// Rounding mode for a floating-point instruction.
enum class FpRounding {
    RN, // Round to nearest even
    RM, // Round towards negative infinity
    RP, // Round towards positive infinity
    RZ, // Round towards zero
};

/// Floating-point modifiers carried in an instruction's flags word.
/// Defaults: contraction allowed, round-to-nearest-even, flush-to-zero.
struct FpControl {
    bool no_contraction{false};
    FpRounding rounding : 8 = FpRounding::RN;
    FmzMode fmz_mode : 8 = FmzMode::FTZ;
};
// The flags word is 64 bits wide and is filled with memcpy, so FpControl must fit
// in it and be trivially copyable. std::uint64_t keeps this header self-contained.
static_assert(sizeof(FpControl) <= sizeof(std::uint64_t));
static_assert(std::is_trivially_copyable_v<FpControl>);
} // namespace Shader::IR
| @@ -35,6 +35,12 @@ OPCODE(SetZFlag,                                            Void,           U1, | ||||
| OPCODE(SetSFlag,                                            Void,           U1,                                                             ) | ||||
| OPCODE(SetCFlag,                                            Void,           U1,                                                             ) | ||||
| OPCODE(SetOFlag,                                            Void,           U1,                                                             ) | ||||
| OPCODE(WorkgroupIdX,                                        U32,                                                                            ) | ||||
| OPCODE(WorkgroupIdY,                                        U32,                                                                            ) | ||||
| OPCODE(WorkgroupIdZ,                                        U32,                                                                            ) | ||||
| OPCODE(LocalInvocationIdX,                                  U32,                                                                            ) | ||||
| OPCODE(LocalInvocationIdY,                                  U32,                                                                            ) | ||||
| OPCODE(LocalInvocationIdZ,                                  U32,                                                                            ) | ||||
|  | ||||
| // Undefined | ||||
| OPCODE(Undef1,                                              U1,                                                                             ) | ||||
| @@ -44,6 +50,13 @@ OPCODE(Undef32,                                             U32, | ||||
| OPCODE(Undef64,                                             U64,                                                                            ) | ||||
|  | ||||
| // Memory operations | ||||
| OPCODE(LoadGlobalU8,                                        U32,            U64,                                                            ) | ||||
| OPCODE(LoadGlobalS8,                                        U32,            U64,                                                            ) | ||||
| OPCODE(LoadGlobalU16,                                       U32,            U64,                                                            ) | ||||
| OPCODE(LoadGlobalS16,                                       U32,            U64,                                                            ) | ||||
| OPCODE(LoadGlobal32,                                        U32,            U64,                                                            ) | ||||
| OPCODE(LoadGlobal64,                                        Opaque,         U64,                                                            ) | ||||
| OPCODE(LoadGlobal128,                                       Opaque,         U64,                                                            ) | ||||
| OPCODE(WriteGlobalU8,                                       Void,           U64,            U32,                                            ) | ||||
| OPCODE(WriteGlobalS8,                                       Void,           U64,            U32,                                            ) | ||||
| OPCODE(WriteGlobalU16,                                      Void,           U64,            U32,                                            ) | ||||
| @@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3,                                 Opaque,         Opaq | ||||
| OPCODE(CompositeConstruct4,                                 Opaque,         Opaque,         Opaque,         Opaque,         Opaque,         ) | ||||
| OPCODE(CompositeExtract,                                    Opaque,         Opaque,         U32,                                            ) | ||||
|  | ||||
| // Select operations | ||||
| OPCODE(Select8,                                             U8,             U1,             U8,             U8,                             ) | ||||
| OPCODE(Select16,                                            U16,            U1,             U16,            U16,                            ) | ||||
| OPCODE(Select32,                                            U32,            U1,             U32,            U32,                            ) | ||||
| OPCODE(Select64,                                            U64,            U1,             U64,            U64,                            ) | ||||
|  | ||||
| // Bitwise conversions | ||||
| OPCODE(PackUint2x32,                                        U64,            Opaque,                                                         ) | ||||
| OPCODE(UnpackUint2x32,                                      Opaque,         U64,                                                            ) | ||||
| @@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp,                                   U1,             Opaq | ||||
| OPCODE(GetZSCOFromOp,                                       ZSCO,           Opaque,                                                         ) | ||||
|  | ||||
| // Floating-point operations | ||||
| OPCODE(FPAbs16,                                             U16,            U16                                                             ) | ||||
| OPCODE(FPAbs32,                                             U32,            U32                                                             ) | ||||
| OPCODE(FPAbs64,                                             U64,            U64                                                             ) | ||||
| OPCODE(FPAdd16,                                             U16,            U16,            U16                                             ) | ||||
| OPCODE(FPAdd32,                                             U32,            U32,            U32                                             ) | ||||
| OPCODE(FPAdd64,                                             U64,            U64,            U64                                             ) | ||||
| OPCODE(FPFma16,                                             U16,            U16,            U16                                             ) | ||||
| OPCODE(FPFma32,                                             U32,            U32,            U32                                             ) | ||||
| OPCODE(FPFma64,                                             U64,            U64,            U64                                             ) | ||||
| OPCODE(FPMax32,                                             U32,            U32,            U32                                             ) | ||||
| OPCODE(FPMax64,                                             U64,            U64,            U64                                             ) | ||||
| OPCODE(FPMin32,                                             U32,            U32,            U32                                             ) | ||||
| OPCODE(FPMin64,                                             U64,            U64,            U64                                             ) | ||||
| OPCODE(FPMul16,                                             U16,            U16,            U16                                             ) | ||||
| OPCODE(FPMul32,                                             U32,            U32,            U32                                             ) | ||||
| OPCODE(FPMul64,                                             U64,            U64,            U64                                             ) | ||||
| OPCODE(FPNeg16,                                             U16,            U16                                                             ) | ||||
| OPCODE(FPNeg32,                                             U32,            U32                                                             ) | ||||
| OPCODE(FPNeg64,                                             U64,            U64                                                             ) | ||||
| OPCODE(FPRecip32,                                           U32,            U32                                                             ) | ||||
| OPCODE(FPRecip64,                                           U64,            U64                                                             ) | ||||
| OPCODE(FPRecipSqrt32,                                       U32,            U32                                                             ) | ||||
| OPCODE(FPRecipSqrt64,                                       U64,            U64                                                             ) | ||||
| OPCODE(FPSqrt,                                              U32,            U32                                                             ) | ||||
| OPCODE(FPSin,                                               U32,            U32                                                             ) | ||||
| OPCODE(FPSinNotReduced,                                     U32,            U32                                                             ) | ||||
| OPCODE(FPExp2,                                              U32,            U32                                                             ) | ||||
| OPCODE(FPExp2NotReduced,                                    U32,            U32                                                             ) | ||||
| OPCODE(FPCos,                                               U32,            U32                                                             ) | ||||
| OPCODE(FPCosNotReduced,                                     U32,            U32                                                             ) | ||||
| OPCODE(FPLog2,                                              U32,            U32                                                             ) | ||||
| OPCODE(FPSaturate16,                                        U16,            U16                                                             ) | ||||
| OPCODE(FPSaturate32,                                        U32,            U32                                                             ) | ||||
| OPCODE(FPSaturate64,                                        U64,            U64                                                             ) | ||||
| OPCODE(FPRoundEven16,                                       U16,            U16                                                             ) | ||||
| OPCODE(FPRoundEven32,                                       U32,            U32                                                             ) | ||||
| OPCODE(FPRoundEven64,                                       U64,            U64                                                             ) | ||||
| OPCODE(FPFloor16,                                           U16,            U16                                                             ) | ||||
| OPCODE(FPFloor32,                                           U32,            U32                                                             ) | ||||
| OPCODE(FPFloor64,                                           U64,            U64                                                             ) | ||||
| OPCODE(FPCeil16,                                            U16,            U16                                                             ) | ||||
| OPCODE(FPCeil32,                                            U32,            U32                                                             ) | ||||
| OPCODE(FPCeil64,                                            U64,            U64                                                             ) | ||||
| OPCODE(FPTrunc16,                                           U16,            U16                                                             ) | ||||
| OPCODE(FPTrunc32,                                           U32,            U32                                                             ) | ||||
| OPCODE(FPTrunc64,                                           U64,            U64                                                             ) | ||||
| OPCODE(FPAbs16,                                             U16,            U16,                                                            ) | ||||
| OPCODE(FPAbs32,                                             U32,            U32,                                                            ) | ||||
| OPCODE(FPAbs64,                                             U64,            U64,                                                            ) | ||||
| OPCODE(FPAdd16,                                             U16,            U16,            U16,                                            ) | ||||
| OPCODE(FPAdd32,                                             U32,            U32,            U32,                                            ) | ||||
| OPCODE(FPAdd64,                                             U64,            U64,            U64,                                            ) | ||||
| OPCODE(FPFma16,                                             U16,            U16,            U16,            U16,                            ) | ||||
| OPCODE(FPFma32,                                             U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(FPFma64,                                             U64,            U64,            U64,            U64,                            ) | ||||
| OPCODE(FPMax32,                                             U32,            U32,            U32,                                            ) | ||||
| OPCODE(FPMax64,                                             U64,            U64,            U64,                                            ) | ||||
| OPCODE(FPMin32,                                             U32,            U32,            U32,                                            ) | ||||
| OPCODE(FPMin64,                                             U64,            U64,            U64,                                            ) | ||||
| OPCODE(FPMul16,                                             U16,            U16,            U16,                                            ) | ||||
| OPCODE(FPMul32,                                             U32,            U32,            U32,                                            ) | ||||
| OPCODE(FPMul64,                                             U64,            U64,            U64,                                            ) | ||||
| OPCODE(FPNeg16,                                             U16,            U16,                                                            ) | ||||
| OPCODE(FPNeg32,                                             U32,            U32,                                                            ) | ||||
| OPCODE(FPNeg64,                                             U64,            U64,                                                            ) | ||||
| OPCODE(FPRecip32,                                           U32,            U32,                                                            ) | ||||
| OPCODE(FPRecip64,                                           U64,            U64,                                                            ) | ||||
| OPCODE(FPRecipSqrt32,                                       U32,            U32,                                                            ) | ||||
| OPCODE(FPRecipSqrt64,                                       U64,            U64,                                                            ) | ||||
| OPCODE(FPSqrt,                                              U32,            U32,                                                            ) | ||||
| OPCODE(FPSin,                                               U32,            U32,                                                            ) | ||||
| OPCODE(FPSinNotReduced,                                     U32,            U32,                                                            ) | ||||
| OPCODE(FPExp2,                                              U32,            U32,                                                            ) | ||||
| OPCODE(FPExp2NotReduced,                                    U32,            U32,                                                            ) | ||||
| OPCODE(FPCos,                                               U32,            U32,                                                            ) | ||||
| OPCODE(FPCosNotReduced,                                     U32,            U32,                                                            ) | ||||
| OPCODE(FPLog2,                                              U32,            U32,                                                            ) | ||||
| OPCODE(FPSaturate16,                                        U16,            U16,                                                            ) | ||||
| OPCODE(FPSaturate32,                                        U32,            U32,                                                            ) | ||||
| OPCODE(FPSaturate64,                                        U64,            U64,                                                            ) | ||||
| OPCODE(FPRoundEven16,                                       U16,            U16,                                                            ) | ||||
| OPCODE(FPRoundEven32,                                       U32,            U32,                                                            ) | ||||
| OPCODE(FPRoundEven64,                                       U64,            U64,                                                            ) | ||||
| OPCODE(FPFloor16,                                           U16,            U16,                                                            ) | ||||
| OPCODE(FPFloor32,                                           U32,            U32,                                                            ) | ||||
| OPCODE(FPFloor64,                                           U64,            U64,                                                            ) | ||||
| OPCODE(FPCeil16,                                            U16,            U16,                                                            ) | ||||
| OPCODE(FPCeil32,                                            U32,            U32,                                                            ) | ||||
| OPCODE(FPCeil64,                                            U64,            U64,                                                            ) | ||||
| OPCODE(FPTrunc16,                                           U16,            U16,                                                            ) | ||||
| OPCODE(FPTrunc32,                                           U32,            U32,                                                            ) | ||||
| OPCODE(FPTrunc64,                                           U64,            U64,                                                            ) | ||||
|  | ||||
| // Integer operations | ||||
| OPCODE(IAdd32,                                              U32,            U32,            U32,                                            ) | ||||
| OPCODE(IAdd64,                                              U64,            U64,            U64,                                            ) | ||||
| OPCODE(IMul32,                                              U32,            U32,            U32,                                            ) | ||||
| OPCODE(INeg32,                                              U32,            U32,                                                            ) | ||||
| OPCODE(IAbs32,                                              U32,            U32,                                                            ) | ||||
| OPCODE(ShiftLeftLogical32,                                  U32,            U32,            U32,                                            ) | ||||
| OPCODE(ShiftRightLogical32,                                 U32,            U32,            U32,                                            ) | ||||
| OPCODE(ShiftRightArithmetic32,                              U32,            U32,            U32,                                            ) | ||||
| OPCODE(BitwiseAnd32,                                        U32,            U32,            U32,                                            ) | ||||
| OPCODE(BitwiseOr32,                                         U32,            U32,            U32,                                            ) | ||||
| OPCODE(BitwiseXor32,                                        U32,            U32,            U32,                                            ) | ||||
| OPCODE(BitFieldInsert,                                      U32,            U32,            U32,            U32,            U32,            ) | ||||
| OPCODE(BitFieldSExtract,                                    U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(BitFieldUExtract,                                    U32,            U32,            U32,            U32,                            ) | ||||
|  | ||||
| OPCODE(SLessThan,                                           U1,             U32,            U32,                                            ) | ||||
| OPCODE(ULessThan,                                           U1,             U32,            U32,                                            ) | ||||
| OPCODE(IEqual,                                              U1,             U32,            U32,                                            ) | ||||
| OPCODE(SLessThanEqual,                                      U1,             U32,            U32,                                            ) | ||||
| OPCODE(ULessThanEqual,                                      U1,             U32,            U32,                                            ) | ||||
| OPCODE(SGreaterThan,                                        U1,             U32,            U32,                                            ) | ||||
| OPCODE(UGreaterThan,                                        U1,             U32,            U32,                                            ) | ||||
| OPCODE(INotEqual,                                           U1,             U32,            U32,                                            ) | ||||
| OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                            ) | ||||
| OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                            ) | ||||
|  | ||||
| // Logical operations | ||||
| OPCODE(LogicalOr,                                           U1,             U1,             U1,                                             ) | ||||
| OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                             ) | ||||
| OPCODE(LogicalXor,                                          U1,             U1,             U1,                                             ) | ||||
| OPCODE(LogicalNot,                                          U1,             U1,                                                             ) | ||||
|  | ||||
| // Conversion operations | ||||
|   | ||||
| @@ -8,7 +8,16 @@ | ||||
|  | ||||
| namespace Shader::IR { | ||||
|  | ||||
| enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; | ||||
| enum class Pred : u64 { /* Predicate identifiers P0-P6 and PT; underlying type is u64 */ | ||||
|     P0, | ||||
|     P1, | ||||
|     P2, | ||||
|     P3, | ||||
|     P4, | ||||
|     P5, | ||||
|     P6, | ||||
|     PT, /* NOTE(review): presumably the always-true predicate - confirm */ | ||||
| }; | ||||
|  | ||||
| constexpr size_t NUM_USER_PREDS = 6; | ||||
| constexpr size_t NUM_PREDS = 7; | ||||
|   | ||||
| @@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) { | ||||
|         Optimization::Invoke(Optimization::IdentityRemovalPass, function); | ||||
|         // Optimization::Invoke(Optimization::VerificationPass, function); | ||||
|     } | ||||
|     //*/ | ||||
| } | ||||
|  | ||||
| std::string DumpProgram(const Program& program) { | ||||
|   | ||||
| @@ -0,0 +1,56 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/ir/modifiers.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
|  | ||||
| enum class FpRounding : u64 { /* Rounding field value as decoded from an instruction word */ | ||||
|     RN, /* NOTE(review): RN/RM/RP/RZ presumably nearest/-inf/+inf/zero - confirm */ | ||||
|     RM, | ||||
|     RP, | ||||
|     RZ, | ||||
| }; | ||||
|  | ||||
| enum class FmzMode : u64 { /* FMZ/FTZ field value as decoded from an instruction word */ | ||||
|     None, | ||||
|     FTZ, | ||||
|     FMZ, | ||||
|     INVALIDFMZ3, /* Encoding 3 has no IR equivalent; CastFmzMode throws for it */ | ||||
| }; | ||||
|  | ||||
| // Maps a decoded Maxwell rounding mode onto the IR enumerator of the same name. | ||||
| // Throws NotImplementedException when the value is none of RN, RM, RP or RZ. | ||||
| inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) { | ||||
|     if (fp_rounding == FpRounding::RN) { | ||||
|         return IR::FpRounding::RN; | ||||
|     } | ||||
|     if (fp_rounding == FpRounding::RM) { | ||||
|         return IR::FpRounding::RM; | ||||
|     } | ||||
|     if (fp_rounding == FpRounding::RP) { | ||||
|         return IR::FpRounding::RP; | ||||
|     } | ||||
|     if (fp_rounding == FpRounding::RZ) { | ||||
|         return IR::FpRounding::RZ; | ||||
|     } | ||||
|     throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding); | ||||
| } | ||||
|  | ||||
| // Maps a decoded Maxwell FMZ mode onto the IR enumerator of the same name. | ||||
| // INVALIDFMZ3 and any other unlisted value throw NotImplementedException. | ||||
| inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) { | ||||
|     if (fmz_mode == FmzMode::None) { | ||||
|         return IR::FmzMode::None; | ||||
|     } | ||||
|     if (fmz_mode == FmzMode::FTZ) { | ||||
|         return IR::FmzMode::FTZ; | ||||
|     } | ||||
|     if (fmz_mode == FmzMode::FMZ) { | ||||
|         return IR::FmzMode::FMZ; | ||||
|     } | ||||
|     throw NotImplementedException("Invalid FMZ mode {}", fmz_mode); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,71 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
|  | ||||
| // Emits IR for FADD. Operand A is read from the register field at bit 8, operand B is src_b; | ||||
| // each goes through FPAbsNeg with its own abs/neg flags, and the FPAdd result is written to the | ||||
| // register field at bit 0. SAT and CC are not implemented and throw NotImplementedException. | ||||
| void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, | ||||
|           const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_a; | ||||
|     } const regs{insn}; | ||||
|  | ||||
|     // Reject unsupported modifiers before touching the IR emitter | ||||
|     if (sat) { | ||||
|         throw NotImplementedException("FADD SAT"); | ||||
|     } | ||||
|     if (cc) { | ||||
|         throw NotImplementedException("FADD CC"); | ||||
|     } | ||||
|     const IR::U32 lhs{v.ir.FPAbsNeg(v.X(regs.src_a), abs_a, neg_a)}; | ||||
|     const IR::U32 rhs{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; | ||||
|     // This encoding only carries a single FTZ bit, so FMZ is never selected here | ||||
|     const IR::FmzMode fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}; | ||||
|     IR::FpControl fp_control{ | ||||
|         .no_contraction{true}, | ||||
|         .rounding{CastFpRounding(fp_rounding)}, | ||||
|         .fmz_mode{fmz_mode}, | ||||
|     }; | ||||
|     v.X(regs.dest_reg, v.ir.FPAdd(lhs, rhs, fp_control)); | ||||
| } | ||||
|  | ||||
| // Decodes the FADD modifier bits from the instruction word and forwards them, together with | ||||
| // src_b, to the fully parameterized FADD overload. | ||||
| void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<39, 2, FpRounding> fp_rounding; | ||||
|         BitField<44, 1, u64> ftz; | ||||
|         BitField<45, 1, u64> neg_b; | ||||
|         BitField<46, 1, u64> abs_a; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 1, u64> neg_a; | ||||
|         BitField<49, 1, u64> abs_b; | ||||
|         BitField<50, 1, u64> sat; | ||||
|     } const bits{insn}; | ||||
|  | ||||
|     const bool sat{bits.sat != 0}; | ||||
|     const bool cc{bits.cc != 0}; | ||||
|     const bool ftz{bits.ftz != 0}; | ||||
|     const bool abs_a{bits.abs_a != 0}; | ||||
|     const bool neg_a{bits.neg_a != 0}; | ||||
|     const bool abs_b{bits.abs_b != 0}; | ||||
|     const bool neg_b{bits.neg_b != 0}; | ||||
|     FADD(v, insn, sat, cc, ftz, bits.fp_rounding, src_b, abs_a, neg_a, abs_b, neg_b); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // FADD whose operand B is the register operand returned by GetReg20. | ||||
| void TranslatorVisitor::FADD_reg(u64 insn) { | ||||
|     const IR::U32 src_b{GetReg20(insn)}; | ||||
|     FADD(*this, insn, src_b); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD_cbuf(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FADD (cbuf)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD_imm(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FADD (imm)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD32I(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FADD32I"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,73 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Emits IR for FFMA. Operand A is read from the register field at bit 8; A, src_b and src_c each | ||||
| // go through FPAbsNeg with abs disabled and their own neg flag, and the FPFma result is written | ||||
| // to the register field at bit 0. SAT and CC are not implemented and throw | ||||
| // NotImplementedException. | ||||
| void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, | ||||
|           bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_a; | ||||
|     } const regs{insn}; | ||||
|  | ||||
|     // Reject unsupported modifiers before touching the IR emitter | ||||
|     if (sat) { | ||||
|         throw NotImplementedException("FFMA SAT"); | ||||
|     } | ||||
|     if (cc) { | ||||
|         throw NotImplementedException("FFMA CC"); | ||||
|     } | ||||
|     const IR::U32 factor_a{v.ir.FPAbsNeg(v.X(regs.src_a), false, neg_a)}; | ||||
|     const IR::U32 factor_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||||
|     const IR::U32 addend{v.ir.FPAbsNeg(src_c, false, neg_c)}; | ||||
|     // Both casts throw on encodings that have no IR equivalent | ||||
|     const IR::FpControl control{ | ||||
|         .no_contraction{true}, | ||||
|         .rounding{CastFpRounding(fp_rounding)}, | ||||
|         .fmz_mode{CastFmzMode(fmz_mode)}, | ||||
|     }; | ||||
|     v.X(regs.dest_reg, v.ir.FPFma(factor_a, factor_b, addend, control)); | ||||
| } | ||||
|  | ||||
| // Decodes the FFMA modifier bits from the instruction word and forwards them to the fully | ||||
| // parameterized FFMA overload. Operand A is never negated by this decoder. | ||||
| void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 1, u64> neg_b; | ||||
|         BitField<49, 1, u64> neg_c; | ||||
|         BitField<50, 1, u64> sat; | ||||
|         BitField<51, 2, FpRounding> fp_rounding; | ||||
|         BitField<53, 2, FmzMode> fmz_mode; | ||||
|     } const bits{insn}; | ||||
|  | ||||
|     const bool neg_a{false}; | ||||
|     const bool neg_b{bits.neg_b != 0}; | ||||
|     const bool neg_c{bits.neg_c != 0}; | ||||
|     const bool sat{bits.sat != 0}; | ||||
|     const bool cc{bits.cc != 0}; | ||||
|     FFMA(v, insn, src_b, src_c, neg_a, neg_b, neg_c, sat, cc, bits.fmz_mode, bits.fp_rounding); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void TranslatorVisitor::FFMA_reg(u64 insn) { /* src_b from GetReg20, src_c from GetReg39 */ | ||||
|     FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); | ||||
| } /* NOTE(review): the two operand reads are call arguments, so C++ leaves their order unspecified */ | ||||
|  | ||||
| void TranslatorVisitor::FFMA_rc(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FFMA (rc)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA_cr(u64 insn) { /* src_b from GetCbuf, src_c from GetReg39 */ | ||||
|     FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); | ||||
| } /* NOTE(review): the two operand reads are call arguments, so C++ leaves their order unspecified */ | ||||
|  | ||||
| void TranslatorVisitor::FFMA_imm(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FFMA (imm)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA32I(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FFMA32I"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,108 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/ir/modifiers.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class Scale : u64 { /* FMUL scale field value; ScaleFactor maps each entry to a multiplier */ | ||||
|     None, | ||||
|     D2, | ||||
|     D4, | ||||
|     D8, | ||||
|     M8, | ||||
|     M4, | ||||
|     M2, | ||||
|     INVALIDSCALE37, /* No multiplier is defined for this value; ScaleFactor throws for it */ | ||||
| }; | ||||
|  | ||||
| // Returns the multiplier selected by an FMUL scale field: None is 1, D2/D4/D8 are 1/2, 1/4 and | ||||
| // 1/8, and M8/M4/M2 are 8, 4 and 2. INVALIDSCALE37 and any other value throw | ||||
| // NotImplementedException. | ||||
| float ScaleFactor(Scale scale) { | ||||
|     // Indexed by the numeric value of Scale, so the entries follow the enum declaration order | ||||
|     static constexpr float factors[]{ | ||||
|         1.0f,        // None | ||||
|         1.0f / 2.0f, // D2 | ||||
|         1.0f / 4.0f, // D4 | ||||
|         1.0f / 8.0f, // D8 | ||||
|         8.0f,        // M8 | ||||
|         4.0f,        // M4 | ||||
|         2.0f,        // M2 | ||||
|     }; | ||||
|     const u64 index{static_cast<u64>(scale)}; | ||||
|     if (index < sizeof(factors) / sizeof(factors[0])) { | ||||
|         return factors[index]; | ||||
|     } | ||||
|     throw NotImplementedException("Invalid FMUL scale {}", scale); | ||||
| } | ||||
|  | ||||
| // Emits IR for FMUL. Operand A is read from the register field at bit 8 and, when a scale is | ||||
| // selected, pre-multiplied by ScaleFactor(scale); src_b goes through FPAbsNeg with abs disabled. | ||||
| // The FPMul result is written to the register field at bit 0. CC and SAT are not implemented | ||||
| // and throw NotImplementedException. | ||||
| void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, | ||||
|           FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_a; | ||||
|     } const regs{insn}; | ||||
|  | ||||
|     // Reject unsupported modifiers before touching the IR emitter | ||||
|     if (cc) { | ||||
|         throw NotImplementedException("FMUL CC"); | ||||
|     } | ||||
|     if (sat) { | ||||
|         throw NotImplementedException("FMUL SAT"); | ||||
|     } | ||||
|     IR::U32 multiplicand{v.X(regs.src_a)}; | ||||
|     if (scale != Scale::None) { | ||||
|         // Scaling is only handled for the FTZ + RN combination | ||||
|         // NOTE(review): the check tests FTZ while the message says "non-FMZ" - confirm which is meant | ||||
|         const bool is_ftz_rn{fmz_mode == FmzMode::FTZ && fp_rounding == FpRounding::RN}; | ||||
|         if (!is_ftz_rn) { | ||||
|             throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); | ||||
|         } | ||||
|         multiplicand = v.ir.FPMul(multiplicand, v.ir.Imm32(ScaleFactor(scale))); | ||||
|     } | ||||
|     const IR::U32 multiplier{v.ir.FPAbsNeg(src_b, false, neg_b)}; | ||||
|     // Both casts throw on encodings that have no IR equivalent | ||||
|     const IR::FpControl control{ | ||||
|         .no_contraction{true}, | ||||
|         .rounding{CastFpRounding(fp_rounding)}, | ||||
|         .fmz_mode{CastFmzMode(fmz_mode)}, | ||||
|     }; | ||||
|     v.X(regs.dest_reg, v.ir.FPMul(multiplicand, multiplier, control)); | ||||
| } | ||||
|  | ||||
| // Decodes the FMUL modifier bits from the instruction word and forwards them, together with | ||||
| // src_b, to the fully parameterized FMUL overload. | ||||
| void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<39, 2, FpRounding> fp_rounding; | ||||
|         BitField<41, 3, Scale> scale; | ||||
|         BitField<44, 2, FmzMode> fmz; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 1, u64> neg_b; | ||||
|         BitField<50, 1, u64> sat; | ||||
|     } const bits{insn}; | ||||
|  | ||||
|     const bool sat{bits.sat != 0}; | ||||
|     const bool cc{bits.cc != 0}; | ||||
|     const bool neg_b{bits.neg_b != 0}; | ||||
|     FMUL(v, insn, src_b, bits.fmz, bits.fp_rounding, bits.scale, sat, cc, neg_b); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // FMUL whose operand B is the register operand returned by GetReg20. | ||||
| void TranslatorVisitor::FMUL_reg(u64 insn) { | ||||
|     const IR::U32 src_b{GetReg20(insn)}; | ||||
|     FMUL(*this, insn, src_b); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL_cbuf(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FMUL (cbuf)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL_imm(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FMUL (imm)"); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL32I(u64) { /* Not translated yet: always throws */ | ||||
|     throw NotImplementedException("FMUL32I"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { | ||||
|     ir.SetReg(dest_reg, value); | ||||
| } | ||||
|  | ||||
| // Reads the register whose index is the 8-bit field at bit 20 of the instruction word. | ||||
| IR::U32 TranslatorVisitor::GetReg20(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<20, 8, IR::Reg> index; | ||||
|     } const encoding{insn}; | ||||
|     const IR::Reg source = encoding.index; | ||||
|     return X(source); | ||||
| } | ||||
|  | ||||
| // Reads the register whose index is the 8-bit field at bit 39 of the instruction word. | ||||
| IR::U32 TranslatorVisitor::GetReg39(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<39, 8, IR::Reg> index; | ||||
|     } const encoding{insn}; | ||||
|     const IR::Reg source = encoding.index; | ||||
|     return X(source); | ||||
| } | ||||
|  | ||||
| IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
| @@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { | ||||
|     return ir.GetCbuf(binding, byte_offset); | ||||
| } | ||||
|  | ||||
| IR::U32 TranslatorVisitor::GetImm(u64 insn) { | ||||
| IR::U32 TranslatorVisitor::GetImm20(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<20, 19, u64> value; | ||||
| @@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) { | ||||
|     return ir.Imm32(value); | ||||
| } | ||||
|  | ||||
| // Builds a 32-bit IR immediate from the 32-bit field at bit 20 of the instruction word. | ||||
| IR::U32 TranslatorVisitor::GetImm32(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<20, 32, u64> value; | ||||
|     } const encoding{insn}; | ||||
|     const u32 immediate{static_cast<u32>(encoding.value)}; | ||||
|     return ir.Imm32(immediate); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::SetZFlag(const IR::U1& value) { /* Forwards to the IR emitter */ | ||||
|     ir.SetZFlag(value); | ||||
| } | ||||
|   | ||||
| @@ -46,7 +46,7 @@ public: | ||||
|     void DADD_reg(u64 insn); | ||||
|     void DADD_cbuf(u64 insn); | ||||
|     void DADD_imm(u64 insn); | ||||
|     void DEPBAR(u64 insn); | ||||
|     void DEPBAR(); | ||||
|     void DFMA_reg(u64 insn); | ||||
|     void DFMA_rc(u64 insn); | ||||
|     void DFMA_cr(u64 insn); | ||||
| @@ -298,9 +298,14 @@ public: | ||||
|     [[nodiscard]] IR::U32 X(IR::Reg reg); | ||||
|     void X(IR::Reg dest_reg, const IR::U32& value); | ||||
|  | ||||
|     [[nodiscard]] IR::U32 GetReg20(u64 insn); | ||||
|     [[nodiscard]] IR::U32 GetReg39(u64 insn); | ||||
|  | ||||
|     [[nodiscard]] IR::U32 GetCbuf(u64 insn); | ||||
|  | ||||
|     [[nodiscard]] IR::U32 GetImm(u64 insn); | ||||
|     [[nodiscard]] IR::U32 GetImm20(u64 insn); | ||||
|  | ||||
|     [[nodiscard]] IR::U32 GetImm32(u64 insn); | ||||
|  | ||||
|     void SetZFlag(const IR::U1& value); | ||||
|     void SetSFlag(const IR::U1& value); | ||||
|   | ||||
| @@ -0,0 +1,106 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Emits IR for an integer add. Operand A is read from the register field at bit 8 and passed | ||||
| // through INeg when neg_a is set; op_b is added to it, then the carry flag (as 0 or 1) when x is | ||||
| // set, then 1 when po is set. With cc the Z/S/C/O flags are stored from the result. The sum is | ||||
| // written to the register field at bit 0. | ||||
| // SAT, X+PO, CC+PO and X+CC are not implemented and throw NotImplementedException. | ||||
| void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x, | ||||
|           bool cc) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_a; | ||||
|     } const regs{insn}; | ||||
|  | ||||
|     if (sat) { | ||||
|         throw NotImplementedException("IADD SAT"); | ||||
|     } | ||||
|     if (x && po) { | ||||
|         throw NotImplementedException("IADD X+PO"); | ||||
|     } | ||||
|     // The left-hand operand always comes from the src_a register | ||||
|     IR::U32 lhs{v.X(regs.src_a)}; | ||||
|     if (neg_a) { | ||||
|         lhs = v.ir.INeg(lhs); | ||||
|     } | ||||
|     IR::U32 sum{v.ir.IAdd(lhs, op_b)}; | ||||
|     if (x) { | ||||
|         // Extended add: fold the current carry flag in as 0 or 1 | ||||
|         const IR::U32 carry_in{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))}; | ||||
|         sum = v.ir.IAdd(sum, carry_in); | ||||
|     } | ||||
|     if (po) { | ||||
|         // .PO adds one to the result | ||||
|         sum = v.ir.IAdd(sum, v.ir.Imm32(1)); | ||||
|     } | ||||
|     if (cc) { | ||||
|         // These combinations are rejected only after the adds above have been emitted | ||||
|         // TODO: Does this grab the result pre-PO or after? | ||||
|         if (po) { | ||||
|             throw NotImplementedException("IADD CC+PO"); | ||||
|         } | ||||
|         // TODO: How does CC behave when X is set? | ||||
|         if (x) { | ||||
|             throw NotImplementedException("IADD X+CC"); | ||||
|         } | ||||
|         // Store the flags derived from the final add | ||||
|         v.SetZFlag(v.ir.GetZeroFromOp(sum)); | ||||
|         v.SetSFlag(v.ir.GetSignFromOp(sum)); | ||||
|         v.SetCFlag(v.ir.GetCarryFromOp(sum)); | ||||
|         v.SetOFlag(v.ir.GetOverflowFromOp(sum)); | ||||
|     } | ||||
|     v.X(regs.dest_reg, sum); | ||||
| } | ||||
|  | ||||
| // Decodes the modifier bits of the common IADD encoding (used by the cbuf form) and forwards to | ||||
| // the shared implementation. | ||||
| // | ||||
| // Bits 48 and 49 are the per-operand negation bits for B and A. When both are set they encode | ||||
| // .PO instead, and neither operand is negated. | ||||
| void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<43, 1, u64> x; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 2, u64> three_for_po; | ||||
|         BitField<48, 1, u64> neg_b; | ||||
|         BitField<49, 1, u64> neg_a; | ||||
|         BitField<50, 1, u64> sat; | ||||
|     } const iadd{insn}; | ||||
|  | ||||
|     const bool po{iadd.three_for_po == 3}; | ||||
|     const bool neg_a{!po && iadd.neg_a != 0}; | ||||
|     if (!po && iadd.neg_b != 0) { | ||||
|         op_b = v.ir.INeg(op_b); | ||||
|     } | ||||
|     // Forward the PO-aware neg_a, not the raw bit: bit 49 is also set whenever .PO is encoded, | ||||
|     // and passing it through would negate operand A on every .PO instruction. | ||||
|     IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // IADD with a register operand B: not translated yet, always throws. | ||||
| void TranslatorVisitor::IADD_reg(u64) { | ||||
|     throw NotImplementedException("IADD (reg)"); | ||||
| } | ||||
|  | ||||
| // IADD with operand B read through GetCbuf. | ||||
| void TranslatorVisitor::IADD_cbuf(u64 insn) { | ||||
|     const IR::U32 op_b{GetCbuf(insn)}; | ||||
|     IADD(*this, insn, op_b); | ||||
| } | ||||
|  | ||||
| // IADD with an immediate operand B: not translated yet, always throws. | ||||
| void TranslatorVisitor::IADD_imm(u64) { | ||||
|     throw NotImplementedException("IADD (imm)"); | ||||
| } | ||||
|  | ||||
| // IADD32I: integer add whose operand B is obtained from GetImm32. | ||||
| // The modifier bits live at different positions than in the common IADD encoding, so they are | ||||
| // decoded here and handed to the shared implementation. | ||||
| void TranslatorVisitor::IADD32I(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<52, 1, u64> cc; | ||||
|         BitField<53, 1, u64> x; | ||||
|         BitField<54, 1, u64> sat; | ||||
|         BitField<55, 2, u64> three_for_po; | ||||
|         BitField<56, 1, u64> neg_a; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     // Bits 55-56 both set select .PO; otherwise bit 56 on its own negates operand A | ||||
|     const bool has_po{fields.three_for_po == 3}; | ||||
|     const bool negate_a{fields.neg_a != 0 && !has_po}; | ||||
|     const bool has_sat{fields.sat != 0}; | ||||
|     const bool has_x{fields.x != 0}; | ||||
|     const bool has_cc{fields.cc != 0}; | ||||
|     IADD(*this, insn, GetImm32(insn), negate_a, has_po, has_sat, has_x, has_cc); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,73 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Scaled add: dest = (A << scale) + B. | ||||
| // Bits 48/49 negate B/A individually; when both are set they encode .PO (add one to the result) | ||||
| // and no operand is negated. Throws NotImplementedException when CC is requested. | ||||
| void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> op_a; | ||||
|         BitField<39, 5, u64> scale; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 2, u64> three_for_po; | ||||
|         BitField<48, 1, u64> neg_b; | ||||
|         BitField<49, 1, u64> neg_a; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     const bool has_po{fields.three_for_po == 3}; | ||||
|     IR::U32 op_a{v.X(fields.op_a)}; | ||||
|     // The negation bits only apply when they do not form the .PO pattern | ||||
|     if (!has_po && fields.neg_a != 0) { | ||||
|         op_a = v.ir.INeg(op_a); | ||||
|     } | ||||
|     if (!has_po && fields.neg_b != 0) { | ||||
|         op_b = v.ir.INeg(op_b); | ||||
|     } | ||||
|     // Scale A after negation, then add B | ||||
|     const IR::U32 shift_amount{v.ir.Imm32(static_cast<u32>(fields.scale))}; | ||||
|     const IR::U32 shifted_a{v.ir.ShiftLeftLogical(op_a, shift_amount)}; | ||||
|     IR::U32 sum{v.ir.IAdd(shifted_a, op_b)}; | ||||
|     if (has_po) { | ||||
|         // Plus-one modifier applies to the final sum | ||||
|         sum = v.ir.IAdd(sum, v.ir.Imm32(1)); | ||||
|     } | ||||
|     v.X(fields.dest_reg, sum); | ||||
|  | ||||
|     if (fields.cc != 0) { | ||||
|         throw NotImplementedException("ISCADD CC"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // ISCADD with operand B taken from the register encoded in bits 20-27. | ||||
| void TranslatorVisitor::ISCADD_reg(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<20, 8, IR::Reg> op_b; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     const IR::U32 op_b{X(fields.op_b)}; | ||||
|     ISCADD(*this, insn, op_b); | ||||
| } | ||||
|  | ||||
| // ISCADD with a constant buffer operand: not translated yet, always throws. | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64) { | ||||
|     throw NotImplementedException("ISCADD (cbuf)"); | ||||
| } | ||||
|  | ||||
| // ISCADD with an immediate operand: not translated yet, always throws. | ||||
| void TranslatorVisitor::ISCADD_imm(u64) { | ||||
|     throw NotImplementedException("ISCADD (imm)"); | ||||
| } | ||||
|  | ||||
| // ISCADD32I: not translated yet, always throws. | ||||
| void TranslatorVisitor::ISCADD32I(u64) { | ||||
|     throw NotImplementedException("ISCADD32I"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,99 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Comparison selector, decoded directly from the 3-bit compare_op field of ISETP. | ||||
| // The numeric values are the raw encodings. | ||||
| enum class CompareOp : u64 { | ||||
|     F = 0,  // Always false | ||||
|     LT = 1, // Less than | ||||
|     EQ = 2, // Equal | ||||
|     LE = 3, // Less than or equal | ||||
|     GT = 4, // Greater than | ||||
|     NE = 5, // Not equal | ||||
|     GE = 6, // Greater than or equal | ||||
|     T = 7,  // Always true | ||||
| }; | ||||
|  | ||||
| // Boolean operation used to combine the comparison result with the extra predicate. | ||||
| // Decoded directly from the 2-bit bop field of ISETP. | ||||
| enum class Bop : u64 { | ||||
|     AND = 0, | ||||
|     OR = 1, | ||||
|     XOR = 2, | ||||
| }; | ||||
|  | ||||
| // Emits the integer comparison selected by op and returns its boolean result. | ||||
| // is_signed is forwarded to the ordered comparisons; F and T produce constants. | ||||
| // Throws NotImplementedException when op is not one of the enumerators. | ||||
| IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs, | ||||
|                bool is_signed) { | ||||
|     switch (op) { | ||||
|     // Constant results | ||||
|     case CompareOp::F: | ||||
|         return ir.Imm1(false); | ||||
|     case CompareOp::T: | ||||
|         return ir.Imm1(true); | ||||
|     // Equality does not take a signedness argument | ||||
|     case CompareOp::EQ: | ||||
|         return ir.IEqual(lhs, rhs); | ||||
|     case CompareOp::NE: | ||||
|         return ir.INotEqual(lhs, rhs); | ||||
|     // Ordered comparisons | ||||
|     case CompareOp::LT: | ||||
|         return ir.ILessThan(lhs, rhs, is_signed); | ||||
|     case CompareOp::LE: | ||||
|         return ir.ILessThanEqual(lhs, rhs, is_signed); | ||||
|     case CompareOp::GT: | ||||
|         return ir.IGreaterThan(lhs, rhs, is_signed); | ||||
|     case CompareOp::GE: | ||||
|         return ir.IGreaterThanEqual(lhs, rhs, is_signed); | ||||
|     } | ||||
|     throw NotImplementedException("Invalid ISETP compare op {}", op); | ||||
| } | ||||
|  | ||||
| // Combines a comparison result with the extra predicate using the selected boolean operation. | ||||
| // Throws NotImplementedException when bop is not AND, OR or XOR. | ||||
| IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) { | ||||
|     switch (bop) { | ||||
|     case Bop::XOR: | ||||
|         return ir.LogicalXor(comparison, bop_pred); | ||||
|     case Bop::OR: | ||||
|         return ir.LogicalOr(comparison, bop_pred); | ||||
|     case Bop::AND: | ||||
|         return ir.LogicalAnd(comparison, bop_pred); | ||||
|     } | ||||
|     throw NotImplementedException("Invalid ISETP bop {}", bop); | ||||
| } | ||||
|  | ||||
| // ISETP: compares register A against op_b and writes two predicates. | ||||
| // dest_pred_a receives BOP(comparison, pred) and dest_pred_b receives BOP(!comparison, pred), | ||||
| // where pred is the (optionally negated) predicate encoded in bits 39-42. | ||||
| void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 3, IR::Pred> dest_pred_b; | ||||
|         BitField<3, 3, IR::Pred> dest_pred_a; | ||||
|         BitField<8, 8, IR::Reg> src_reg_a; | ||||
|         BitField<39, 3, IR::Pred> bop_pred; | ||||
|         BitField<42, 1, u64> neg_bop_pred; | ||||
|         BitField<45, 2, Bop> bop; | ||||
|         BitField<48, 1, u64> is_signed; | ||||
|         BitField<49, 3, CompareOp> compare_op; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     const Bop combine_op{fields.bop}; | ||||
|     const bool is_signed{fields.is_signed != 0}; | ||||
|     const bool negate_pred{fields.neg_bop_pred != 0}; | ||||
|  | ||||
|     // Compare A against B, then fetch the predicate to combine with | ||||
|     const IR::U32 op_a{v.X(fields.src_reg_a)}; | ||||
|     const IR::U1 cmp{Compare(v.ir, fields.compare_op, op_a, op_b, is_signed)}; | ||||
|     const IR::U1 pred{v.ir.GetPred(fields.bop_pred, negate_pred)}; | ||||
|  | ||||
|     // First destination takes the comparison, second takes its negation | ||||
|     const IR::U1 first{Combine(v.ir, combine_op, cmp, pred)}; | ||||
|     const IR::U1 inverted_cmp{v.ir.LogicalNot(cmp)}; | ||||
|     const IR::U1 second{Combine(v.ir, combine_op, inverted_cmp, pred)}; | ||||
|     v.ir.SetPred(fields.dest_pred_a, first); | ||||
|     v.ir.SetPred(fields.dest_pred_b, second); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // ISETP with operand B read through GetReg20. | ||||
| void TranslatorVisitor::ISETP_reg(u64 insn) { | ||||
|     const IR::U32 op_b{GetReg20(insn)}; | ||||
|     ISETP(*this, insn, op_b); | ||||
| } | ||||
|  | ||||
| // ISETP with operand B read through GetCbuf. | ||||
| void TranslatorVisitor::ISETP_cbuf(u64 insn) { | ||||
|     const IR::U32 op_b{GetCbuf(insn)}; | ||||
|     ISETP(*this, insn, op_b); | ||||
| } | ||||
|  | ||||
| // ISETP with an immediate operand: not translated yet, always throws. | ||||
| void TranslatorVisitor::ISETP_imm(u64) { | ||||
|     throw NotImplementedException("ISETP_imm"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,71 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // SHL: logical left shift of register A by unsafe_shift, which may be 32 or larger. | ||||
| // Throws NotImplementedException for the .X and .CC modifiers. | ||||
| void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg_a; | ||||
|         BitField<39, 1, u64> w; | ||||
|         BitField<43, 1, u64> x; | ||||
|         BitField<47, 1, u64> cc; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     if (fields.x != 0) { | ||||
|         throw NotImplementedException("SHL.X"); | ||||
|     } | ||||
|     if (fields.cc != 0) { | ||||
|         throw NotImplementedException("SHL.CC"); | ||||
|     } | ||||
|     const IR::U32 base{v.X(fields.src_reg_a)}; | ||||
|     const IR::U32 result{[&]() -> IR::U32 { | ||||
|         if (fields.w != 0) { | ||||
|             // With .W the shift amount wraps, so only its five low bits are used. | ||||
|             // Masking with 31 reproduces that and keeps the shift in range. | ||||
|             const IR::U32 wrapped_shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))}; | ||||
|             return v.ir.ShiftLeftLogical(base, wrapped_shift); | ||||
|         } | ||||
|         // Without .W the shift amount is clamped between 0 and 32, and a shift by 32 yields 0. | ||||
|         // The out-of-range shift is still emitted and its result discarded by the select below. | ||||
|         // This is safe because both backends specify an undefined *result*, not undefined | ||||
|         // behavior, for such shifts: | ||||
|         // | ||||
|         // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical | ||||
|         // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater than | ||||
|         //  or equal to the bit width of the components of Base." | ||||
|         // | ||||
|         // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt | ||||
|         // "The results of a shift operation ("<<") are undefined if the value of the second operand | ||||
|         //  is negative, or greater than or equal to the number of bits in the first operand." | ||||
|         // | ||||
|         const IR::U1 in_range{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)}; | ||||
|         const IR::U32 shifted{v.ir.ShiftLeftLogical(base, unsafe_shift)}; | ||||
|         return v.ir.Select(in_range, shifted, v.ir.Imm32(0)); | ||||
|     }()}; | ||||
|     v.X(fields.dest_reg, result); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // SHL with a register shift amount: not translated yet, always throws. | ||||
| void TranslatorVisitor::SHL_reg(u64) { | ||||
|     throw NotImplementedException("SHL_reg"); | ||||
| } | ||||
|  | ||||
| // SHL with a constant buffer shift amount: not translated yet, always throws. | ||||
| void TranslatorVisitor::SHL_cbuf(u64) { | ||||
|     throw NotImplementedException("SHL_cbuf"); | ||||
| } | ||||
|  | ||||
| // SHL with the shift amount read through GetImm20. | ||||
| void TranslatorVisitor::SHL_imm(u64 insn) { | ||||
|     const IR::U32 shift{GetImm20(insn)}; | ||||
|     SHL(*this, insn, shift); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -0,0 +1,110 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Selects how operand C of XMAD is formed; decoded directly from a 3-bit instruction field. | ||||
| enum class SelectMode : u64 { | ||||
|     Default = 0, | ||||
|     CLO = 1, | ||||
|     CHI = 2, | ||||
|     CSFU = 3, | ||||
|     CBCC = 4, | ||||
| }; | ||||
|  | ||||
| // Which 16-bit half of a 32-bit operand is used. | ||||
| enum class Half : u64 { | ||||
|     H0 = 0, // Least-significant bits (15:0) | ||||
|     H1 = 1, // Most-significant bits (31:16) | ||||
| }; | ||||
|  | ||||
| // Extracts the selected 16-bit half of src with a bit field extract. | ||||
| // is_signed is forwarded to BitFieldExtract. | ||||
| IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) { | ||||
|     // H1 starts at bit 16, H0 at bit 0 | ||||
|     const int first_bit{half == Half::H1 ? 16 : 0}; | ||||
|     return v.ir.BitFieldExtract(src, v.ir.Imm32(first_bit), v.ir.Imm32(16), is_signed); | ||||
| } | ||||
|  | ||||
| // XMAD: 16x16-bit multiply of one half of A by one half of B, plus an addend derived from C. | ||||
| // | ||||
| // v:           visitor used to read/write registers and emit IR | ||||
| // insn:        raw instruction; destination, source A, signedness, half A and CC are decoded here | ||||
| // src_b:       full 32-bit operand B, before half extraction | ||||
| // src_c:       full 32-bit operand C | ||||
| // select_mode: how the addend is formed from C (and B for CBCC) | ||||
| // half_b:      which half of B is multiplied | ||||
| // psl:         shift the product left by 16 before adding | ||||
| // mrg:         replace bits 31:16 of the result with bits 15:0 of B | ||||
| // x:           extended mode (not implemented) | ||||
| // | ||||
| // Throws NotImplementedException for X, the CSFU select mode, invalid select modes and CC. | ||||
| void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, | ||||
|           SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg_a; | ||||
|         BitField<47, 1, u64> cc; | ||||
|         BitField<48, 1, u64> is_a_signed; | ||||
|         BitField<49, 1, u64> is_b_signed; | ||||
|         BitField<53, 1, Half> half_a; | ||||
|     } const xmad{insn}; | ||||
|  | ||||
|     if (x) { | ||||
|         throw NotImplementedException("XMAD X"); | ||||
|     } | ||||
|     const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)}; | ||||
|     const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)}; | ||||
|  | ||||
|     IR::U32 product{v.ir.IMul(op_a, op_b)}; | ||||
|     if (psl) { | ||||
|         // .PSL shifts the product 16 bits | ||||
|         product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16)); | ||||
|     } | ||||
|     const IR::U32 op_c{[&]() -> IR::U32 { | ||||
|         switch (select_mode) { | ||||
|         case SelectMode::Default: | ||||
|             return src_c; | ||||
|         case SelectMode::CLO: | ||||
|             return ExtractHalf(v, src_c, Half::H0, false); | ||||
|         case SelectMode::CHI: | ||||
|             return ExtractHalf(v, src_c, Half::H1, false); | ||||
|         case SelectMode::CBCC: | ||||
|             // .CBCC adds (B << 16) to C; the addend must include src_c, not src_b twice | ||||
|             return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c); | ||||
|         case SelectMode::CSFU: | ||||
|             throw NotImplementedException("XMAD CSFU"); | ||||
|         } | ||||
|         throw NotImplementedException("Invalid XMAD select mode {}", select_mode); | ||||
|     }()}; | ||||
|     IR::U32 result{v.ir.IAdd(product, op_c)}; | ||||
|     if (mrg) { | ||||
|         // .MRG inserts src_b [15:0] into result's [31:16]. | ||||
|         const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)}; | ||||
|         result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16)); | ||||
|     } | ||||
|     if (xmad.cc != 0) { | ||||
|         throw NotImplementedException("XMAD CC"); | ||||
|     } | ||||
|     // Store result | ||||
|     v.X(xmad.dest_reg, result); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // XMAD register form: not translated yet, always throws. | ||||
| void TranslatorVisitor::XMAD_reg(u64) { | ||||
|     throw NotImplementedException("XMAD (reg)"); | ||||
| } | ||||
|  | ||||
| // XMAD "rc" form: not translated yet, always throws. | ||||
| void TranslatorVisitor::XMAD_rc(u64) { | ||||
|     throw NotImplementedException("XMAD (rc)"); | ||||
| } | ||||
|  | ||||
| // XMAD "cr" form: not translated yet, always throws. | ||||
| void TranslatorVisitor::XMAD_cr(u64) { | ||||
|     throw NotImplementedException("XMAD (cr)"); | ||||
| } | ||||
|  | ||||
| // XMAD with a 16-bit immediate operand B (bits 20-35) and operand C in a register (bits 39-46). | ||||
| // The low half of B is always the one multiplied. | ||||
| void TranslatorVisitor::XMAD_imm(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<20, 16, u64> src_b; | ||||
|         BitField<36, 1, u64> psl; | ||||
|         BitField<37, 1, u64> mrg; | ||||
|         BitField<38, 1, u64> x; | ||||
|         BitField<39, 8, IR::Reg> src_c; | ||||
|         BitField<50, 3, SelectMode> select_mode; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     const bool has_psl{fields.psl != 0}; | ||||
|     const bool has_mrg{fields.mrg != 0}; | ||||
|     const bool has_x{fields.x != 0}; | ||||
|     const IR::U32 operand_b{ir.Imm32(static_cast<u32>(fields.src_b))}; | ||||
|     const IR::U32 operand_c{X(fields.src_c)}; | ||||
|     XMAD(*this, insn, operand_b, operand_c, fields.select_mode, Half::H0, has_psl, has_mrg, | ||||
|          has_x); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -10,14 +10,33 @@ | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class StoreSize : u64 { | ||||
|     U8, | ||||
|     S8, | ||||
|     U16, | ||||
|     S16, | ||||
| // Access size of a global load; decoded directly from the 3-bit size field of LDG. | ||||
| enum class LoadSize : u64 { | ||||
|     U8 = 0,  // Zero-extend | ||||
|     S8 = 1,  // Sign-extend | ||||
|     U16 = 2, // Zero-extend | ||||
|     S16 = 3, // Sign-extend | ||||
|     B32 = 4, | ||||
|     B64 = 5, | ||||
|     B128 = 6, | ||||
|     U128 = 7, // ??? | ||||
| }; | ||||
|  | ||||
| // Access size of a global store; decoded directly from the 3-bit size field of STG. | ||||
| enum class StoreSize : u64 { | ||||
|     U8 = 0,  // Zero-extend | ||||
|     S8 = 1,  // Sign-extend | ||||
|     U16 = 2, // Zero-extend | ||||
|     S16 = 3, // Sign-extend | ||||
|     B32 = 4, | ||||
|     B64 = 5, | ||||
|     B128 = 6, | ||||
| }; | ||||
|  | ||||
| // See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||||
| // Cache policy of a global load; decoded directly from the 2-bit cache field of LDG. | ||||
| enum class LoadCache : u64 { | ||||
|     CA = 0, // Cache at all levels, likely to be accessed again | ||||
|     CG = 1, // Cache at global level (cache in L2 and below, not L1) | ||||
|     CI = 2, // ??? | ||||
|     CV = 3, // Don't cache and fetch again (consider cached system memory lines stale, fetch again) | ||||
| }; | ||||
|  | ||||
| // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html | ||||
| @@ -27,61 +46,137 @@ enum class StoreCache : u64 { | ||||
|     CS, // Cache streaming, likely to be accessed once | ||||
|     WT, // Cache write-through (to system memory) | ||||
| }; | ||||
|  | ||||
| // Builds the 64-bit global memory address shared by LDG and STG. | ||||
| // The base comes from the address register (one register zero-extended, or a register pair when | ||||
| // .E is set) and the 24-bit offset in bits 20-43 is added to it. | ||||
| // Throws NotImplementedException when .E is set and the address register index is odd. | ||||
| IR::U64 Address(TranslatorVisitor& v, u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<20, 24, s64> addr_offset; | ||||
|         BitField<20, 24, u64> rz_addr_offset; | ||||
|         BitField<45, 1, u64> e; | ||||
|     } const mem{insn}; | ||||
|  | ||||
|     const IR::U64 base{[&]() -> IR::U64 { | ||||
|         if (mem.e != 0) { | ||||
|             // With .E the pointer is 64-bit and lives in an aligned register pair | ||||
|             if (!IR::IsAligned(mem.addr_reg, 2)) { | ||||
|                 throw NotImplementedException("Unaligned address register"); | ||||
|             } | ||||
|             return v.ir.PackUint2x32( | ||||
|                 v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1))); | ||||
|         } | ||||
|         // Without .E the pointer is 32-bit and gets zero-extended | ||||
|         return v.ir.ConvertU(64, v.X(mem.addr_reg)); | ||||
|     }()}; | ||||
|     // With RZ as the base the offset is an absolute (unsigned) address, otherwise it is signed | ||||
|     const bool is_absolute{mem.addr_reg == IR::Reg::RZ}; | ||||
|     const u64 offset{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value()) | ||||
|                                  : static_cast<u64>(mem.addr_offset.Value())}; | ||||
|     return v.ir.IAdd(base, v.ir.Imm64(offset)); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // LDG: loads from global memory into one, two or four consecutive registers. | ||||
| // Throws NotImplementedException for misaligned destination registers on 64/128-bit loads, for | ||||
| // the U128 size and for unknown sizes. | ||||
| void TranslatorVisitor::LDG(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<46, 2, LoadCache> cache; | ||||
|         BitField<48, 3, LoadSize> size; | ||||
|     } const ldg{insn}; | ||||
|  | ||||
|     // Address to read from and first register to write to | ||||
|     const IR::U64 address{Address(*this, insn)}; | ||||
|     const IR::Reg dest_reg{ldg.dest_reg}; | ||||
|  | ||||
|     // Spreads the elements of a loaded vector over consecutive registers | ||||
|     const auto write_words{[&](const IR::Value& vector, int num_words) { | ||||
|         for (int word = 0; word < num_words; ++word) { | ||||
|             X(dest_reg + word, ir.CompositeExtract(vector, word)); | ||||
|         } | ||||
|     }}; | ||||
|  | ||||
|     switch (ldg.size) { | ||||
|     case LoadSize::U8: | ||||
|         X(dest_reg, ir.LoadGlobalU8(address)); | ||||
|         break; | ||||
|     case LoadSize::S8: | ||||
|         X(dest_reg, ir.LoadGlobalS8(address)); | ||||
|         break; | ||||
|     case LoadSize::U16: | ||||
|         X(dest_reg, ir.LoadGlobalU16(address)); | ||||
|         break; | ||||
|     case LoadSize::S16: | ||||
|         X(dest_reg, ir.LoadGlobalS16(address)); | ||||
|         break; | ||||
|     case LoadSize::B32: | ||||
|         X(dest_reg, ir.LoadGlobal32(address)); | ||||
|         break; | ||||
|     case LoadSize::B64: { | ||||
|         // Two registers, the first one must have an even index | ||||
|         if (!IR::IsAligned(dest_reg, 2)) { | ||||
|             throw NotImplementedException("Unaligned data registers"); | ||||
|         } | ||||
|         const IR::Value vector{ir.LoadGlobal64(address)}; | ||||
|         write_words(vector, 2); | ||||
|         break; | ||||
|     } | ||||
|     case LoadSize::B128: { | ||||
|         // Four registers, the first one must have an index that is a multiple of four | ||||
|         if (!IR::IsAligned(dest_reg, 4)) { | ||||
|             throw NotImplementedException("Unaligned data registers"); | ||||
|         } | ||||
|         const IR::Value vector{ir.LoadGlobal128(address)}; | ||||
|         write_words(vector, 4); | ||||
|         break; | ||||
|     } | ||||
|     case LoadSize::U128: | ||||
|         throw NotImplementedException("LDG U.128"); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid LDG size {}", ldg.size.Value()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::STG(u64 insn) { | ||||
|     // STG stores registers into global memory. | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> data_reg; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<45, 1, u64> e; | ||||
|         BitField<46, 2, StoreCache> cache; | ||||
|         BitField<48, 3, StoreSize> size; | ||||
|     } const stg{insn}; | ||||
|  | ||||
|     const IR::U64 address{[&]() -> IR::U64 { | ||||
|         if (stg.e == 0) { | ||||
|             // STG without .E uses a 32-bit pointer, zero-extend it | ||||
|             return ir.ConvertU(64, X(stg.addr_reg)); | ||||
|         } | ||||
|         if (!IR::IsAligned(stg.addr_reg, 2)) { | ||||
|             throw NotImplementedException("Unaligned address register"); | ||||
|         } | ||||
|         // Pack two registers to build the 32-bit address | ||||
|         return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1))); | ||||
|     }()}; | ||||
|  | ||||
|     // Pointer to store data into | ||||
|     const IR::U64 address{Address(*this, insn)}; | ||||
|     const IR::Reg data_reg{stg.data_reg}; | ||||
|     switch (stg.size) { | ||||
|     case StoreSize::U8: | ||||
|         ir.WriteGlobalU8(address, X(stg.data_reg)); | ||||
|         ir.WriteGlobalU8(address, X(data_reg)); | ||||
|         break; | ||||
|     case StoreSize::S8: | ||||
|         ir.WriteGlobalS8(address, X(stg.data_reg)); | ||||
|         ir.WriteGlobalS8(address, X(data_reg)); | ||||
|         break; | ||||
|     case StoreSize::U16: | ||||
|         ir.WriteGlobalU16(address, X(stg.data_reg)); | ||||
|         ir.WriteGlobalU16(address, X(data_reg)); | ||||
|         break; | ||||
|     case StoreSize::S16: | ||||
|         ir.WriteGlobalS16(address, X(stg.data_reg)); | ||||
|         ir.WriteGlobalS16(address, X(data_reg)); | ||||
|         break; | ||||
|     case StoreSize::B32: | ||||
|         ir.WriteGlobal32(address, X(stg.data_reg)); | ||||
|         ir.WriteGlobal32(address, X(data_reg)); | ||||
|         break; | ||||
|     case StoreSize::B64: { | ||||
|         if (!IR::IsAligned(stg.data_reg, 2)) { | ||||
|         if (!IR::IsAligned(data_reg, 2)) { | ||||
|             throw NotImplementedException("Unaligned data registers"); | ||||
|         } | ||||
|         const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; | ||||
|         const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))}; | ||||
|         ir.WriteGlobal64(address, vector); | ||||
|         break; | ||||
|     } | ||||
|     case StoreSize::B128: | ||||
|         if (!IR::IsAligned(stg.data_reg, 4)) { | ||||
|         if (!IR::IsAligned(data_reg, 4)) { | ||||
|             throw NotImplementedException("Unaligned data registers"); | ||||
|         } | ||||
|         const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), | ||||
|                                                      X(stg.data_reg + 2), X(stg.data_reg + 3))}; | ||||
|         const IR::Value vector{ | ||||
|             ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))}; | ||||
|         ir.WriteGlobal128(address, vector); | ||||
|         break; | ||||
|     } | ||||
|   | ||||
| @@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) { | ||||
| void TranslatorVisitor::MOV_imm(u64 insn) { | ||||
|     const MOV mov{insn}; | ||||
|     CheckMask(mov); | ||||
|     X(mov.dest_reg, GetImm(insn)); | ||||
|     X(mov.dest_reg, GetImm20(insn)); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
| @@ -0,0 +1,114 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| // Special register indices as encoded in the 8-bit source field of S2R. | ||||
| // Only the thread id (SR_TID_*) and workgroup id (SR_CTAID_*) entries are translated by Read. | ||||
| // NOTE(review): SM_SHADER_TYPE is the only entry without the SR_ prefix; confirm it is intended. | ||||
| enum class SpecialRegister : u64 { | ||||
|     SR_LANEID = 0, | ||||
|     SR_VIRTCFG = 2, | ||||
|     SR_VIRTID = 3, | ||||
|     // Performance counters | ||||
|     SR_PM0 = 4, | ||||
|     SR_PM1 = 5, | ||||
|     SR_PM2 = 6, | ||||
|     SR_PM3 = 7, | ||||
|     SR_PM4 = 8, | ||||
|     SR_PM5 = 9, | ||||
|     SR_PM6 = 10, | ||||
|     SR_PM7 = 11, | ||||
|     SR_ORDERING_TICKET = 15, | ||||
|     SR_PRIM_TYPE = 16, | ||||
|     SR_INVOCATION_ID = 17, | ||||
|     SR_Y_DIRECTION = 18, | ||||
|     SR_THREAD_KILL = 19, | ||||
|     SM_SHADER_TYPE = 20, | ||||
|     SR_DIRECTCBEWRITEADDRESSLOW = 21, | ||||
|     SR_DIRECTCBEWRITEADDRESSHIGH = 22, | ||||
|     SR_DIRECTCBEWRITEENABLE = 23, | ||||
|     SR_MACHINE_ID_0 = 24, | ||||
|     SR_MACHINE_ID_1 = 25, | ||||
|     SR_MACHINE_ID_2 = 26, | ||||
|     SR_MACHINE_ID_3 = 27, | ||||
|     SR_AFFINITY = 28, | ||||
|     SR_INVOCATION_INFO = 29, | ||||
|     SR_WSCALEFACTOR_XY = 30, | ||||
|     SR_WSCALEFACTOR_Z = 31, | ||||
|     // Thread id inside the workgroup | ||||
|     SR_TID = 32, | ||||
|     SR_TID_X = 33, | ||||
|     SR_TID_Y = 34, | ||||
|     SR_TID_Z = 35, | ||||
|     // Workgroup id | ||||
|     SR_CTAID_X = 37, | ||||
|     SR_CTAID_Y = 38, | ||||
|     SR_CTAID_Z = 39, | ||||
|     SR_NTID = 49, | ||||
|     SR_CirQueueIncrMinusOne = 50, | ||||
|     SR_NLATC = 51, | ||||
|     SR_SWINLO = 57, | ||||
|     SR_SWINSZ = 58, | ||||
|     SR_SMEMSZ = 59, | ||||
|     SR_SMEMBANKS = 60, | ||||
|     SR_LWINLO = 61, | ||||
|     SR_LWINSZ = 62, | ||||
|     SR_LMEMLOSZ = 63, | ||||
|     SR_LMEMHIOFF = 64, | ||||
|     SR_EQMASK = 65, | ||||
|     SR_LTMASK = 66, | ||||
|     SR_LEMASK = 67, | ||||
|     SR_GTMASK = 68, | ||||
|     SR_GEMASK = 69, | ||||
|     SR_REGALLOC = 70, | ||||
|     SR_GLOBALERRORSTATUS = 73, | ||||
|     SR_WARPERRORSTATUS = 75, | ||||
|     SR_PM_HI0 = 81, | ||||
|     SR_PM_HI1 = 82, | ||||
|     SR_PM_HI2 = 83, | ||||
|     SR_PM_HI3 = 84, | ||||
|     SR_PM_HI4 = 85, | ||||
|     SR_PM_HI5 = 86, | ||||
|     SR_PM_HI6 = 87, | ||||
|     SR_PM_HI7 = 88, | ||||
|     SR_CLOCKLO = 89, | ||||
|     SR_CLOCKHI = 90, | ||||
|     SR_GLOBALTIMERLO = 91, | ||||
|     SR_GLOBALTIMERHI = 92, | ||||
|     SR_HWTASKID = 105, | ||||
|     SR_CIRCULARQUEUEENTRYINDEX = 106, | ||||
|     SR_CIRCULARQUEUEENTRYADDRESSLOW = 107, | ||||
|     SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108, | ||||
| }; | ||||
|  | ||||
| // Emits the IR read for a special register and returns its value. | ||||
| // Only the local invocation id and workgroup id components are supported; any other register | ||||
| // throws NotImplementedException. | ||||
| [[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) { | ||||
|     switch (special_register) { | ||||
|     // Workgroup id | ||||
|     case SpecialRegister::SR_CTAID_X: | ||||
|         return ir.WorkgroupIdX(); | ||||
|     case SpecialRegister::SR_CTAID_Y: | ||||
|         return ir.WorkgroupIdY(); | ||||
|     case SpecialRegister::SR_CTAID_Z: | ||||
|         return ir.WorkgroupIdZ(); | ||||
|     // Thread id inside the workgroup | ||||
|     case SpecialRegister::SR_TID_X: | ||||
|         return ir.LocalInvocationIdX(); | ||||
|     case SpecialRegister::SR_TID_Y: | ||||
|         return ir.LocalInvocationIdY(); | ||||
|     case SpecialRegister::SR_TID_Z: | ||||
|         return ir.LocalInvocationIdZ(); | ||||
|     default: | ||||
|         throw NotImplementedException("S2R special register {}", special_register); | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // S2R: copies the special register encoded in bits 20-27 into the destination register. | ||||
| void TranslatorVisitor::S2R(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<20, 8, SpecialRegister> src_reg; | ||||
|     } const fields{insn}; | ||||
|  | ||||
|     const IR::U32 value{Read(ir, fields.src_reg)}; | ||||
|     X(fields.dest_reg, value); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -7,21 +7,8 @@ | ||||
| #include "shader_recompiler/frontend/maxwell/opcode.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
|  | ||||
| [[maybe_unused]] static inline void DumpOptimized(IR::Block& block) { | ||||
|     auto raw{IR::DumpBlock(block)}; | ||||
|  | ||||
|     Optimization::GetSetElimination(block); | ||||
|     Optimization::DeadCodeEliminationPass(block); | ||||
|     Optimization::IdentityRemovalPass(block); | ||||
|     auto dumped{IR::DumpBlock(block)}; | ||||
|  | ||||
|     fmt::print(stderr, "{}", dumped); | ||||
| } | ||||
|  | ||||
| [[noreturn]] static void ThrowNotImplemented(Opcode opcode) { | ||||
|     throw NotImplementedException("Instruction {} is not implemented", opcode); | ||||
| } | ||||
| @@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::DADD_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::DEPBAR(u64) { | ||||
|     ThrowNotImplemented(Opcode::DEPBAR); | ||||
| void TranslatorVisitor::DEPBAR() { | ||||
|     // DEPBAR is a no-op | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::DFMA_reg(u64) { | ||||
| @@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::F2F_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for FADD_reg/FADD_cbuf/FADD_imm/FADD32I and FCHK_reg: each one ignores | ||||
| // its unnamed u64 argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::FADD_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FADD_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::FADD_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FADD_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FADD32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::FADD32I); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FCHK_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FCHK_reg); | ||||
| } | ||||
| @@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FCMP_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the FFMA variants and FLO_reg: each one ignores its unnamed u64 | ||||
| // argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::FFMA_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FFMA_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA_rc(u64) { | ||||
|     ThrowNotImplemented(Opcode::FFMA_rc); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA_cr(u64) { | ||||
|     ThrowNotImplemented(Opcode::FFMA_cr); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FFMA_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FFMA32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::FFMA32I); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FLO_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FLO_reg); | ||||
| } | ||||
| @@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FMNMX_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the FMUL variants and FSET_reg: each one ignores its unnamed u64 | ||||
| // argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::FMUL_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FMUL_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::FMUL_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FMUL_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FMUL32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::FMUL32I); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::FSET_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSET_reg); | ||||
| } | ||||
| @@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::I2I_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for IADD_reg/IADD_cbuf/IADD_imm and IADD3_reg: each one ignores its | ||||
| // unnamed u64 argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::IADD_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::IADD_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::IADD_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::IADD3_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD3_reg); | ||||
| } | ||||
| @@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD3_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for IADD32I and ICMP_reg: each one ignores its unnamed u64 argument | ||||
| // and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::IADD32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::IADD32I); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ICMP_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::ICMP_reg); | ||||
| } | ||||
| @@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISBERD); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the ISCADD variants and ISET_reg: each one ignores its unnamed u64 | ||||
| // argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::ISCADD_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISCADD_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISCADD_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISCADD_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISCADD_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISCADD32I(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISCADD32I); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISET_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISET_reg); | ||||
| } | ||||
| @@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISET_imm); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the ISETP variants and JCAL: each one ignores its unnamed u64 | ||||
| // argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::ISETP_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISETP_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISETP_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISETP_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ISETP_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::ISETP_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::JCAL(u64) { | ||||
|     ThrowNotImplemented(Opcode::JCAL); | ||||
| } | ||||
| @@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) { | ||||
|     ThrowNotImplemented(Opcode::LDC); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for LDG and LDL: each one ignores its unnamed u64 argument and | ||||
| // reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::LDG(u64) { | ||||
|     ThrowNotImplemented(Opcode::LDG); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::LDL(u64) { | ||||
|     ThrowNotImplemented(Opcode::LDL); | ||||
| } | ||||
| @@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) { | ||||
|     ThrowNotImplemented(Opcode::RTT); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for S2R and SAM: each one ignores its unnamed u64 argument and | ||||
| // reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::S2R(u64) { | ||||
|     ThrowNotImplemented(Opcode::S2R); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::SAM(u64) { | ||||
|     ThrowNotImplemented(Opcode::SAM); | ||||
| } | ||||
| @@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHFL); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the SHL variants and SHR_reg: each one ignores its unnamed u64 | ||||
| // argument and reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::SHL_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHL_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::SHL_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHL_cbuf); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::SHL_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHL_imm); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::SHR_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHR_reg); | ||||
| } | ||||
| @@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) { | ||||
|     ThrowNotImplemented(Opcode::VSHR); | ||||
| } | ||||
|  | ||||
| // Placeholder handlers for the XMAD variants: each one ignores its unnamed u64 argument and | ||||
| // reports its own opcode through ThrowNotImplemented. | ||||
| void TranslatorVisitor::XMAD_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::XMAD_reg); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::XMAD_rc(u64) { | ||||
|     ThrowNotImplemented(Opcode::XMAD_rc); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::XMAD_cr(u64) { | ||||
|     ThrowNotImplemented(Opcode::XMAD_cr); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::XMAD_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::XMAD_imm); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
|   | ||||
| @@ -1,87 +0,0 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <array> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
|  | ||||
| namespace Shader::Optimization { | ||||
| namespace { | ||||
| using Iterator = IR::Block::iterator; | ||||
|  | ||||
| // Kind of state a RegisterInfo entry tracks; Reg is the only kind defined here. | ||||
| enum class TrackingType { | ||||
|     Reg, | ||||
| }; | ||||
|  | ||||
| // Per-register bookkeeping used by DoGet/DoSet while scanning a block. | ||||
| // Every member carries an initializer so that a default-initialized entry (for example an | ||||
| // element of a plain std::array) never holds indeterminate state. | ||||
| struct RegisterInfo { | ||||
|     // Value currently associated with the register; DoGet treats an empty value as | ||||
|     // "nothing tracked yet". | ||||
|     IR::Value register_value; | ||||
|     // Kind of access that produced register_value. | ||||
|     TrackingType tracking_type{TrackingType::Reg}; | ||||
|     // Position of the latest set instruction; only read while set_instruction_present is true. | ||||
|     Iterator last_set_instruction{}; | ||||
|     // True once DoSet has recorded a set instruction in last_set_instruction. | ||||
|     bool set_instruction_present = false; | ||||
| }; | ||||
|  | ||||
| // Records `value` as the tracked contents of the register described by `info`, with `set_inst` | ||||
| // as the write that produced it. If an earlier write is still recorded, that instruction is | ||||
| // invalidated and erased from `block` before the record is replaced. | ||||
| void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst, | ||||
|            TrackingType tracking_type) { | ||||
|     const bool has_previous_set{info.set_instruction_present}; | ||||
|     if (has_previous_set) { | ||||
|         Iterator stale_set = info.last_set_instruction; | ||||
|         stale_set->Invalidate(); | ||||
|         block.Instructions().erase(stale_set); | ||||
|     } | ||||
|     info.last_set_instruction = set_inst; | ||||
|     info.set_instruction_present = true; | ||||
|     info.tracking_type = tracking_type; | ||||
|     info.register_value = value; | ||||
| } | ||||
|  | ||||
| // Builds a fresh RegisterInfo whose value is the result of `get_inst` itself and which has | ||||
| // no set instruction recorded. | ||||
| RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) { | ||||
|     RegisterInfo fresh{}; | ||||
|     fresh.tracking_type = tracking_type; | ||||
|     fresh.register_value = IR::Value{&*get_inst}; | ||||
|     return fresh; | ||||
| } | ||||
|  | ||||
| // Handles a register read. When a value of the same tracking type is already recorded, every | ||||
| // use of `get_inst` is redirected to that value; otherwise tracking restarts from `get_inst`. | ||||
| void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) { | ||||
|     // The short-circuit keeps tracking_type unread while no value is recorded. | ||||
|     const bool can_forward{!info.register_value.IsEmpty() && | ||||
|                            info.tracking_type == tracking_type}; | ||||
|     if (!can_forward) { | ||||
|         info = Nothing(get_inst, tracking_type); | ||||
|         return; | ||||
|     } | ||||
|     get_inst->ReplaceUsesWith(info.register_value); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // Walks `block` once and routes every GetRegister/SetRegister on a register other than RZ | ||||
| // through DoGet/DoSet using that register's tracking slot. Other opcodes are ignored. | ||||
| void GetSetElimination(IR::Block& block) { | ||||
|     std::array<RegisterInfo, 255> reg_info; | ||||
|  | ||||
|     for (Iterator inst = block.begin(); inst != block.end(); ++inst) { | ||||
|         const auto opcode{inst->Opcode()}; | ||||
|         const bool is_get{opcode == IR::Opcode::GetRegister}; | ||||
|         const bool is_set{opcode == IR::Opcode::SetRegister}; | ||||
|         if (!is_get && !is_set) { | ||||
|             continue; | ||||
|         } | ||||
|         const IR::Reg reg{inst->Arg(0).Reg()}; | ||||
|         if (reg == IR::Reg::RZ) { | ||||
|             // RZ accesses are never tracked. | ||||
|             continue; | ||||
|         } | ||||
|         // at() keeps the lookup bounds-checked against the 255 slots. | ||||
|         RegisterInfo& slot{reg_info.at(static_cast<size_t>(reg))}; | ||||
|         if (is_get) { | ||||
|             DoGet(slot, inst, TrackingType::Reg); | ||||
|         } else { | ||||
|             DoSet(block, slot, inst->Arg(1), inst, TrackingType::Reg); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
| @@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) { | ||||
| } | ||||
|  | ||||
| void DeadCodeEliminationPass(IR::Block& block); | ||||
| void GetSetElimination(IR::Block& block); | ||||
| void IdentityRemovalPass(IR::Block& block); | ||||
| void SsaRewritePass(IR::Function& function); | ||||
| void VerificationPass(const IR::Block& block); | ||||
|   | ||||
| @@ -51,7 +51,8 @@ void RunDatabase() { | ||||
| int main() { | ||||
|     // RunDatabase(); | ||||
|  | ||||
|     FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; | ||||
|     // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; | ||||
|     FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; | ||||
|     auto cfg{std::make_unique<Flow::CFG>(env, 0)}; | ||||
|     // fmt::print(stdout, "{}\n", cfg->Dot()); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user