shader: Implement HADD2
@@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC
    frontend/maxwell/translate/impl/floating_point_multi_function.cpp
    frontend/maxwell/translate/impl/floating_point_multiply.cpp
    frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
    frontend/maxwell/translate/impl/half_floating_point_add.cpp
    frontend/maxwell/translate/impl/impl.cpp
    frontend/maxwell/translate/impl/impl.h
    frontend/maxwell/translate/impl/integer_add.cpp

@@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
void EmitCompositeConstructF16x2(EmitContext& ctx);
void EmitCompositeConstructF16x3(EmitContext& ctx);
void EmitCompositeConstructF16x4(EmitContext& ctx);
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
void EmitCompositeConstructF32x2(EmitContext& ctx);
void EmitCompositeConstructF32x3(EmitContext& ctx);
void EmitCompositeConstructF32x4(EmitContext& ctx);
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
void EmitCompositeExtractF64x2(EmitContext& ctx);
void EmitCompositeExtractF64x3(EmitContext& ctx);
void EmitCompositeExtractF64x4(EmitContext& ctx);
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
@@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value);
Id EmitConvertU64F64(EmitContext& ctx, Id value);
Id EmitConvertU64U32(EmitContext& ctx, Id value);
Id EmitConvertU32U64(EmitContext& ctx, Id value);
Id EmitConvertF16F32(EmitContext& ctx, Id value);
Id EmitConvertF32F16(EmitContext& ctx, Id value);
Id EmitConvertF32F64(EmitContext& ctx, Id value);
Id EmitConvertF64F32(EmitContext& ctx, Id value);

} // namespace Shader::Backend::SPIRV

@@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
    return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
}

void EmitCompositeConstructF16x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
}

void EmitCompositeConstructF16x3(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
}

void EmitCompositeConstructF16x4(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}

Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
    return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
}

Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
    return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
}

Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
    return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
}

Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
@@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
    return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
}

void EmitCompositeConstructF32x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
}

void EmitCompositeConstructF32x3(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
}

void EmitCompositeConstructF32x4(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}

Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
    return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
}

Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
    return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
}

Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
    return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
}

Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
@@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
    return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
}

Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F32[2], object, composite, index);
}

Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F32[3], object, composite, index);
}

Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
}

void EmitCompositeConstructF64x2(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}
@@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) {
    throw NotImplementedException("SPIR-V Instruction");
}

Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F64[2], object, composite, index);
}

Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F64[3], object, composite, index);
}

Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
}

} // namespace Shader::Backend::SPIRV

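Note on the insert emitters above: SPIR-V's OpCompositeInsert has value semantics, so each of these helpers yields a new vector with one component replaced rather than mutating the source. A minimal C++ analogy of what, say, EmitCompositeInsertF32x2 expresses (std::array stands in for the SPIR-V vector; the function name is illustrative):

    #include <array>

    // Value-semantics sketch of OpCompositeInsert: the caller's composite is
    // not modified; a copy with one element replaced is returned.
    std::array<float, 2> CompositeInsertF32x2(std::array<float, 2> composite, float object,
                                              unsigned index) {
        composite[index] = object; // operates on the by-value copy
        return composite;
    }
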
@@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) {
    return ctx.OpUConvert(ctx.U32[1], value);
}

Id EmitConvertF16F32(EmitContext& ctx, Id value) {
    return ctx.OpFConvert(ctx.F16[1], value);
}

Id EmitConvertF32F16(EmitContext& ctx, Id value) {
    return ctx.OpFConvert(ctx.F32[1], value);
}

Id EmitConvertF32F64(EmitContext& ctx, Id value) {
    return ctx.OpFConvert(ctx.F32[1], value);
}

Id EmitConvertF64F32(EmitContext& ctx, Id value) {
    return ctx.OpFConvert(ctx.F64[1], value);
}

} // namespace Shader::Backend::SPIRV

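The four conversions each lower to a single OpFConvert whose result type fixes the target width; widening binary16 to binary32 is exact, while narrowing rounds to the nearest representable half. A small numeric illustration of that narrowing, shown only for intuition:

    #include <cstdio>

    // 0.1f is not representable as a binary16 value; the nearest half is
    // 0.0999755859375, so a ConvertF16F32 followed by ConvertF32F16 round trip
    // does not restore 0.1f.
    int main() {
        const float original = 0.1f;
        const float after_round_trip = 0.0999755859375f; // nearest binary16 neighbour
        std::printf("%.10f -> %.10f\n", original, after_round_trip);
    }
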
@@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
}

Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
    const auto read = [&](Opcode opcode, size_t limit) -> Value {
    const auto read{[&](Opcode opcode, size_t limit) -> Value {
        if (element >= limit) {
            throw InvalidArgument("Out of bounds element {}", element);
        }
        return Inst(opcode, vector, Value{static_cast<u32>(element)});
    };
    }};
    switch (vector.Type()) {
    case Type::U32x2:
        return read(Opcode::CompositeExtractU32x2, 2);
@@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
    }
}

Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
    const auto insert{[&](Opcode opcode, size_t limit) {
        if (element >= limit) {
            throw InvalidArgument("Out of bounds element {}", element);
        }
        return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
    }};
    switch (vector.Type()) {
    case Type::U32x2:
        return insert(Opcode::CompositeInsertU32x2, 2);
    case Type::U32x3:
        return insert(Opcode::CompositeInsertU32x3, 3);
    case Type::U32x4:
        return insert(Opcode::CompositeInsertU32x4, 4);
    case Type::F16x2:
        return insert(Opcode::CompositeInsertF16x2, 2);
    case Type::F16x3:
        return insert(Opcode::CompositeInsertF16x3, 3);
    case Type::F16x4:
        return insert(Opcode::CompositeInsertF16x4, 4);
    case Type::F32x2:
        return insert(Opcode::CompositeInsertF32x2, 2);
    case Type::F32x3:
        return insert(Opcode::CompositeInsertF32x3, 3);
    case Type::F32x4:
        return insert(Opcode::CompositeInsertF32x4, 4);
    case Type::F64x2:
        return insert(Opcode::CompositeInsertF64x2, 2);
    case Type::F64x3:
        return insert(Opcode::CompositeInsertF64x3, 3);
    case Type::F64x4:
        return insert(Opcode::CompositeInsertF64x4, 4);
    default:
        ThrowInvalidType(vector.Type());
    }
}

Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
    if (true_value.Type() != false_value.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
@@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) {
}

Value IREmitter::UnpackFloat2x16(const U32& value) {
    return Inst<Value>(Opcode::UnpackFloat2x16, value);
    return Inst(Opcode::UnpackFloat2x16, value);
}

F64 IREmitter::PackDouble2x32(const Value& vector) {
@@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
    }
}

U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
    switch (result_bitsize) {
    case 32:
        switch (value.Type()) {
@@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}

F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
    switch (result_bitsize) {
    case 16:
        switch (value.Type()) {
        case Type::F16:
            // Nothing to do
            return value;
        case Type::F32:
            return Inst<F16>(Opcode::ConvertF16F32, value);
        case Type::F64:
            throw LogicError("Illegal conversion from F64 to F16");
        default:
            break;
        }
        break;
    case 32:
        switch (value.Type()) {
        case Type::F16:
            return Inst<F32>(Opcode::ConvertF32F16, value);
        case Type::F32:
            // Nothing to do
            return value;
        case Type::F64:
            return Inst<F32>(Opcode::ConvertF32F64, value);
        default:
            break;
        }
        break;
    case 64:
        switch (value.Type()) {
        case Type::F16:
            throw LogicError("Illegal conversion from F16 to F64");
        case Type::F32:
            return Inst<F64>(Opcode::ConvertF64F32, value);
        case Type::F64:
            // Nothing to do
            return value;
        default:
            break;
        }
        break;
    }
    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}

} // namespace Shader::IR

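Taken together, CompositeInsert, the renamed UConvert, and the new FPConvert let a translator ask for a result width and have the emitter pick the opcode from the value's current type, throwing on impossible requests. A hypothetical caller-side sketch, with ir and packed_reg assumed to exist in the surrounding translator code:

    // Hypothetical usage of the new IREmitter helpers; not taken from the commit.
    const IR::Value vector{ir.UnpackFloat2x16(packed_reg)};            // F16x2
    const IR::F16 low{ir.CompositeExtract(vector, 0)};
    const IR::F16F32F64 widened{ir.FPConvert(32, low)};                // binary16 -> binary32
    const IR::F16F32F64 narrowed{ir.FPConvert(16, widened)};           // back to binary16
    const IR::Value updated{ir.CompositeInsert(vector, narrowed, 1)};  // replace the high half
    const IR::U32 repacked{ir.PackFloat2x16(updated)};
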
@@ -97,6 +97,7 @@ public:
    [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
                                           const Value& e4);
    [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
    [[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

    [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
                               const Value& false_value);
@@ -186,7 +187,8 @@ public:
    [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
    [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);

    [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);
    [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
    [[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);

private:
    IR::Block::iterator insertion_point;

@@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4,                             U32x4,          U32,
OPCODE(CompositeExtractU32x2,                               U32,            U32x2,          U32,                                            )
OPCODE(CompositeExtractU32x3,                               U32,            U32x3,          U32,                                            )
OPCODE(CompositeExtractU32x4,                               U32,            U32x4,          U32,                                            )
OPCODE(CompositeInsertU32x2,                                U32x2,          U32x2,          U32,            U32,                            )
OPCODE(CompositeInsertU32x3,                                U32x3,          U32x3,          U32,            U32,                            )
OPCODE(CompositeInsertU32x4,                                U32x4,          U32x4,          U32,            U32,                            )
OPCODE(CompositeConstructF16x2,                             F16x2,          F16,            F16,                                            )
OPCODE(CompositeConstructF16x3,                             F16x3,          F16,            F16,            F16,                            )
OPCODE(CompositeConstructF16x4,                             F16x4,          F16,            F16,            F16,            F16,            )
OPCODE(CompositeExtractF16x2,                               F16,            F16x2,          U32,                                            )
OPCODE(CompositeExtractF16x3,                               F16,            F16x3,          U32,                                            )
OPCODE(CompositeExtractF16x4,                               F16,            F16x4,          U32,                                            )
OPCODE(CompositeInsertF16x2,                                F16x2,          F16x2,          F16,            U32,                            )
OPCODE(CompositeInsertF16x3,                                F16x3,          F16x3,          F16,            U32,                            )
OPCODE(CompositeInsertF16x4,                                F16x4,          F16x4,          F16,            U32,                            )
OPCODE(CompositeConstructF32x2,                             F32x2,          F32,            F32,                                            )
OPCODE(CompositeConstructF32x3,                             F32x3,          F32,            F32,            F32,                            )
OPCODE(CompositeConstructF32x4,                             F32x4,          F32,            F32,            F32,            F32,            )
OPCODE(CompositeExtractF32x2,                               F32,            F32x2,          U32,                                            )
OPCODE(CompositeExtractF32x3,                               F32,            F32x3,          U32,                                            )
OPCODE(CompositeExtractF32x4,                               F32,            F32x4,          U32,                                            )
OPCODE(CompositeInsertF32x2,                                F32x2,          F32x2,          F32,            U32,                            )
OPCODE(CompositeInsertF32x3,                                F32x3,          F32x3,          F32,            U32,                            )
OPCODE(CompositeInsertF32x4,                                F32x4,          F32x4,          F32,            U32,                            )
OPCODE(CompositeConstructF64x2,                             F64x2,          F64,            F64,                                            )
OPCODE(CompositeConstructF64x3,                             F64x3,          F64,            F64,            F64,                            )
OPCODE(CompositeConstructF64x4,                             F64x4,          F64,            F64,            F64,            F64,            )
OPCODE(CompositeExtractF64x2,                               F64,            F64x2,          U32,                                            )
OPCODE(CompositeExtractF64x3,                               F64,            F64x3,          U32,                                            )
OPCODE(CompositeExtractF64x4,                               F64,            F64x4,          U32,                                            )
OPCODE(CompositeInsertF64x2,                                F64x2,          F64x2,          F64,            U32,                            )
OPCODE(CompositeInsertF64x3,                                F64x3,          F64x3,          F64,            U32,                            )
OPCODE(CompositeInsertF64x4,                                F64x4,          F64x4,          F64,            U32,                            )

// Select operations
OPCODE(SelectU8,                                            U8,             U1,             U8,             U8,                             )
@@ -277,6 +289,9 @@ OPCODE(ConvertU32F64,                                       U32,            F64,
OPCODE(ConvertU64F16,                                       U64,            F16,                                                            )
OPCODE(ConvertU64F32,                                       U64,            F32,                                                            )
OPCODE(ConvertU64F64,                                       U64,            F64,                                                            )

OPCODE(ConvertU64U32,                                       U64,            U32,                                                            )
OPCODE(ConvertU32U64,                                       U32,            U64,                                                            )
OPCODE(ConvertF16F32,                                       F16,            F32,                                                            )
OPCODE(ConvertF32F16,                                       F32,            F16,                                                            )
OPCODE(ConvertF32F64,                                       F32,            F64,                                                            )
OPCODE(ConvertF64F32,                                       F64,            F32,                                                            )

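The table above is an X-macro include: every row names an opcode, its result type, and up to four argument types, and consumers expand it by defining OPCODE before including the file. A generic sketch of that pattern (the enum and file name here are illustrative, not lifted from the codebase):

    // X-macro expansion sketch: turn each OPCODE(...) row into an enumerator.
    enum class Opcode {
    #define OPCODE(name, result_type, ...) name,
    #include "opcodes.inc"
    #undef OPCODE
    };
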
@@ -0,0 +1,184 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"

namespace Shader::Maxwell {
namespace {
enum class Merge : u64 {
    H1_H0,
    F32,
    MRG_H0,
    MRG_H1,
};

enum class Swizzle : u64 {
    H1_H0,
    F32,
    H0_H0,
    H1_H1,
};

std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    switch (swizzle) {
    case Swizzle::H1_H0: {
        const IR::Value vector{ir.UnpackFloat2x16(value)};
        return {IR::F16{ir.CompositeExtract(vector, 0)}, IR::F16{ir.CompositeExtract(vector, 1)}};
    }
    case Swizzle::H0_H0: {
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 0)};
        return {scalar, scalar};
    }
    case Swizzle::H1_H1: {
        const IR::F16 scalar{ir.CompositeExtract(ir.UnpackFloat2x16(value), 1)};
        return {scalar, scalar};
    }
    case Swizzle::F32: {
        const IR::F32 scalar{ir.BitCast<IR::F32>(value)};
        return {scalar, scalar};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}

IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    switch (merge) {
    case Merge::H1_H0:
        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
    case Merge::F32:
        return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
    case Merge::MRG_H0:
    case Merge::MRG_H1: {
        const IR::Value vector{ir.UnpackFloat2x16(ir.GetReg(dest))};
        const bool h0{merge == Merge::MRG_H0};
        const IR::F16& insert{h0 ? lhs : rhs};
        return ir.PackFloat2x16(ir.CompositeInsert(vector, insert, h0 ? 0 : 1));
    }
    }
    throw InvalidArgument("Invalid merge {}", merge);
}

void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hadd2{insn};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion) {
        if (lhs_a.Type() == IR::Type::F16) {
            lhs_a = v.ir.FPConvert(32, lhs_a);
            rhs_a = v.ir.FPConvert(32, rhs_a);
        }
        if (lhs_b.Type() == IR::Type::F16) {
            lhs_b = v.ir.FPConvert(32, lhs_b);
            rhs_b = v.ir.FPConvert(32, rhs_b);
        }
    }
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);

    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction{true},
        .rounding{IR::FpRounding::DontCare},
        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
    };
    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    if (promotion) {
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }
    v.X(hadd2.dest_reg, MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge));
}
} // Anonymous namespace

void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<32, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hadd2{insn};

    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, hadd2.swizzle_b,
          GetReg20(insn));
}

void TranslatorVisitor::HADD2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<56, 1, u64> neg_b;
        BitField<54, 1, u64> abs_b;
    } const hadd2{insn};

    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
          hadd2.neg_a != 0, hadd2.swizzle_a, hadd2.abs_b != 0, hadd2.neg_b != 0, Swizzle::F32,
          GetCbuf(insn));
}

void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hadd2{insn};

    const u32 imm{static_cast<u32>(hadd2.low << 6) | ((hadd2.neg_low != 0 ? 1 : 0) << 15) |
                  static_cast<u32>(hadd2.high << 22) | ((hadd2.neg_high != 0 ? 1 : 0) << 31)};
    HADD2(*this, insn, hadd2.merge, hadd2.ftz != 0, hadd2.sat != 0, hadd2.abs_a != 0,
          hadd2.neg_a != 0, hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}

void TranslatorVisitor::HADD2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<20, 32, u64> imm32;
    } const hadd2{insn};

    const u32 imm{static_cast<u32>(hadd2.imm32)};
    HADD2(*this, insn, Merge::H1_H0, hadd2.ftz != 0, hadd2.sat != 0, false, hadd2.neg_a != 0,
          hadd2.swizzle_a, false, false, Swizzle::H1_H0, ir.Imm32(imm));
}
} // namespace Shader::Maxwell
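
The HADD2_imm encoding stores each source half as a 9-bit field (exponent plus the top mantissa bits) with a separate sign bit, so the translator rebuilds a packed pair of binary16 values before reusing the common path: low lands in bits [14:6] with its sign at bit 15, high in bits [30:22] with its sign at bit 31, and the bottom six mantissa bits of each half are implicitly zero. A standalone sketch of that reassembly (helper name invented for illustration):

    #include <cstdint>

    // Rebuilds the 32-bit immediate holding two packed binary16 values from the
    // 9-bit fields and sign bits decoded out of the HADD2_imm instruction word.
    std::uint32_t PackHadd2Immediate(std::uint32_t low9, bool neg_low,
                                     std::uint32_t high9, bool neg_high) {
        std::uint32_t imm = 0;
        imm |= (low9 & 0x1ffu) << 6;          // low half: bits [14:6]
        imm |= (neg_low ? 1u : 0u) << 15;     // low half sign
        imm |= (high9 & 0x1ffu) << 22;        // high half: bits [30:22]
        imm |= (neg_high ? 1u : 0u) << 31;    // high half sign
        return imm;
    }
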
@@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
    const IR::U64 address{[&]() -> IR::U64 {
        if (mem.e == 0) {
            // LDG/STG without .E uses a 32-bit pointer, zero-extend it
            return v.ir.ConvertU(64, v.X(mem.addr_reg));
            return v.ir.UConvert(64, v.X(mem.addr_reg));
        }
        if (!IR::IsAligned(mem.addr_reg, 2)) {
            throw NotImplementedException("Unaligned address register");

@@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
    ThrowNotImplemented(Opcode::GETLMEMBASE);
}

void TranslatorVisitor::HADD2_reg(u64) {
    ThrowNotImplemented(Opcode::HADD2_reg);
}

void TranslatorVisitor::HADD2_cbuf(u64) {
    ThrowNotImplemented(Opcode::HADD2_cbuf);
}

void TranslatorVisitor::HADD2_imm(u64) {
    ThrowNotImplemented(Opcode::HADD2_imm);
}

void TranslatorVisitor::HADD2_32I(u64) {
    ThrowNotImplemented(Opcode::HADD2_32I);
}

void TranslatorVisitor::HFMA2_reg(u64) {
    ThrowNotImplemented(Opcode::HFMA2_reg);
}

@@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
            offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
        }
    } else {
        offset = ir.ConvertU(32, IR::U64{inst.Arg(0)});
        offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
    }
    // Subtract the least significant 32 bits from the guest offset. The result is the storage
    // buffer offset in bytes.

@@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) {
        return IR::Opcode::CompositeExtractF32x3;
    case IR::Opcode::CompositeExtractF16x4:
        return IR::Opcode::CompositeExtractF32x4;
    case IR::Opcode::CompositeInsertF16x2:
        return IR::Opcode::CompositeInsertF32x2;
    case IR::Opcode::CompositeInsertF16x3:
        return IR::Opcode::CompositeInsertF32x3;
    case IR::Opcode::CompositeInsertF16x4:
        return IR::Opcode::CompositeInsertF32x4;
    case IR::Opcode::ConvertS16F16:
        return IR::Opcode::ConvertS16F32;
    case IR::Opcode::ConvertS32F16:
@@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) {
        return IR::Opcode::PackHalf2x16;
    case IR::Opcode::UnpackFloat2x16:
        return IR::Opcode::UnpackHalf2x16;
    case IR::Opcode::ConvertF32F16:
        return IR::Opcode::Identity;
    case IR::Opcode::ConvertF16F32:
        return IR::Opcode::Identity;
    default:
        return op;
    }

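When fp16 is unavailable, this pass rewrites every half opcode to its binary32 counterpart. The pack/unpack pair changes meaning in the process: PackFloat2x16 reinterprets two raw 16-bit patterns, while its replacement PackHalf2x16 first narrows two binary32 values, which is what the now-f32 operands require; the explicit f16/f32 conversions become Identity because every value is already binary32. A rough scalar analogy of the raw-reinterpretation flavour (names are illustrative):

    #include <cstdint>

    // PackFloat2x16 analogy: combine two already-encoded binary16 bit patterns
    // into one 32-bit word. PackHalf2x16 would instead narrow two floats first.
    std::uint32_t PackRawHalves(std::uint16_t low_bits, std::uint16_t high_bits) {
        return static_cast<std::uint32_t>(low_bits) |
               (static_cast<std::uint32_t>(high_bits) << 16);
    }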