From 17a82b56d74afcebaad78ce4754d8ee99ea66f93 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 15 Mar 2021 04:54:43 -0300
Subject: [PATCH] shader: Implement TEXS

---
 src/shader_recompiler/CMakeLists.txt          |   3 +-
 .../backend/spirv/emit_spirv_image.cpp        |  11 +-
 .../frontend/ir/ir_emitter.cpp                |   8 +
 .../frontend/ir/ir_emitter.h                  |   3 +
 src/shader_recompiler/frontend/ir/modifiers.h |   3 +-
 .../translate/impl/not_implemented.cpp        |   4 -
 .../{texture_sample.cpp => texture_fetch.cpp} |   0
 .../translate/impl/texture_fetch_swizzled.cpp | 262 ++++++++++++++++++
 8 files changed, 287 insertions(+), 7 deletions(-)
 rename src/shader_recompiler/frontend/maxwell/translate/impl/{texture_sample.cpp => texture_fetch.cpp} (100%)
 create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp

diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt
index 20409e09a..97e9b4c8e 100644
--- a/src/shader_recompiler/CMakeLists.txt
+++ b/src/shader_recompiler/CMakeLists.txt
@@ -102,7 +102,8 @@ add_library(shader_recompiler STATIC
     frontend/maxwell/translate/impl/predicate_set_predicate.cpp
     frontend/maxwell/translate/impl/predicate_set_register.cpp
     frontend/maxwell/translate/impl/select_source_with_predicate.cpp
-    frontend/maxwell/translate/impl/texture_sample.cpp
+    frontend/maxwell/translate/impl/texture_fetch.cpp
+    frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
     frontend/maxwell/translate/translate.cpp
     frontend/maxwell/translate/translate.h
     ir_opt/collect_shader_info_pass.cpp
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
index 5f4783c95..f75152911 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
@@ -57,18 +57,27 @@ Id Texture(EmitContext& ctx, const IR::Value& index) {
     throw NotImplementedException("Indirect texture sample");
 }
 
+Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) { // Tags sample per inst's texture flags
+    const auto info{inst->Flags<IR::TextureInstInfo>()};
+    if (info.relaxed_precision != 0) {
+        ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
+    }
+    return sample; // Same id is handed back so a caller can wrap an emit expression
+}
+
 template <typename MethodPtrType, typename... Args>
 Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
         Id result_type, Args&&... args) {
     IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
     if (!sparse) {
-        return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
+        return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
     }
     const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
     const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
     const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
     sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
     sparse->Invalidate();
+    Decorate(ctx, inst, sample); // Applied to the sparse struct result id; return value unused
     return ctx.OpCompositeExtract(result_type, sample, 1U);
 }
 } // Anonymous namespace
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.cpp b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
index 556961fa4..d94596ee9 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.cpp
@@ -512,6 +512,14 @@ Value IREmitter::UnpackFloat2x16(const U32& value) {
     return Inst(Opcode::UnpackFloat2x16, value);
 }
 
+U32 IREmitter::PackHalf2x16(const Value& vector) { // Emits Opcode::PackHalf2x16, typed as U32
+    return Inst<U32>(Opcode::PackHalf2x16, vector);
+}
+
+Value IREmitter::UnpackHalf2x16(const U32& value) { // Emits Opcode::UnpackHalf2x16 on value
+    return Inst(Opcode::UnpackHalf2x16, value);
+}
+
 F64 IREmitter::PackDouble2x32(const Value& vector) {
     return Inst<F64>(Opcode::PackDouble2x32, vector);
 }
diff --git a/src/shader_recompiler/frontend/ir/ir_emitter.h b/src/shader_recompiler/frontend/ir/ir_emitter.h
index 74fb3dbcb..27ff5a29d 100644
--- a/src/shader_recompiler/frontend/ir/ir_emitter.h
+++ b/src/shader_recompiler/frontend/ir/ir_emitter.h
@@ -115,6 +115,9 @@ public:
     [[nodiscard]] U32 PackFloat2x16(const Value& vector);
     [[nodiscard]] Value UnpackFloat2x16(const U32& value);
 
+    [[nodiscard]] U32 PackHalf2x16(const Value& vector);
+    [[nodiscard]] Value UnpackHalf2x16(const U32& value);
+
     [[nodiscard]] F64 PackDouble2x32(const Value& vector);
     [[nodiscard]] Value UnpackDouble2x32(const F64& value);
 
diff --git a/src/shader_recompiler/frontend/ir/modifiers.h b/src/shader_recompiler/frontend/ir/modifiers.h
index ad07700ae..308c00153 100644
--- a/src/shader_recompiler/frontend/ir/modifiers.h
+++ b/src/shader_recompiler/frontend/ir/modifiers.h
@@ -36,7 +36,8 @@ union TextureInstInfo {
     u32 raw;
     BitField<0, 8, TextureType> type;
     BitField<8, 1, u32> has_bias;
-    BitField<16, 1, u32> has_lod_clamp;
+    BitField<9, 1, u32> has_lod_clamp;
+    BitField<10, 1, u32> relaxed_precision;
 };
 static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
 
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
index 92da5c7e8..9aa7b836c 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
@@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) {
     ThrowNotImplemented(Opcode::SYNC);
 }
 
-void TranslatorVisitor::TEXS(u64) {
-    ThrowNotImplemented(Opcode::TEXS);
-}
-
 void TranslatorVisitor::TLD(u64) {
     ThrowNotImplemented(Opcode::TLD);
 }
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
similarity index 100%
rename from src/shader_recompiler/frontend/maxwell/translate/impl/texture_sample.cpp
rename to src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch.cpp
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
new file mode 100644
index 000000000..ac1615b00
--- /dev/null
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
@@ -0,0 +1,262 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <utility>
+
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "shader_recompiler/frontend/ir/modifiers.h"
+#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+
+namespace Shader::Maxwell {
+namespace {
+enum class Precision : u64 { // Decoded from the 1-bit Encoding::precision field (bit 59)
+    F16, // 0: TEXS writes results through Store16 (values packed two per register)
+    F32, // 1: TEXS writes results through Store32 (one register per value)
+};
+
+union Encoding { // Bit layout of one TEXS instruction word as read by this file
+    u64 raw;
+    BitField<59, 1, Precision> precision; // picks Store16 vs Store32 and the relaxed flag
+    BitField<53, 4, u64> encoding; // selector switched on in Sample
+    BitField<49, 1, u64> nodep; // not read anywhere in this file
+    BitField<28, 8, IR::Reg> dest_reg_b; // second destination; RZ makes Swizzle use RG_LUT
+    BitField<0, 8, IR::Reg> dest_reg_a; // first destination
+    BitField<8, 8, IR::Reg> src_reg_a; // first operand register
+    BitField<20, 8, IR::Reg> src_reg_b; // second operand register
+    BitField<36, 13, u64> cbuf_offset; // used by Sample as the immediate texture handle
+    BitField<50, 3, u64> swizzle; // index into RG_LUT / RGBA_LUT
+};
+
+constexpr unsigned R = 1; // bit 0 of a component mask (component index 0 in Store32/Store16)
+constexpr unsigned G = 2; // bit 1
+constexpr unsigned B = 4; // bit 2
+constexpr unsigned A = 8; // bit 3
+
+constexpr std::array RG_LUT{ // Masks Swizzle returns when dest_reg_b is RZ; index is swizzle
+    R,     // 0
+    G,     // 1
+    B,     // 2
+    A,     // 3
+    R | G, // 4
+    R | A, // 5
+    G | A, // 6
+    B | A, // 7
+};
+
+constexpr std::array RGBA_LUT{ // Masks Swizzle returns when dest_reg_b is not RZ
+    R | G | B,     // 0
+    R | G | A,     // 1
+    R | B | A,     // 2
+    G | B | A,     // 3
+    R | G | B | A, // 4 (last entry: Swizzle throws for swizzle values 5..7)
+};
+
+void CheckAlignment(IR::Reg reg, int alignment) { // Throws unless IR::IsAligned(reg, alignment)
+    if (!IR::IsAligned(reg, alignment)) {
+        throw NotImplementedException("Unaligned register {}", reg); // operands and destinations
+    }
+}
+
+template <typename... Args>
+IR::Value Composite(TranslatorVisitor& v, Args... regs) { // One composite from v.F of each reg
+    return v.ir.CompositeConstruct(v.F(regs)...); // pack order follows the argument order
+}
+
+IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) { // Used for the array cases
+    return v.ir.ConvertUToF(32, v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(16)));
+} // NOTE(review): presumably extracts 16 bits from offset 0 before the U->F convert - confirm
+
+IR::Value Sample(TranslatorVisitor& v, u64 insn) { // Builds the IR sample op for one TEXS word
+    const Encoding texs{insn};
+    const IR::U32 handle{v.ir.Imm32(static_cast<u32>(texs.cbuf_offset))}; // immediate handle
+    const IR::F32 zero{v.ir.Imm32(0.0f)}; // presumably the LOD operand of the .LZ cases
+    const IR::Reg reg_a{texs.src_reg_a};
+    const IR::Reg reg_b{texs.src_reg_b};
+    IR::TextureInstInfo info{};
+    if (texs.precision == Precision::F16) { // F16 form: flag the op as relaxed precision
+        info.relaxed_precision.Assign(1);
+    }
+    switch (texs.encoding) { // 4-bit selector; only values 0..13 have a case
+    case 0: // 1D.LZ
+        info.type.Assign(TextureType::Color1D);
+        return v.ir.ImageSampleExplicitLod(handle, v.F(reg_a), zero, {}, {}, info);
+    case 1: // 2D
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_b), {}, {}, {}, info);
+    case 2: // 2D.LZ
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_b), zero, {}, {}, info);
+    case 3: // 2D.LL
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color2D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b), {},
+                                           {}, info);
+    case 4: // 2D.DC
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefImplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+                                               {}, {}, {}, info);
+    case 5: // 2D.LL.DC: reg_b + 1 and reg_b fill the slots that take reg_b and zero in 2D.LZ.DC
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1),
+                                               v.F(reg_b + 1), v.F(reg_b), {}, {}, info);
+    case 6: // 2D.LZ.DC
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Shadow2D);
+        return v.ir.ImageSampleDrefExplicitLod(handle, Composite(v, reg_a, reg_a + 1), v.F(reg_b),
+                                               zero, {}, {}, info);
+    case 7: // ARRAY_2D: third coordinate is ReadArray(reg_a); first two are reg_a + 1 and reg_b
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorArray2D);
+        return v.ir.ImageSampleImplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            {}, {}, {}, info);
+    case 8: // ARRAY_2D.LZ: same coordinate layout as ARRAY_2D
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorArray2D);
+        return v.ir.ImageSampleExplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            zero, {}, {}, info);
+    case 9: // ARRAY_2D.LZ.DC: same coordinate layout; reg_b + 1 is passed after the coordinates
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::ShadowArray2D);
+        return v.ir.ImageSampleDrefExplicitLod(
+            handle, v.ir.CompositeConstruct(v.F(reg_a + 1), v.F(reg_b), ReadArray(v, v.X(reg_a))),
+            v.F(reg_b + 1), zero, {}, {}, info);
+    case 10: // 3D
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color3D);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+                                           {}, info);
+    case 11: // 3D.LZ
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::Color3D);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), zero, {},
+                                           {}, info);
+    case 12: // CUBE
+        CheckAlignment(reg_a, 2);
+        info.type.Assign(TextureType::ColorCube);
+        return v.ir.ImageSampleImplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b), {}, {},
+                                           {}, info);
+    case 13: // CUBE.LL
+        CheckAlignment(reg_a, 2);
+        CheckAlignment(reg_b, 2);
+        info.type.Assign(TextureType::ColorCube);
+        return v.ir.ImageSampleExplicitLod(handle, Composite(v, reg_a, reg_a + 1, reg_b),
+                                           v.F(reg_b + 1), {}, {}, info);
+    default: // 14 and 15 have no case above
+        throw NotImplementedException("Illegal encoding {}", texs.encoding.Value());
+    }
+}
+
+unsigned Swizzle(u64 insn) { // Returns the R/G/B/A bit mask of components this TEXS writes
+    const Encoding texs{insn};
+    const size_t index{texs.swizzle};
+    const bool dest_b_is_rz{texs.dest_reg_b == IR::Reg::RZ};
+    if (dest_b_is_rz) { // second destination is RZ: look the mask up in RG_LUT
+        if (index < RG_LUT.size()) {
+            return RG_LUT[index];
+        }
+        throw NotImplementedException("Illegal RG encoding {}", index);
+    }
+    if (index < RGBA_LUT.size()) {
+        return RGBA_LUT[index];
+    }
+    throw NotImplementedException("Illegal RGBA encoding {}", index);
+}
+
+IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
+    if (sample.Type() != IR::Type::F32) { // composite result: pick the requested component
+        return IR::F32{v.ir.CompositeExtract(sample, component)};
+    }
+    if (component == 3) { // scalar F32 result: the fourth component reads as 1.0
+        return v.ir.Imm32(1.0f);
+    }
+    return IR::F32{sample}; // scalar F32 result: every other component is the scalar itself
+}
+
+IR::Reg RegStoreComponent32(u64 insn, unsigned index) { // Register for the index-th stored value
+    const Encoding texs{insn};
+    switch (index) { // order: dest_reg_a, dest_reg_a + 1, dest_reg_b, dest_reg_b + 1
+    case 0:
+        return texs.dest_reg_a;
+    case 1:
+        CheckAlignment(texs.dest_reg_a, 2); // checked only once the + 1 register is needed
+        return texs.dest_reg_a + 1;
+    case 2:
+        return texs.dest_reg_b;
+    case 3:
+        CheckAlignment(texs.dest_reg_b, 2); // checked only once the + 1 register is needed
+        return texs.dest_reg_b + 1;
+    }
+    throw LogicError("Invalid store index {}", index); // reached for any index above 3
+}
+
+void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { // One register per value
+    const unsigned mask{Swizzle(insn)};
+    unsigned next_slot{0}; // counts the components written so far
+    for (unsigned channel = 0; channel < 4; ++channel) {
+        const bool selected{(mask & (1u << channel)) != 0};
+        if (selected) {
+            const IR::Reg target{RegStoreComponent32(insn, next_slot)};
+            v.F(target, Extract(v, sample, channel));
+            ++next_slot;
+        }
+    }
+}
+
+IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) { // Two F32 -> one U32
+    return v.ir.PackHalf2x16(v.ir.CompositeConstruct(lhs, rhs)); // lhs first, rhs second
+}
+
+void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) { // Two values per register
+    const unsigned swizzle{Swizzle(insn)};
+    unsigned store_index{0}; // number of selected components gathered so far
+    std::array<IR::F32, 4> swizzled; // selected components, compacted in mask-bit order
+    for (unsigned component = 0; component < 4; ++component) {
+        if (((swizzle >> component) & 1) == 0) {
+            continue;
+        }
+        swizzled[store_index] = Extract(v, sample, component);
+        ++store_index;
+    }
+    const IR::F32 zero{v.ir.Imm32(0.0f)}; // fills the second slot when a register holds one value
+    const Encoding texs{insn};
+    switch (store_index) { // every LUT mask has at least one bit set, so 0 has no case
+    case 1:
+        v.X(texs.dest_reg_a, Pack(v, swizzled[0], zero));
+        break;
+    case 2:
+    case 3:
+    case 4:
+        v.X(texs.dest_reg_a, Pack(v, swizzled[0], swizzled[1])); // first pair -> dest_reg_a
+        switch (store_index) { // remaining one or two values -> dest_reg_b
+        case 2:
+            break;
+        case 3:
+            v.X(texs.dest_reg_b, Pack(v, swizzled[2], zero));
+            break;
+        case 4:
+            v.X(texs.dest_reg_b, Pack(v, swizzled[2], swizzled[3]));
+            break;
+        }
+        break;
+    }
+}
+} // Anonymous namespace
+
+void TranslatorVisitor::TEXS(u64 insn) { // Samples once, then writes back per the precision bit
+    const IR::Value texel{Sample(*this, insn)};
+    const bool is_f32{Encoding{insn}.precision == Precision::F32};
+    if (!is_f32) {
+        return Store16(*this, insn, texel);
+    }
+    Store32(*this, insn, texel);
+}
+
+} // namespace Shader::Maxwell