shader: Implement TLD
This commit is contained in:
		| @@ -133,6 +133,7 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp | ||||
|     frontend/maxwell/translate/impl/texture_gather_swizzled.cpp | ||||
|     frontend/maxwell/translate/impl/texture_gather.cpp | ||||
|     frontend/maxwell/translate/impl/texture_load.cpp | ||||
|     frontend/maxwell/translate/impl/texture_query.cpp | ||||
|     frontend/maxwell/translate/impl/video_helper.cpp | ||||
|     frontend/maxwell/translate/impl/video_helper.h | ||||
|   | ||||
| @@ -254,7 +254,7 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c | ||||
|     const auto info{inst->Flags<IR::TextureInstInfo>()}; | ||||
|     const ImageOperands operands(offset, lod, ms); | ||||
|     return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4], | ||||
|                 Texture(ctx, index), coords, operands.Mask(), operands.Span()); | ||||
|                 TextureImage(ctx, index), coords, operands.Mask(), operands.Span()); | ||||
| } | ||||
|  | ||||
| Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id lod) { | ||||
|   | ||||
| @@ -378,7 +378,7 @@ OPCODE(BindlessImageSampleDrefImplicitLod,                  F32,            U32, | ||||
| OPCODE(BindlessImageSampleDrefExplicitLod,                  F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| OPCODE(BindlessImageGather,                                 F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||
| OPCODE(BindlessImageGatherDref,                             F32x4,          U32,            Opaque,         Opaque,         Opaque,         F32,            ) | ||||
| OPCODE(BindlessImageFetch,                                  F32x4,          U32,            Opaque,         U32,            U32,                            ) | ||||
| OPCODE(BindlessImageFetch,                                  F32x4,          U32,            Opaque,         Opaque,         Opaque,         Opaque,         ) | ||||
| OPCODE(BindlessImageQueryDimensions,                        U32x4,          U32,            U32,                                                            ) | ||||
|  | ||||
| OPCODE(BoundImageSampleImplicitLod,                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||
| @@ -387,7 +387,7 @@ OPCODE(BoundImageSampleDrefImplicitLod,                     F32,            U32, | ||||
| OPCODE(BoundImageSampleDrefExplicitLod,                     F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| OPCODE(BoundImageGather,                                    F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||
| OPCODE(BoundImageGatherDref,                                F32x4,          U32,            Opaque,         Opaque,         Opaque,         F32,            ) | ||||
| OPCODE(BoundImageFetch,                                     F32x4,          U32,            Opaque,         U32,            U32,                            ) | ||||
| OPCODE(BoundImageFetch,                                     F32x4,          U32,            Opaque,         Opaque,         Opaque,         Opaque,         ) | ||||
| OPCODE(BoundImageQueryDimensions,                           U32x4,          U32,            U32,                                                            ) | ||||
|  | ||||
| OPCODE(ImageSampleImplicitLod,                              F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||
| @@ -396,7 +396,7 @@ OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32, | ||||
| OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| OPCODE(ImageGather,                                         F32x4,          U32,            Opaque,         Opaque,         Opaque,                         ) | ||||
| OPCODE(ImageGatherDref,                                     F32x4,          U32,            Opaque,         Opaque,         Opaque,         F32,            ) | ||||
| OPCODE(ImageFetch,                                          F32x4,          U32,            Opaque,         U32,            U32,                            ) | ||||
| OPCODE(ImageFetch,                                          F32x4,          U32,            Opaque,         Opaque,         Opaque,         Opaque,         ) | ||||
| OPCODE(ImageQueryDimensions,                                U32x4,          U32,            U32,                                                            ) | ||||
|  | ||||
| // Warp operations | ||||
|   | ||||
| @@ -252,8 +252,8 @@ INST(SYNC,         "SYNC",           "1111 0000 1111 1---") | ||||
| INST(TEX,          "TEX",            "1100 0--- ---- ----") | ||||
| INST(TEX_b,        "TEX (b)",        "1101 1110 10-- ----") | ||||
| INST(TEXS,         "TEXS",           "1101 -00- ---- ----") | ||||
| INST(TLD,          "TLD",            "1101 1100 --11 1---") | ||||
| INST(TLD_b,        "TLD (b)",        "1101 1101 --11 1---") | ||||
| INST(TLD,          "TLD",            "1101 1100 ---- ----") | ||||
| INST(TLD_b,        "TLD (b)",        "1101 1101 ---- ----") | ||||
| INST(TLD4,         "TLD4",           "1100 10-- ---- ----") | ||||
| INST(TLD4_b,       "TLD4 (b)",       "1101 1110 11-- ----") | ||||
| INST(TLD4S,        "TLD4S",          "1101 1111 -0-- ----") | ||||
|   | ||||
| @@ -313,14 +313,6 @@ void TranslatorVisitor::SYNC(u64) { | ||||
|     ThrowNotImplemented(Opcode::SYNC); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::TLD(u64) { | ||||
|     ThrowNotImplemented(Opcode::TLD); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::TLD_b(u64) { | ||||
|     ThrowNotImplemented(Opcode::TLD_b); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::TLDS(u64) { | ||||
|     ThrowNotImplemented(Opcode::TLDS); | ||||
| } | ||||
|   | ||||
| @@ -0,0 +1,165 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <optional> | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/ir/modifiers.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
|  | ||||
| enum class TextureType : u64 { | ||||
|     _1D, | ||||
|     ARRAY_1D, | ||||
|     _2D, | ||||
|     ARRAY_2D, | ||||
|     _3D, | ||||
|     ARRAY_3D, | ||||
|     CUBE, | ||||
|     ARRAY_CUBE, | ||||
| }; | ||||
|  | ||||
| Shader::TextureType GetType(TextureType type, bool dc) { | ||||
|     switch (type) { | ||||
|     case TextureType::_1D: | ||||
|         return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D; | ||||
|     case TextureType::ARRAY_1D: | ||||
|         return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D; | ||||
|     case TextureType::_2D: | ||||
|         return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D; | ||||
|     case TextureType::ARRAY_2D: | ||||
|         return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D; | ||||
|     case TextureType::_3D: | ||||
|         return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D; | ||||
|     case TextureType::ARRAY_3D: | ||||
|         throw NotImplementedException("3D array texture type"); | ||||
|     case TextureType::CUBE: | ||||
|         return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube; | ||||
|     case TextureType::ARRAY_CUBE: | ||||
|         return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube; | ||||
|     } | ||||
|     throw NotImplementedException("Invalid texture type {}", type); | ||||
| } | ||||
|  | ||||
| IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) { | ||||
|     const auto read_array{ | ||||
|         [&]() -> IR::U32 { return v.ir.BitFieldExtract(v.X(reg), v.ir.Imm32(0), v.ir.Imm32(16)); }}; | ||||
|     switch (type) { | ||||
|     case TextureType::_1D: | ||||
|         return v.X(reg); | ||||
|     case TextureType::ARRAY_1D: | ||||
|         return v.ir.CompositeConstruct(v.X(reg + 1), read_array()); | ||||
|     case TextureType::_2D: | ||||
|         return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1)); | ||||
|     case TextureType::ARRAY_2D: | ||||
|         return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), read_array()); | ||||
|     case TextureType::_3D: | ||||
|         return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||||
|     case TextureType::ARRAY_3D: | ||||
|         throw NotImplementedException("3D array texture type"); | ||||
|     case TextureType::CUBE: | ||||
|         return v.ir.CompositeConstruct(v.X(reg), v.X(reg + 1), v.X(reg + 2)); | ||||
|     case TextureType::ARRAY_CUBE: | ||||
|         return v.ir.CompositeConstruct(v.X(reg + 1), v.X(reg + 2), v.X(reg + 3), read_array()); | ||||
|     } | ||||
|     throw NotImplementedException("Invalid texture type {}", type); | ||||
| } | ||||
|  | ||||
| IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) { | ||||
|     const IR::U32 value{v.X(reg++)}; | ||||
|     switch (type) { | ||||
|     case TextureType::_1D: | ||||
|     case TextureType::ARRAY_1D: | ||||
|         return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true); | ||||
|     case TextureType::_2D: | ||||
|     case TextureType::ARRAY_2D: | ||||
|         return v.ir.CompositeConstruct( | ||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true)); | ||||
|     case TextureType::_3D: | ||||
|     case TextureType::ARRAY_3D: | ||||
|         return v.ir.CompositeConstruct( | ||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true), | ||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true), | ||||
|             v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4), true)); | ||||
|     case TextureType::CUBE: | ||||
|     case TextureType::ARRAY_CUBE: | ||||
|         throw NotImplementedException("Illegal offset on CUBE sample"); | ||||
|     } | ||||
|     throw NotImplementedException("Invalid texture type {}", type); | ||||
| } | ||||
|  | ||||
| void Impl(TranslatorVisitor& v, u64 insn, bool is_bindless) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<49, 1, u64> nodep; | ||||
|         BitField<55, 1, u64> lod; | ||||
|         BitField<50, 1, u64> multisample; | ||||
|         BitField<35, 1, u64> aoffi; | ||||
|         BitField<54, 1, u64> clamp; | ||||
|         BitField<51, 3, IR::Pred> sparse_pred; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> coord_reg; | ||||
|         BitField<20, 8, IR::Reg> meta_reg; | ||||
|         BitField<28, 3, TextureType> type; | ||||
|         BitField<31, 4, u64> mask; | ||||
|         BitField<36, 13, u64> cbuf_offset; | ||||
|     } const tld{insn}; | ||||
|  | ||||
|     const IR::Value coords{MakeCoords(v, tld.coord_reg, tld.type)}; | ||||
|  | ||||
|     IR::Reg meta_reg{tld.meta_reg}; | ||||
|     IR::Value handle; | ||||
|     IR::Value offset; | ||||
|     IR::U32 lod; | ||||
|     IR::U32 multisample; | ||||
|     if (!is_bindless) { | ||||
|         handle = v.ir.Imm32(static_cast<u32>(tld.cbuf_offset.Value() * 4)); | ||||
|     } else { | ||||
|         handle = v.X(meta_reg++); | ||||
|     } | ||||
|     if (tld.lod != 0) { | ||||
|         lod = v.X(meta_reg++); | ||||
|     } | ||||
|     if (tld.aoffi != 0) { | ||||
|         offset = MakeOffset(v, meta_reg, tld.type); | ||||
|     } | ||||
|     if (tld.multisample != 0) { | ||||
|         multisample = v.X(meta_reg++); | ||||
|     } | ||||
|     if (tld.clamp != 0) { | ||||
|         throw NotImplementedException("TLD.CL - CLAMP is not implmented"); | ||||
|     } | ||||
|     IR::TextureInstInfo info{}; | ||||
|     info.type.Assign(GetType(tld.type, false)); | ||||
|     const IR::Value sample{[&]() -> IR::Value { | ||||
|         return v.ir.ImageFetch(handle, coords, offset, lod, multisample, info); | ||||
|     }()}; | ||||
|  | ||||
|     IR::Reg dest_reg{tld.dest_reg}; | ||||
|     for (size_t element = 0; element < 4; ++element) { | ||||
|         if (((tld.mask >> element) & 1) == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         v.F(dest_reg, IR::F32{v.ir.CompositeExtract(sample, element)}); | ||||
|         ++dest_reg; | ||||
|     } | ||||
|     if (tld.sparse_pred != IR::Pred::PT) { | ||||
|         v.ir.SetPred(tld.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample))); | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void TranslatorVisitor::TLD(u64 insn) { | ||||
|     Impl(*this, insn, false); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::TLD_b(u64 insn) { | ||||
|     Impl(*this, insn, true); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
| @@ -382,6 +382,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::ImageSampleDrefExplicitLod: | ||||
|     case IR::Opcode::ImageGather: | ||||
|     case IR::Opcode::ImageGatherDref: | ||||
|     case IR::Opcode::ImageFetch: | ||||
|     case IR::Opcode::ImageQueryDimensions: { | ||||
|         const TextureType type{inst.Flags<IR::TextureInstInfo>().type}; | ||||
|         info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D || | ||||
|   | ||||
| @@ -64,12 +64,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { | ||||
|     } | ||||
|     const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); | ||||
|     if (it != map_ranges.end()) { | ||||
|         ASSERT(it->first == gpu_addr); | ||||
|         // ASSERT(it->first == gpu_addr); | ||||
|         map_ranges.erase(it); | ||||
|     } else { | ||||
|         UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); | ||||
|     } | ||||
|  | ||||
|     const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); | ||||
|  | ||||
|     for (const auto& map : submapped_ranges) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user