Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
This commit is contained in:
		| @@ -21,6 +21,8 @@ | ||||
|  | ||||
| namespace OpenGL::GLShader { | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| using Tegra::Shader::Attribute; | ||||
| using Tegra::Shader::AttributeUse; | ||||
| using Tegra::Shader::Header; | ||||
| @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||||
| using Operation = const OperationNode&; | ||||
|  | ||||
| enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||||
|  | ||||
| struct TextureAoffi {}; | ||||
| using TextureArgument = std::pair<Type, Node>; | ||||
| using TextureIR = std::variant<TextureAoffi, TextureArgument>; | ||||
|  | ||||
| enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | ||||
| constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | ||||
|     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | ||||
| constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | ||||
|     static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | ||||
|  | ||||
| enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||||
|  | ||||
| class ShaderWriter { | ||||
| public: | ||||
|     void AddExpression(std::string_view text) { | ||||
| @@ -91,7 +97,7 @@ private: | ||||
| }; | ||||
|  | ||||
| /// Generates code to use for a swizzle operation. | ||||
| static std::string GetSwizzle(u32 elem) { | ||||
| std::string GetSwizzle(u32 elem) { | ||||
|     ASSERT(elem <= 3); | ||||
|     std::string swizzle = "."; | ||||
|     swizzle += "xyzw"[elem]; | ||||
| @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { | ||||
| } | ||||
|  | ||||
| /// Translate topology | ||||
| static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||||
| std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||||
|     switch (topology) { | ||||
|     case Tegra::Shader::OutputTopology::PointList: | ||||
|         return "points"; | ||||
| @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||||
| } | ||||
|  | ||||
| /// Returns true if an object has to be treated as precise | ||||
| static bool IsPrecise(Operation operand) { | ||||
| bool IsPrecise(Operation operand) { | ||||
|     const auto& meta = operand.GetMeta(); | ||||
|  | ||||
|     if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | ||||
| @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| static bool IsPrecise(Node node) { | ||||
| bool IsPrecise(Node node) { | ||||
|     if (const auto operation = std::get_if<OperationNode>(node)) { | ||||
|         return IsPrecise(*operation); | ||||
|     } | ||||
| @@ -723,8 +729,8 @@ private: | ||||
|                                                          result_type)); | ||||
|     } | ||||
|  | ||||
|     std::string GenerateTexture(Operation operation, const std::string& func, | ||||
|                                 const std::vector<std::pair<Type, Node>>& extras) { | ||||
|     std::string GenerateTexture(Operation operation, const std::string& function_suffix, | ||||
|                                 const std::vector<TextureIR>& extras) { | ||||
|         constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | ||||
|  | ||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||
| @@ -734,11 +740,11 @@ private: | ||||
|         const bool has_array = meta->sampler.IsArray(); | ||||
|         const bool has_shadow = meta->sampler.IsShadow(); | ||||
|  | ||||
|         std::string expr = func; | ||||
|         expr += '('; | ||||
|         expr += GetSampler(meta->sampler); | ||||
|         expr += ", "; | ||||
|  | ||||
|         std::string expr = "texture" + function_suffix; | ||||
|         if (!meta->aoffi.empty()) { | ||||
|             expr += "Offset"; | ||||
|         } | ||||
|         expr += '(' + GetSampler(meta->sampler) + ", "; | ||||
|         expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); | ||||
|         expr += '('; | ||||
|         for (std::size_t i = 0; i < count; ++i) { | ||||
| @@ -756,38 +762,76 @@ private: | ||||
|         } | ||||
|         expr += ')'; | ||||
|  | ||||
|         for (const auto& extra_pair : extras) { | ||||
|             const auto [type, operand] = extra_pair; | ||||
|             if (operand == nullptr) { | ||||
|                 continue; | ||||
|             } | ||||
|             expr += ", "; | ||||
|  | ||||
|             switch (type) { | ||||
|             case Type::Int: | ||||
|                 if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||||
|                     // Inline the string as an immediate integer in GLSL (some extra arguments are | ||||
|                     // required to be constant) | ||||
|                     expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||||
|                 } else { | ||||
|                     expr += "ftoi(" + Visit(operand) + ')'; | ||||
|                 } | ||||
|                 break; | ||||
|             case Type::Float: | ||||
|                 expr += Visit(operand); | ||||
|                 break; | ||||
|             default: { | ||||
|                 const auto type_int = static_cast<u32>(type); | ||||
|                 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||||
|                 expr += '0'; | ||||
|                 break; | ||||
|             } | ||||
|         for (const auto& variant : extras) { | ||||
|             if (const auto argument = std::get_if<TextureArgument>(&variant)) { | ||||
|                 expr += GenerateTextureArgument(*argument); | ||||
|             } else if (std::get_if<TextureAoffi>(&variant)) { | ||||
|                 expr += GenerateTextureAoffi(meta->aoffi); | ||||
|             } else { | ||||
|                 UNREACHABLE(); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return expr + ')'; | ||||
|     } | ||||
|  | ||||
|     /// Builds the GLSL text for one optional extra argument of a texture call. | ||||
|     /// @param argument Pair of the type expected by the call and the node holding the value. | ||||
|     /// @return ", <expression>" for the argument, or an empty string when the node is null. | ||||
|     std::string GenerateTextureArgument(TextureArgument argument) { | ||||
|         const Type kind = argument.first; | ||||
|         const Node value = argument.second; | ||||
|         if (value == nullptr) { | ||||
|             // Absent argument: contribute nothing, not even the separator. | ||||
|             return {}; | ||||
|         } | ||||
|  | ||||
|         const std::string separator = ", "; | ||||
|         if (kind == Type::Float) { | ||||
|             return separator + Visit(value); | ||||
|         } | ||||
|         if (kind == Type::Int) { | ||||
|             const auto immediate = std::get_if<ImmediateNode>(value); | ||||
|             if (immediate != nullptr) { | ||||
|                 // Emit immediates as signed integer literals in GLSL (some extra arguments | ||||
|                 // are required to be constant) | ||||
|                 return separator + std::to_string(static_cast<s32>(immediate->GetValue())); | ||||
|             } | ||||
|             return separator + "ftoi(" + Visit(value) + ')'; | ||||
|         } | ||||
|  | ||||
|         // Any other type is reported and replaced by a literal zero. | ||||
|         const auto kind_index = static_cast<u32>(kind); | ||||
|         UNIMPLEMENTED_MSG("Unimplemented extra type={}", kind_index); | ||||
|         return separator + '0'; | ||||
|     } | ||||
|  | ||||
|     /// Builds the GLSL text for the offset argument of a texture call. | ||||
|     /// @param aoffi One node per offset component; at most three are supported. | ||||
|     /// @return ", <ctor>(<components>)" with an int/ivec2/ivec3 constructor, or an empty string | ||||
|     ///         when no offsets are given. | ||||
|     std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||||
|         if (aoffi.empty()) { | ||||
|             return {}; | ||||
|         } | ||||
|         constexpr std::array<const char*, 3> offset_constructors = {"int", "ivec2", "ivec3"}; | ||||
|  | ||||
|         std::string result = ", "; | ||||
|         // at() rejects more than three components before any component is visited. | ||||
|         result += offset_constructors.at(aoffi.size() - 1); | ||||
|         result += '('; | ||||
|  | ||||
|         bool is_first = true; | ||||
|         for (const Node component : aoffi) { | ||||
|             if (!is_first) { | ||||
|                 result += ", "; | ||||
|             } | ||||
|             is_first = false; | ||||
|  | ||||
|             const auto immediate = std::get_if<ImmediateNode>(component); | ||||
|             if (immediate != nullptr) { | ||||
|                 // Inline the value as an immediate integer in GLSL (AOFFI arguments are | ||||
|                 // required to be constant by the standard). | ||||
|                 result += std::to_string(static_cast<s32>(immediate->GetValue())); | ||||
|             } else { | ||||
|                 result += "ftoi(" + Visit(component) + ')'; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         result += ')'; | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|     std::string Assign(Operation operation) { | ||||
|         const Node dest = operation[0]; | ||||
|         const Node src = operation[1]; | ||||
| @@ -1164,7 +1208,8 @@ private: | ||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||
|         ASSERT(meta); | ||||
|  | ||||
|         std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); | ||||
|         std::string expr = GenerateTexture( | ||||
|             operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); | ||||
|         if (meta->sampler.IsShadow()) { | ||||
|             expr = "vec4(" + expr + ')'; | ||||
|         } | ||||
| @@ -1175,7 +1220,8 @@ private: | ||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||
|         ASSERT(meta); | ||||
|  | ||||
|         std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); | ||||
|         std::string expr = GenerateTexture( | ||||
|             operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); | ||||
|         if (meta->sampler.IsShadow()) { | ||||
|             expr = "vec4(" + expr + ')'; | ||||
|         } | ||||
| @@ -1187,7 +1233,8 @@ private: | ||||
|         ASSERT(meta); | ||||
|  | ||||
|         const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; | ||||
|         return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + | ||||
|         return GenerateTexture(operation, "Gather", | ||||
|                                {TextureArgument{type, meta->component}, TextureAoffi{}}) + | ||||
|                GetSwizzle(meta->element); | ||||
|     } | ||||
|  | ||||
| @@ -1217,8 +1264,8 @@ private: | ||||
|         ASSERT(meta); | ||||
|  | ||||
|         if (meta->element < 2) { | ||||
|             return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + | ||||
|                    " * vec2(256))" + GetSwizzle(meta->element) + "))"; | ||||
|             return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + | ||||
|                    GetSwizzle(meta->element) + "))"; | ||||
|         } | ||||
|         return "0"; | ||||
|     } | ||||
| @@ -1571,6 +1618,8 @@ private: | ||||
|     ShaderWriter code; | ||||
| }; | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| std::string GetCommonDeclarations() { | ||||
|     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); | ||||
|     const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); | ||||
|   | ||||
| @@ -7,7 +7,9 @@ | ||||
| #include <fmt/format.h> | ||||
|  | ||||
| #include "common/assert.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
|  | ||||
| @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|  | ||||
|     switch (opcode->get().GetId()) { | ||||
|     case OpCode::Id::TEX: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), | ||||
|                              "AOFFI is not implemented"); | ||||
|  | ||||
|         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||||
|             LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||||
|         } | ||||
|  | ||||
|         const TextureType texture_type{instr.tex.texture_type}; | ||||
|         const bool is_array = instr.tex.array != 0; | ||||
|         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||||
|         const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||||
|         const auto process_mode = instr.tex.GetTextureProcessMode(); | ||||
|         WriteTexInstructionFloat( | ||||
|             bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); | ||||
|             bb, instr, | ||||
|             GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::TEXS: { | ||||
| @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|     } | ||||
|     case OpCode::Id::TLD4: { | ||||
|         ASSERT(instr.tld4.array == 0); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), | ||||
|                              "AOFFI is not implemented"); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||||
|                              "NDV is not implemented"); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||||
| @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|         const auto texture_type = instr.tld4.texture_type.Value(); | ||||
|         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||||
|         const bool is_array = instr.tld4.array != 0; | ||||
|         WriteTexInstructionFloat(bb, instr, | ||||
|                                  GetTld4Code(instr, texture_type, depth_compare, is_array)); | ||||
|         const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||||
|         WriteTexInstructionFloat( | ||||
|             bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::TLD4S: { | ||||
| @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|         Node4 values; | ||||
|         for (u32 element = 0; element < values.size(); ++element) { | ||||
|             auto coords_copy = coords; | ||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; | ||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; | ||||
|             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||
|         } | ||||
|  | ||||
| @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|                 if (!instr.txq.IsComponentEnabled(element)) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||||
|                 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | ||||
|                 const Node value = | ||||
|                     Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||||
|                 SetTemporal(bb, indexer++, value); | ||||
| @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | ||||
|  | ||||
|         for (u32 element = 0; element < 2; ++element) { | ||||
|             auto params = coords; | ||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; | ||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | ||||
|             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||||
|             SetTemporal(bb, element, value); | ||||
|         } | ||||
| @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | ||||
|  | ||||
| Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||
|                                TextureProcessMode process_mode, std::vector<Node> coords, | ||||
|                                Node array, Node depth_compare, u32 bias_offset) { | ||||
|                                Node array, Node depth_compare, u32 bias_offset, | ||||
|                                std::vector<Node> aoffi) { | ||||
|     const bool is_array = array; | ||||
|     const bool is_shadow = depth_compare; | ||||
|  | ||||
| @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||
|     Node4 values; | ||||
|     for (u32 element = 0; element < values.size(); ++element) { | ||||
|         auto copy_coords = coords; | ||||
|         MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; | ||||
|         MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; | ||||
|         values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||||
|     } | ||||
|  | ||||
| @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||
| } | ||||
|  | ||||
| Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||||
|                            TextureProcessMode process_mode, bool depth_compare, bool is_array) { | ||||
|     const bool lod_bias_enabled = | ||||
|         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); | ||||
|                            TextureProcessMode process_mode, bool depth_compare, bool is_array, | ||||
|                            bool is_aoffi) { | ||||
|     const bool lod_bias_enabled{ | ||||
|         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||||
|  | ||||
|     u64 parameter_register = instr.gpr20.Value(); | ||||
|     if (lod_bias_enabled) { | ||||
|         ++parameter_register; | ||||
|     } | ||||
|  | ||||
|     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||||
|         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||||
| @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||||
|  | ||||
|     const Node array = is_array ? GetRegister(array_register) : nullptr; | ||||
|  | ||||
|     std::vector<Node> aoffi; | ||||
|     if (is_aoffi) { | ||||
|         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||||
|     } | ||||
|  | ||||
|     Node dc{}; | ||||
|     if (depth_compare) { | ||||
|         // Depth is stored in the next parameter register: gpr20, advanced by one when lod or bias | ||||
|         // is used and by one more when AOFFI is used | ||||
|         const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); | ||||
|         dc = GetRegister(depth_register); | ||||
|         dc = GetRegister(parameter_register++); | ||||
|     } | ||||
|  | ||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); | ||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); | ||||
| } | ||||
|  | ||||
| Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||||
| @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||||
|         dc = GetRegister(depth_register); | ||||
|     } | ||||
|  | ||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); | ||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); | ||||
| } | ||||
|  | ||||
| Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||||
|                             bool is_array) { | ||||
|                             bool is_array, bool is_aoffi) { | ||||
|     const std::size_t coord_count = GetCoordCount(texture_type); | ||||
|     const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||||
|     const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||||
| @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | ||||
|     const u64 coord_register = array_register + (is_array ? 1 : 0); | ||||
|  | ||||
|     std::vector<Node> coords; | ||||
|     for (size_t i = 0; i < coord_count; ++i) | ||||
|     for (std::size_t i = 0; i < coord_count; ++i) { | ||||
|         coords.push_back(GetRegister(coord_register + i)); | ||||
|     } | ||||
|  | ||||
|     u64 parameter_register = instr.gpr20.Value(); | ||||
|     std::vector<Node> aoffi; | ||||
|     if (is_aoffi) { | ||||
|         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||||
|     } | ||||
|  | ||||
|     Node dc{}; | ||||
|     if (depth_compare) { | ||||
|         dc = GetRegister(parameter_register++); | ||||
|     } | ||||
|  | ||||
|     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||||
|  | ||||
|     Node4 values; | ||||
|     for (u32 element = 0; element < values.size(); ++element) { | ||||
|         auto coords_copy = coords; | ||||
|         MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; | ||||
|         MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; | ||||
|         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||
|     } | ||||
|  | ||||
| @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | ||||
|     Node4 values; | ||||
|     for (u32 element = 0; element < values.size(); ++element) { | ||||
|         auto coords_copy = coords; | ||||
|         MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; | ||||
|         MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; | ||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||||
|     } | ||||
|     return values; | ||||
| @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | ||||
|     return {coord_count, total_coord_count}; | ||||
| } | ||||
|  | ||||
| } // namespace VideoCommon::Shader | ||||
| /// Decodes the packed AOFFI operand into one signed offset node per coordinate. | ||||
| /// @param aoffi_reg   Register node holding the packed offsets. | ||||
| /// @param coord_count Number of offsets to decode; each one uses an entry of the 3-entry | ||||
| ///                    field-position table. | ||||
| /// @param is_tld4     Selects the TLD4 layout (6-bit fields at bits 0/8/16) instead of the | ||||
| ///                    TEX layout (4-bit fields at bits 0/4/8). | ||||
| /// @return Immediate nodes when the register tracks back to an immediate, otherwise nodes | ||||
| ///         that extract and sign-adjust the fields at runtime. | ||||
| std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||||
|                                                 bool is_tld4) { | ||||
|     // Per-layout constants: field positions, field width, first raw value treated as | ||||
|     // negative, and the amount subtracted from such values. | ||||
|     const std::array<u32, 3> coord_offsets = | ||||
|         is_tld4 ? std::array<u32, 3>{0, 8, 16} : std::array<u32, 3>{0, 4, 8}; | ||||
|     const u32 size = is_tld4 ? 6 : 4; | ||||
|     const s32 wrap_value = is_tld4 ? 32 : 8; | ||||
|     const s32 diff_value = is_tld4 ? 64 : 16; | ||||
|     const u32 mask = (1U << size) - 1; | ||||
|  | ||||
|     std::vector<Node> offsets; | ||||
|     offsets.reserve(coord_count); | ||||
|  | ||||
|     const auto folded = | ||||
|         TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size())); | ||||
|     if (folded) { | ||||
|         // The register holds a known immediate: decode every field now. | ||||
|         for (std::size_t i = 0; i < coord_count; ++i) { | ||||
|             const s32 raw = (*folded >> coord_offsets.at(i)) & mask; | ||||
|             offsets.push_back(Immediate(raw >= wrap_value ? raw - diff_value : raw)); | ||||
|         } | ||||
|         return offsets; | ||||
|     } | ||||
|  | ||||
|     // Variable access, not supported on AMD. | ||||
|     LOG_WARNING(HW_GPU, | ||||
|                 "AOFFI constant folding failed, some hardware might have graphical issues"); | ||||
|     for (std::size_t i = 0; i < coord_count; ++i) { | ||||
|         const Node field = BitfieldExtract(aoffi_reg, coord_offsets.at(i), size); | ||||
|         const Node is_negative = | ||||
|             Operation(OperationCode::LogicalIGreaterEqual, field, Immediate(wrap_value)); | ||||
|         const Node adjusted = Operation(OperationCode::IAdd, field, Immediate(-diff_value)); | ||||
|         offsets.push_back(Operation(OperationCode::Select, is_negative, adjusted, field)); | ||||
|     } | ||||
|     return offsets; | ||||
| } | ||||
|  | ||||
| } // namespace VideoCommon::Shader | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include <map> | ||||
| #include <optional> | ||||
| #include <set> | ||||
| #include <string> | ||||
| #include <tuple> | ||||
| @@ -290,6 +291,7 @@ struct MetaTexture { | ||||
|     const Sampler& sampler; | ||||
|     Node array{}; | ||||
|     Node depth_compare{}; | ||||
|     std::vector<Node> aoffi; | ||||
|     Node bias{}; | ||||
|     Node lod{}; | ||||
|     Node component{}; | ||||
| @@ -741,14 +743,14 @@ private: | ||||
|  | ||||
|     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||
|                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||||
|                      bool is_array); | ||||
|                      bool is_array, bool is_aoffi); | ||||
|  | ||||
|     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||
|                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||||
|                       bool is_array); | ||||
|  | ||||
|     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||
|                       bool depth_compare, bool is_array); | ||||
|                       bool depth_compare, bool is_array, bool is_aoffi); | ||||
|  | ||||
|     Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||
|                       bool is_array); | ||||
| @@ -757,9 +759,11 @@ private: | ||||
|         Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||||
|         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||||
|  | ||||
|     std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||||
|  | ||||
|     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||
|                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | ||||
|                          Node array, Node depth_compare, u32 bias_offset); | ||||
|                          Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); | ||||
|  | ||||
|     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||||
|                          u64 byte_height); | ||||
| @@ -773,6 +777,8 @@ private: | ||||
|  | ||||
|     Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); | ||||
|  | ||||
|     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); | ||||
|  | ||||
|     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); | ||||
|  | ||||
|     template <typename... T> | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
| #include <utility> | ||||
| #include <variant> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
|  | ||||
| namespace VideoCommon::Shader { | ||||
| @@ -14,7 +15,7 @@ namespace { | ||||
| std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||||
|                                    OperationCode operation_code) { | ||||
|     for (; cursor >= 0; --cursor) { | ||||
|         const Node node = code[cursor]; | ||||
|         const Node node = code.at(cursor); | ||||
|         if (const auto operation = std::get_if<OperationNode>(node)) { | ||||
|             if (operation->GetCode() == operation_code) | ||||
|                 return {node, cursor}; | ||||
| @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { | ||||
|     return nullptr; | ||||
| } | ||||
|  | ||||
| /// Tries to resolve the value assigned to a register to an immediate. | ||||
| /// @param tracked Node expected to hold a GprNode; a null node or any other node kind | ||||
| ///                yields an empty result. | ||||
| /// @param code    Node block handed to TrackRegister for the search. | ||||
| /// @param cursor  Position of the instruction that uses the register. | ||||
| /// @return The immediate value when the tracked register resolves to an ImmediateNode, | ||||
| ///         otherwise an empty optional. | ||||
| std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { | ||||
|     // std::get would dereference a null node and throw std::bad_variant_access on a | ||||
|     // non-register node; report "not an immediate" instead. | ||||
|     const auto gpr = std::get_if<GprNode>(tracked); | ||||
|     if (!gpr) { | ||||
|         return {}; | ||||
|     } | ||||
|  | ||||
|     // Reduce the cursor by one to avoid infinite loops when the instruction sets the same | ||||
|     // register that it uses as operand | ||||
|     const Node found = TrackRegister(gpr, code, cursor - 1).first; | ||||
|     if (!found) { | ||||
|         return {}; | ||||
|     } | ||||
|     if (const auto immediate = std::get_if<ImmediateNode>(found)) { | ||||
|         return immediate->GetValue(); | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
|  | ||||
| std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||||
|                                              s64 cursor) { | ||||
|     for (; cursor >= 0; --cursor) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user