Merge pull request #2306 from ReinUsesLisp/aoffi
shader_ir: Implement AOFFI for TEX and TLD4
This commit is contained in:
		| @@ -21,6 +21,8 @@ | |||||||
|  |  | ||||||
| namespace OpenGL::GLShader { | namespace OpenGL::GLShader { | ||||||
|  |  | ||||||
|  | namespace { | ||||||
|  |  | ||||||
| using Tegra::Shader::Attribute; | using Tegra::Shader::Attribute; | ||||||
| using Tegra::Shader::AttributeUse; | using Tegra::Shader::AttributeUse; | ||||||
| using Tegra::Shader::Header; | using Tegra::Shader::Header; | ||||||
| @@ -34,14 +36,18 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; | |||||||
| using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||||||
| using Operation = const OperationNode&; | using Operation = const OperationNode&; | ||||||
|  |  | ||||||
|  | enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||||||
|  |  | ||||||
|  | struct TextureAoffi {}; | ||||||
|  | using TextureArgument = std::pair<Type, Node>; | ||||||
|  | using TextureIR = std::variant<TextureAoffi, TextureArgument>; | ||||||
|  |  | ||||||
| enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; | ||||||
| constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | constexpr u32 MAX_CONSTBUFFER_ELEMENTS = | ||||||
|     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); |     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); | ||||||
| constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | constexpr u32 MAX_GLOBALMEMORY_ELEMENTS = | ||||||
|     static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); |     static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float); | ||||||
|  |  | ||||||
| enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; |  | ||||||
|  |  | ||||||
| class ShaderWriter { | class ShaderWriter { | ||||||
| public: | public: | ||||||
|     void AddExpression(std::string_view text) { |     void AddExpression(std::string_view text) { | ||||||
| @@ -91,7 +97,7 @@ private: | |||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Generates code to use for a swizzle operation. | /// Generates code to use for a swizzle operation. | ||||||
| static std::string GetSwizzle(u32 elem) { | std::string GetSwizzle(u32 elem) { | ||||||
|     ASSERT(elem <= 3); |     ASSERT(elem <= 3); | ||||||
|     std::string swizzle = "."; |     std::string swizzle = "."; | ||||||
|     swizzle += "xyzw"[elem]; |     swizzle += "xyzw"[elem]; | ||||||
| @@ -99,7 +105,7 @@ static std::string GetSwizzle(u32 elem) { | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Translate topology | /// Translate topology | ||||||
| static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | ||||||
|     switch (topology) { |     switch (topology) { | ||||||
|     case Tegra::Shader::OutputTopology::PointList: |     case Tegra::Shader::OutputTopology::PointList: | ||||||
|         return "points"; |         return "points"; | ||||||
| @@ -114,7 +120,7 @@ static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Returns true if an object has to be treated as precise | /// Returns true if an object has to be treated as precise | ||||||
| static bool IsPrecise(Operation operand) { | bool IsPrecise(Operation operand) { | ||||||
|     const auto& meta = operand.GetMeta(); |     const auto& meta = operand.GetMeta(); | ||||||
|  |  | ||||||
|     if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { |     if (const auto arithmetic = std::get_if<MetaArithmetic>(&meta)) { | ||||||
| @@ -126,7 +132,7 @@ static bool IsPrecise(Operation operand) { | |||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
|  |  | ||||||
| static bool IsPrecise(Node node) { | bool IsPrecise(Node node) { | ||||||
|     if (const auto operation = std::get_if<OperationNode>(node)) { |     if (const auto operation = std::get_if<OperationNode>(node)) { | ||||||
|         return IsPrecise(*operation); |         return IsPrecise(*operation); | ||||||
|     } |     } | ||||||
| @@ -723,8 +729,8 @@ private: | |||||||
|                                                          result_type)); |                                                          result_type)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::string GenerateTexture(Operation operation, const std::string& func, |     std::string GenerateTexture(Operation operation, const std::string& function_suffix, | ||||||
|                                 const std::vector<std::pair<Type, Node>>& extras) { |                                 const std::vector<TextureIR>& extras) { | ||||||
|         constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; |         constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"}; | ||||||
|  |  | ||||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||||
| @@ -734,11 +740,11 @@ private: | |||||||
|         const bool has_array = meta->sampler.IsArray(); |         const bool has_array = meta->sampler.IsArray(); | ||||||
|         const bool has_shadow = meta->sampler.IsShadow(); |         const bool has_shadow = meta->sampler.IsShadow(); | ||||||
|  |  | ||||||
|         std::string expr = func; |         std::string expr = "texture" + function_suffix; | ||||||
|         expr += '('; |         if (!meta->aoffi.empty()) { | ||||||
|         expr += GetSampler(meta->sampler); |             expr += "Offset"; | ||||||
|         expr += ", "; |         } | ||||||
|  |         expr += '(' + GetSampler(meta->sampler) + ", "; | ||||||
|         expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); |         expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1); | ||||||
|         expr += '('; |         expr += '('; | ||||||
|         for (std::size_t i = 0; i < count; ++i) { |         for (std::size_t i = 0; i < count; ++i) { | ||||||
| @@ -756,38 +762,76 @@ private: | |||||||
|         } |         } | ||||||
|         expr += ')'; |         expr += ')'; | ||||||
|  |  | ||||||
|         for (const auto& extra_pair : extras) { |         for (const auto& variant : extras) { | ||||||
|             const auto [type, operand] = extra_pair; |             if (const auto argument = std::get_if<TextureArgument>(&variant)) { | ||||||
|             if (operand == nullptr) { |                 expr += GenerateTextureArgument(*argument); | ||||||
|                 continue; |             } else if (std::get_if<TextureAoffi>(&variant)) { | ||||||
|             } |                 expr += GenerateTextureAoffi(meta->aoffi); | ||||||
|             expr += ", "; |             } else { | ||||||
|  |                 UNREACHABLE(); | ||||||
|             switch (type) { |  | ||||||
|             case Type::Int: |  | ||||||
|                 if (const auto immediate = std::get_if<ImmediateNode>(operand)) { |  | ||||||
|                     // Inline the string as an immediate integer in GLSL (some extra arguments are |  | ||||||
|                     // required to be constant) |  | ||||||
|                     expr += std::to_string(static_cast<s32>(immediate->GetValue())); |  | ||||||
|                 } else { |  | ||||||
|                     expr += "ftoi(" + Visit(operand) + ')'; |  | ||||||
|                 } |  | ||||||
|                 break; |  | ||||||
|             case Type::Float: |  | ||||||
|                 expr += Visit(operand); |  | ||||||
|                 break; |  | ||||||
|             default: { |  | ||||||
|                 const auto type_int = static_cast<u32>(type); |  | ||||||
|                 UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); |  | ||||||
|                 expr += '0'; |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         return expr + ')'; |         return expr + ')'; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     std::string GenerateTextureArgument(TextureArgument argument) { | ||||||
|  |         const auto [type, operand] = argument; | ||||||
|  |         if (operand == nullptr) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         std::string expr = ", "; | ||||||
|  |         switch (type) { | ||||||
|  |         case Type::Int: | ||||||
|  |             if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||||||
|  |                 // Inline the string as an immediate integer in GLSL (some extra arguments are | ||||||
|  |                 // required to be constant) | ||||||
|  |                 expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||||||
|  |             } else { | ||||||
|  |                 expr += "ftoi(" + Visit(operand) + ')'; | ||||||
|  |             } | ||||||
|  |             break; | ||||||
|  |         case Type::Float: | ||||||
|  |             expr += Visit(operand); | ||||||
|  |             break; | ||||||
|  |         default: { | ||||||
|  |             const auto type_int = static_cast<u32>(type); | ||||||
|  |             UNIMPLEMENTED_MSG("Unimplemented extra type={}", type_int); | ||||||
|  |             expr += '0'; | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  |         } | ||||||
|  |         return expr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) { | ||||||
|  |         if (aoffi.empty()) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"}; | ||||||
|  |         std::string expr = ", "; | ||||||
|  |         expr += coord_constructors.at(aoffi.size() - 1); | ||||||
|  |         expr += '('; | ||||||
|  |  | ||||||
|  |         for (std::size_t index = 0; index < aoffi.size(); ++index) { | ||||||
|  |             const auto operand{aoffi.at(index)}; | ||||||
|  |             if (const auto immediate = std::get_if<ImmediateNode>(operand)) { | ||||||
|  |                 // Inline the string as an immediate integer in GLSL (AOFFI arguments are required | ||||||
|  |                 // to be constant by the standard). | ||||||
|  |                 expr += std::to_string(static_cast<s32>(immediate->GetValue())); | ||||||
|  |             } else { | ||||||
|  |                 expr += "ftoi(" + Visit(operand) + ')'; | ||||||
|  |             } | ||||||
|  |             if (index + 1 < aoffi.size()) { | ||||||
|  |                 expr += ", "; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         expr += ')'; | ||||||
|  |  | ||||||
|  |         return expr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     std::string Assign(Operation operation) { |     std::string Assign(Operation operation) { | ||||||
|         const Node dest = operation[0]; |         const Node dest = operation[0]; | ||||||
|         const Node src = operation[1]; |         const Node src = operation[1]; | ||||||
| @@ -1164,7 +1208,8 @@ private: | |||||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||||
|         ASSERT(meta); |         ASSERT(meta); | ||||||
|  |  | ||||||
|         std::string expr = GenerateTexture(operation, "texture", {{Type::Float, meta->bias}}); |         std::string expr = GenerateTexture( | ||||||
|  |             operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}}); | ||||||
|         if (meta->sampler.IsShadow()) { |         if (meta->sampler.IsShadow()) { | ||||||
|             expr = "vec4(" + expr + ')'; |             expr = "vec4(" + expr + ')'; | ||||||
|         } |         } | ||||||
| @@ -1175,7 +1220,8 @@ private: | |||||||
|         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); |         const auto meta = std::get_if<MetaTexture>(&operation.GetMeta()); | ||||||
|         ASSERT(meta); |         ASSERT(meta); | ||||||
|  |  | ||||||
|         std::string expr = GenerateTexture(operation, "textureLod", {{Type::Float, meta->lod}}); |         std::string expr = GenerateTexture( | ||||||
|  |             operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}}); | ||||||
|         if (meta->sampler.IsShadow()) { |         if (meta->sampler.IsShadow()) { | ||||||
|             expr = "vec4(" + expr + ')'; |             expr = "vec4(" + expr + ')'; | ||||||
|         } |         } | ||||||
| @@ -1187,7 +1233,8 @@ private: | |||||||
|         ASSERT(meta); |         ASSERT(meta); | ||||||
|  |  | ||||||
|         const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; |         const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int; | ||||||
|         return GenerateTexture(operation, "textureGather", {{type, meta->component}}) + |         return GenerateTexture(operation, "Gather", | ||||||
|  |                                {TextureArgument{type, meta->component}, TextureAoffi{}}) + | ||||||
|                GetSwizzle(meta->element); |                GetSwizzle(meta->element); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -1217,8 +1264,8 @@ private: | |||||||
|         ASSERT(meta); |         ASSERT(meta); | ||||||
|  |  | ||||||
|         if (meta->element < 2) { |         if (meta->element < 2) { | ||||||
|             return "itof(int((" + GenerateTexture(operation, "textureQueryLod", {}) + |             return "itof(int((" + GenerateTexture(operation, "QueryLod", {}) + " * vec2(256))" + | ||||||
|                    " * vec2(256))" + GetSwizzle(meta->element) + "))"; |                    GetSwizzle(meta->element) + "))"; | ||||||
|         } |         } | ||||||
|         return "0"; |         return "0"; | ||||||
|     } |     } | ||||||
| @@ -1571,6 +1618,8 @@ private: | |||||||
|     ShaderWriter code; |     ShaderWriter code; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | } // Anonymous namespace | ||||||
|  |  | ||||||
| std::string GetCommonDeclarations() { | std::string GetCommonDeclarations() { | ||||||
|     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); |     const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS); | ||||||
|     const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); |     const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS); | ||||||
|   | |||||||
| @@ -7,7 +7,9 @@ | |||||||
| #include <fmt/format.h> | #include <fmt/format.h> | ||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "common/logging/log.h" | ||||||
| #include "video_core/engines/shader_bytecode.h" | #include "video_core/engines/shader_bytecode.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
|  |  | ||||||
| @@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|  |  | ||||||
|     switch (opcode->get().GetId()) { |     switch (opcode->get().GetId()) { | ||||||
|     case OpCode::Id::TEX: { |     case OpCode::Id::TEX: { | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI), |  | ||||||
|                              "AOFFI is not implemented"); |  | ||||||
|  |  | ||||||
|         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { |         if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) { | ||||||
|             LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); |             LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete"); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         const TextureType texture_type{instr.tex.texture_type}; |         const TextureType texture_type{instr.tex.texture_type}; | ||||||
|         const bool is_array = instr.tex.array != 0; |         const bool is_array = instr.tex.array != 0; | ||||||
|  |         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI); | ||||||
|         const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); |         const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC); | ||||||
|         const auto process_mode = instr.tex.GetTextureProcessMode(); |         const auto process_mode = instr.tex.GetTextureProcessMode(); | ||||||
|         WriteTexInstructionFloat( |         WriteTexInstructionFloat( | ||||||
|             bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array)); |             bb, instr, | ||||||
|  |             GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi)); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OpCode::Id::TEXS: { |     case OpCode::Id::TEXS: { | ||||||
| @@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|     } |     } | ||||||
|     case OpCode::Id::TLD4: { |     case OpCode::Id::TLD4: { | ||||||
|         ASSERT(instr.tld4.array == 0); |         ASSERT(instr.tld4.array == 0); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI), |  | ||||||
|                              "AOFFI is not implemented"); |  | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), |         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV), | ||||||
|                              "NDV is not implemented"); |                              "NDV is not implemented"); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), |         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP), | ||||||
| @@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         const auto texture_type = instr.tld4.texture_type.Value(); |         const auto texture_type = instr.tld4.texture_type.Value(); | ||||||
|         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); |         const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC); | ||||||
|         const bool is_array = instr.tld4.array != 0; |         const bool is_array = instr.tld4.array != 0; | ||||||
|         WriteTexInstructionFloat(bb, instr, |         const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI); | ||||||
|                                  GetTld4Code(instr, texture_type, depth_compare, is_array)); |         WriteTexInstructionFloat( | ||||||
|  |             bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi)); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OpCode::Id::TLD4S: { |     case OpCode::Id::TLD4S: { | ||||||
| @@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         Node4 values; |         Node4 values; | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |         for (u32 element = 0; element < values.size(); ++element) { | ||||||
|             auto coords_copy = coords; |             auto coords_copy = coords; | ||||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, component, element}; |             MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element}; | ||||||
|             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|                 if (!instr.txq.IsComponentEnabled(element)) { |                 if (!instr.txq.IsComponentEnabled(element)) { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|                 MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; |                 MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | ||||||
|                 const Node value = |                 const Node value = | ||||||
|                     Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); |                     Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8)); | ||||||
|                 SetTemporal(bb, indexer++, value); |                 SetTemporal(bb, indexer++, value); | ||||||
| @@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|  |  | ||||||
|         for (u32 element = 0; element < 2; ++element) { |         for (u32 element = 0; element < 2; ++element) { | ||||||
|             auto params = coords; |             auto params = coords; | ||||||
|             MetaTexture meta{sampler, {}, {}, {}, {}, {}, element}; |             MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element}; | ||||||
|             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); |             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); | ||||||
|             SetTemporal(bb, element, value); |             SetTemporal(bb, element, value); | ||||||
|         } |         } | ||||||
| @@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr, | |||||||
|  |  | ||||||
| Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | ||||||
|                                TextureProcessMode process_mode, std::vector<Node> coords, |                                TextureProcessMode process_mode, std::vector<Node> coords, | ||||||
|                                Node array, Node depth_compare, u32 bias_offset) { |                                Node array, Node depth_compare, u32 bias_offset, | ||||||
|  |                                std::vector<Node> aoffi) { | ||||||
|     const bool is_array = array; |     const bool is_array = array; | ||||||
|     const bool is_shadow = depth_compare; |     const bool is_shadow = depth_compare; | ||||||
|  |  | ||||||
| @@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto copy_coords = coords; |         auto copy_coords = coords; | ||||||
|         MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element}; |         MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element}; | ||||||
|         values[element] = Operation(read_method, meta, std::move(copy_coords)); |         values[element] = Operation(read_method, meta, std::move(copy_coords)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||||||
| } | } | ||||||
|  |  | ||||||
| Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | ||||||
|                            TextureProcessMode process_mode, bool depth_compare, bool is_array) { |                            TextureProcessMode process_mode, bool depth_compare, bool is_array, | ||||||
|     const bool lod_bias_enabled = |                            bool is_aoffi) { | ||||||
|         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ); |     const bool lod_bias_enabled{ | ||||||
|  |         (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)}; | ||||||
|  |  | ||||||
|  |     u64 parameter_register = instr.gpr20.Value(); | ||||||
|  |     if (lod_bias_enabled) { | ||||||
|  |         ++parameter_register; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( |     const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement( | ||||||
|         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); |         texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5); | ||||||
| @@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type, | |||||||
|  |  | ||||||
|     const Node array = is_array ? GetRegister(array_register) : nullptr; |     const Node array = is_array ? GetRegister(array_register) : nullptr; | ||||||
|  |  | ||||||
|  |     std::vector<Node> aoffi; | ||||||
|  |     if (is_aoffi) { | ||||||
|  |         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     Node dc{}; |     Node dc{}; | ||||||
|     if (depth_compare) { |     if (depth_compare) { | ||||||
|         // Depth is always stored in the register signaled by gpr20 or in the next register if lod |         // Depth is always stored in the register signaled by gpr20 or in the next register if lod | ||||||
|         // or bias are used |         // or bias are used | ||||||
|         const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0); |         dc = GetRegister(parameter_register++); | ||||||
|         dc = GetRegister(depth_register); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0); |     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi); | ||||||
| } | } | ||||||
|  |  | ||||||
| Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | ||||||
| @@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type, | |||||||
|         dc = GetRegister(depth_register); |         dc = GetRegister(depth_register); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset); |     return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {}); | ||||||
| } | } | ||||||
|  |  | ||||||
| Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare, | ||||||
|                             bool is_array) { |                             bool is_array, bool is_aoffi) { | ||||||
|     const std::size_t coord_count = GetCoordCount(texture_type); |     const std::size_t coord_count = GetCoordCount(texture_type); | ||||||
|     const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); |     const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0); | ||||||
|     const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); |     const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0); | ||||||
| @@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||||||
|     const u64 coord_register = array_register + (is_array ? 1 : 0); |     const u64 coord_register = array_register + (is_array ? 1 : 0); | ||||||
|  |  | ||||||
|     std::vector<Node> coords; |     std::vector<Node> coords; | ||||||
|     for (size_t i = 0; i < coord_count; ++i) |     for (std::size_t i = 0; i < coord_count; ++i) { | ||||||
|         coords.push_back(GetRegister(coord_register + i)); |         coords.push_back(GetRegister(coord_register + i)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u64 parameter_register = instr.gpr20.Value(); | ||||||
|  |     std::vector<Node> aoffi; | ||||||
|  |     if (is_aoffi) { | ||||||
|  |         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Node dc{}; | ||||||
|  |     if (depth_compare) { | ||||||
|  |         dc = GetRegister(parameter_register++); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); | ||||||
|  |  | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element}; |         MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element}; | ||||||
|         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|         auto coords_copy = coords; |         auto coords_copy = coords; | ||||||
|         MetaTexture meta{sampler, array, {}, {}, lod, {}, element}; |         MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element}; | ||||||
|         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); |         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy)); | ||||||
|     } |     } | ||||||
|     return values; |     return values; | ||||||
| @@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( | |||||||
|     return {coord_count, total_coord_count}; |     return {coord_count, total_coord_count}; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, | ||||||
|  |                                                 bool is_tld4) { | ||||||
|  |     const auto [coord_offsets, size, wrap_value, | ||||||
|  |                 diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { | ||||||
|  |         if (is_tld4) { | ||||||
|  |             return {{0, 8, 16}, 6, 32, 64}; | ||||||
|  |         } else { | ||||||
|  |             return {{0, 4, 8}, 4, 8, 16}; | ||||||
|  |         } | ||||||
|  |     }(); | ||||||
|  |     const u32 mask = (1U << size) - 1; | ||||||
|  |  | ||||||
|  |     std::vector<Node> aoffi; | ||||||
|  |     aoffi.reserve(coord_count); | ||||||
|  |  | ||||||
|  |     const auto aoffi_immediate{ | ||||||
|  |         TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()))}; | ||||||
|  |     if (!aoffi_immediate) { | ||||||
|  |         // Variable access, not supported on AMD. | ||||||
|  |         LOG_WARNING(HW_GPU, | ||||||
|  |                     "AOFFI constant folding failed, some hardware might have graphical issues"); | ||||||
|  |         for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||||||
|  |             const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); | ||||||
|  |             const Node condition = | ||||||
|  |                 Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); | ||||||
|  |             const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); | ||||||
|  |             aoffi.push_back(Operation(OperationCode::Select, condition, negative, value)); | ||||||
|  |         } | ||||||
|  |         return aoffi; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     for (std::size_t coord = 0; coord < coord_count; ++coord) { | ||||||
|  |         s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; | ||||||
|  |         if (value >= wrap_value) { | ||||||
|  |             value -= diff_value; | ||||||
|  |         } | ||||||
|  |         aoffi.push_back(Immediate(value)); | ||||||
|  |     } | ||||||
|  |     return aoffi; | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace VideoCommon::Shader | } // namespace VideoCommon::Shader | ||||||
| @@ -7,6 +7,7 @@ | |||||||
| #include <array> | #include <array> | ||||||
| #include <cstring> | #include <cstring> | ||||||
| #include <map> | #include <map> | ||||||
|  | #include <optional> | ||||||
| #include <set> | #include <set> | ||||||
| #include <string> | #include <string> | ||||||
| #include <tuple> | #include <tuple> | ||||||
| @@ -290,6 +291,7 @@ struct MetaTexture { | |||||||
|     const Sampler& sampler; |     const Sampler& sampler; | ||||||
|     Node array{}; |     Node array{}; | ||||||
|     Node depth_compare{}; |     Node depth_compare{}; | ||||||
|  |     std::vector<Node> aoffi; | ||||||
|     Node bias{}; |     Node bias{}; | ||||||
|     Node lod{}; |     Node lod{}; | ||||||
|     Node component{}; |     Node component{}; | ||||||
| @@ -741,14 +743,14 @@ private: | |||||||
|  |  | ||||||
|     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||||||
|                      bool is_array); |                      bool is_array, bool is_aoffi); | ||||||
|  |  | ||||||
|     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, |                       Tegra::Shader::TextureProcessMode process_mode, bool depth_compare, | ||||||
|                       bool is_array); |                       bool is_array); | ||||||
|  |  | ||||||
|     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                       bool depth_compare, bool is_array); |                       bool depth_compare, bool is_array, bool is_aoffi); | ||||||
|  |  | ||||||
|     Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                       bool is_array); |                       bool is_array); | ||||||
| @@ -757,9 +759,11 @@ private: | |||||||
|         Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, |         Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array, | ||||||
|         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); |         bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs); | ||||||
|  |  | ||||||
|  |     std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4); | ||||||
|  |  | ||||||
|     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, |     Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, | ||||||
|                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, |                          Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords, | ||||||
|                          Node array, Node depth_compare, u32 bias_offset); |                          Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi); | ||||||
|  |  | ||||||
|     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, |     Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type, | ||||||
|                          u64 byte_height); |                          u64 byte_height); | ||||||
| @@ -773,6 +777,8 @@ private: | |||||||
|  |  | ||||||
|     Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); |     Node TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor); | ||||||
|  |  | ||||||
|  |     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor); | ||||||
|  |  | ||||||
|     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); |     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor); | ||||||
|  |  | ||||||
|     template <typename... T> |     template <typename... T> | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ | |||||||
| #include <utility> | #include <utility> | ||||||
| #include <variant> | #include <variant> | ||||||
|  |  | ||||||
|  | #include "common/common_types.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
|  |  | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
| @@ -14,7 +15,7 @@ namespace { | |||||||
| std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, | ||||||
|                                    OperationCode operation_code) { |                                    OperationCode operation_code) { | ||||||
|     for (; cursor >= 0; --cursor) { |     for (; cursor >= 0; --cursor) { | ||||||
|         const Node node = code[cursor]; |         const Node node = code.at(cursor); | ||||||
|         if (const auto operation = std::get_if<OperationNode>(node)) { |         if (const auto operation = std::get_if<OperationNode>(node)) { | ||||||
|             if (operation->GetCode() == operation_code) |             if (operation->GetCode() == operation_code) | ||||||
|                 return {node, cursor}; |                 return {node, cursor}; | ||||||
| @@ -64,6 +65,20 @@ Node ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) { | |||||||
|     return nullptr; |     return nullptr; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) { | ||||||
|  |     // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register | ||||||
|  |     // that it uses as operand | ||||||
|  |     const auto [found, found_cursor] = | ||||||
|  |         TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); | ||||||
|  |     if (!found) { | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  |     if (const auto immediate = std::get_if<ImmediateNode>(found)) { | ||||||
|  |         return immediate->GetValue(); | ||||||
|  |     } | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  |  | ||||||
| std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, | ||||||
|                                              s64 cursor) { |                                              s64 cursor) { | ||||||
|     for (; cursor >= 0; --cursor) { |     for (; cursor >= 0; --cursor) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user