MacroHLE: Add HLE replacement for base vertex and base instance.
This commit is contained in:
		| @@ -339,6 +339,10 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | ||||
|             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||||
|             return ctx.OpBitcast(ctx.F32[1], ctx.OpISub(ctx.U32[1], index, base)); | ||||
|         } | ||||
|     case IR::Attribute::BaseInstance: | ||||
|         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_instance)); | ||||
|     case IR::Attribute::BaseVertex: | ||||
|         return ctx.OpBitcast(ctx.F32[1], ctx.OpLoad(ctx.U32[1], ctx.base_vertex)); | ||||
|     case IR::Attribute::FrontFace: | ||||
|         return ctx.OpSelect(ctx.F32[1], ctx.OpLoad(ctx.U1, ctx.front_face), | ||||
|                             ctx.OpBitcast(ctx.F32[1], ctx.Const(std::numeric_limits<u32>::max())), | ||||
| @@ -380,6 +384,10 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) { | ||||
|             const Id base{ctx.OpLoad(ctx.U32[1], ctx.base_vertex)}; | ||||
|             return ctx.OpISub(ctx.U32[1], index, base); | ||||
|         } | ||||
|     case IR::Attribute::BaseInstance: | ||||
|         return ctx.OpLoad(ctx.U32[1], ctx.base_instance); | ||||
|     case IR::Attribute::BaseVertex: | ||||
|         return ctx.OpLoad(ctx.U32[1], ctx.base_vertex); | ||||
|     default: | ||||
|         throw NotImplementedException("Read U32 attribute {}", attr); | ||||
|     } | ||||
|   | ||||
| @@ -1379,18 +1379,28 @@ void EmitContext::DefineInputs(const IR::Program& program) { | ||||
|     if (loads[IR::Attribute::InstanceId]) { | ||||
|         if (profile.support_vertex_instance_id) { | ||||
|             instance_id = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceId); | ||||
|             if (loads[IR::Attribute::BaseInstance]) { | ||||
|                 base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||
|             } | ||||
|         } else { | ||||
|             instance_index = DefineInput(*this, U32[1], true, spv::BuiltIn::InstanceIndex); | ||||
|             base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||||
|         } | ||||
|     } else if (loads[IR::Attribute::BaseInstance]) { | ||||
|         base_instance = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseInstance); | ||||
|     } | ||||
|     if (loads[IR::Attribute::VertexId]) { | ||||
|         if (profile.support_vertex_instance_id) { | ||||
|             vertex_id = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexId); | ||||
|             if (loads[IR::Attribute::BaseVertex]) { | ||||
|                 base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||
|             } | ||||
|         } else { | ||||
|             vertex_index = DefineInput(*this, U32[1], true, spv::BuiltIn::VertexIndex); | ||||
|             base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||
|         } | ||||
|     } else if (loads[IR::Attribute::BaseVertex]) { | ||||
|         base_vertex = DefineInput(*this, U32[1], true, spv::BuiltIn::BaseVertex); | ||||
|     } | ||||
|     if (loads[IR::Attribute::FrontFace]) { | ||||
|         front_face = DefineInput(*this, U1, true, spv::BuiltIn::FrontFacing); | ||||
|   | ||||
| @@ -34,6 +34,11 @@ public: | ||||
|  | ||||
|     [[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() const = 0; | ||||
|  | ||||
|     [[nodiscard]] virtual bool HasHLEMacroState() const = 0; | ||||
|  | ||||
|     [[nodiscard]] virtual std::optional<ReplaceConstant> GetReplaceConstBuffer( | ||||
|         u32 bank, u32 offset) = 0; | ||||
|  | ||||
|     virtual void Dump(u64 hash) = 0; | ||||
|  | ||||
|     [[nodiscard]] const ProgramHeader& SPH() const noexcept { | ||||
|   | ||||
| @@ -446,6 +446,10 @@ std::string NameOf(Attribute attribute) { | ||||
|         return "ViewportMask"; | ||||
|     case Attribute::FrontFace: | ||||
|         return "FrontFace"; | ||||
|     case Attribute::BaseInstance: | ||||
|         return "BaseInstance"; | ||||
|     case Attribute::BaseVertex: | ||||
|         return "BaseVertex"; | ||||
|     } | ||||
|     return fmt::format("<reserved attribute {}>", static_cast<int>(attribute)); | ||||
| } | ||||
|   | ||||
| @@ -219,6 +219,10 @@ enum class Attribute : u64 { | ||||
|     FixedFncTexture9Q = 231, | ||||
|     ViewportMask = 232, | ||||
|     FrontFace = 255, | ||||
|  | ||||
|     // Implementation attributes | ||||
|     BaseInstance = 256, | ||||
|     BaseVertex = 257, | ||||
| }; | ||||
|  | ||||
| constexpr size_t NUM_GENERICS = 32; | ||||
|   | ||||
| @@ -294,6 +294,14 @@ F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | ||||
|     return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::GetAttributeU32(IR::Attribute attribute) { | ||||
|     return GetAttributeU32(attribute, Imm32(0)); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::GetAttributeU32(IR::Attribute attribute, const U32& vertex) { | ||||
|     return Inst<U32>(Opcode::GetAttributeU32, attribute, vertex); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | ||||
|     Inst(Opcode::SetAttribute, attribute, value, vertex); | ||||
| } | ||||
|   | ||||
| @@ -74,6 +74,8 @@ public: | ||||
|  | ||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | ||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | ||||
|     [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute); | ||||
|     [[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, const U32& vertex); | ||||
|     void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | ||||
|  | ||||
|     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | ||||
|   | ||||
| @@ -219,7 +219,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     } | ||||
|     Optimization::SsaRewritePass(program); | ||||
|  | ||||
|     Optimization::ConstantPropagationPass(program); | ||||
|     Optimization::ConstantPropagationPass(env, program); | ||||
|  | ||||
|     Optimization::PositionPass(env, program); | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <type_traits> | ||||
|  | ||||
| #include "common/bit_cast.h" | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
| @@ -515,6 +516,8 @@ void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) { | ||||
|             case IR::Attribute::PrimitiveId: | ||||
|             case IR::Attribute::InstanceId: | ||||
|             case IR::Attribute::VertexId: | ||||
|             case IR::Attribute::BaseVertex: | ||||
|             case IR::Attribute::BaseInstance: | ||||
|                 break; | ||||
|             default: | ||||
|                 return; | ||||
| @@ -644,7 +647,37 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||
| void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||
|     const IR::Value bank{inst.Arg(0)}; | ||||
|     const IR::Value offset{inst.Arg(1)}; | ||||
|     if (!bank.IsImmediate() || !offset.IsImmediate()) { | ||||
|         return; | ||||
|     } | ||||
|     const auto bank_value = bank.U32(); | ||||
|     const auto offset_value = offset.U32(); | ||||
|     auto replacement = env.GetReplaceConstBuffer(bank_value, offset_value); | ||||
|     if (!replacement) { | ||||
|         return; | ||||
|     } | ||||
|     const auto new_attribute = [replacement]() { | ||||
|         switch (*replacement) { | ||||
|         case ReplaceConstant::BaseInstance: | ||||
|             return IR::Attribute::BaseInstance; | ||||
|         case ReplaceConstant::BaseVertex: | ||||
|             return IR::Attribute::BaseVertex; | ||||
|         default: | ||||
|             throw NotImplementedException("Not implemented replacement variable {}", *replacement); | ||||
|         } | ||||
|     }(); | ||||
|     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|     if (inst.GetOpcode() == IR::Opcode::GetCbufU32) { | ||||
|         inst.ReplaceUsesWith(ir.GetAttributeU32(new_attribute)); | ||||
|     } else { | ||||
|         inst.ReplaceUsesWith(ir.GetAttribute(new_attribute)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) { | ||||
|     switch (inst.GetOpcode()) { | ||||
|     case IR::Opcode::GetRegister: | ||||
|         return FoldGetRegister(inst); | ||||
| @@ -789,18 +822,24 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { | ||||
|                                     IR::Opcode::CompositeInsertF16x4); | ||||
|     case IR::Opcode::FSwizzleAdd: | ||||
|         return FoldFSwizzleAdd(block, inst); | ||||
|     case IR::Opcode::GetCbufF32: | ||||
|     case IR::Opcode::GetCbufU32: | ||||
|         if (env.HasHLEMacroState()) { | ||||
|             return FoldConstBuffer(env, block, inst); | ||||
|         } | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void ConstantPropagationPass(IR::Program& program) { | ||||
| void ConstantPropagationPass(Environment& env, IR::Program& program) { | ||||
|     const auto end{program.post_order_blocks.rend()}; | ||||
|     for (auto it = program.post_order_blocks.rbegin(); it != end; ++it) { | ||||
|         IR::Block* const block{*it}; | ||||
|         for (IR::Inst& inst : block->Instructions()) { | ||||
|             ConstantPropagation(*block, inst); | ||||
|             ConstantPropagation(env, *block, inst); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -13,7 +13,7 @@ struct HostTranslateInfo; | ||||
| namespace Shader::Optimization { | ||||
|  | ||||
| void CollectShaderInfoPass(Environment& env, IR::Program& program); | ||||
| void ConstantPropagationPass(IR::Program& program); | ||||
| void ConstantPropagationPass(Environment& env, IR::Program& program); | ||||
| void DeadCodeEliminationPass(IR::Program& program); | ||||
| void GlobalMemoryToStorageBufferPass(IR::Program& program); | ||||
| void IdentityRemovalPass(IR::Program& program); | ||||
|   | ||||
| @@ -16,6 +16,11 @@ | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| enum class ReplaceConstant : u32 { | ||||
|     BaseInstance, | ||||
|     BaseVertex, | ||||
| }; | ||||
|  | ||||
| enum class TextureType : u32 { | ||||
|     Color1D, | ||||
|     ColorArray1D, | ||||
|   | ||||
| @@ -11,7 +11,7 @@ | ||||
| namespace Shader { | ||||
|  | ||||
| struct VaryingState { | ||||
|     std::bitset<256> mask{}; | ||||
|     std::bitset<512> mask{}; | ||||
|  | ||||
|     void Set(IR::Attribute attribute, bool state = true) { | ||||
|         mask[static_cast<size_t>(attribute)] = state; | ||||
|   | ||||
| @@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() { | ||||
| size_t Maxwell3D::EstimateIndexBufferSize() { | ||||
|     GPUVAddr start_address = regs.index_buffer.StartAddress(); | ||||
|     GPUVAddr end_address = regs.index_buffer.EndAddress(); | ||||
|     return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address), | ||||
|                             static_cast<size_t>(end_address - start_address)); | ||||
|     constexpr std::array<size_t, 4> max_sizes = { | ||||
|         std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(), | ||||
|         std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()}; | ||||
|     const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); | ||||
|     return std::min<size_t>( | ||||
|         memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) / | ||||
|             byte_size, | ||||
|         static_cast<size_t>(end_address - start_address)); | ||||
| } | ||||
|  | ||||
| u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) { | ||||
| @@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const { | ||||
|     return regs.reg_array[method]; | ||||
| } | ||||
|  | ||||
| void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) { | ||||
|     const u64 key = (static_cast<u64>(bank) << 32) | offset; | ||||
|     replace_table.emplace(key, name); | ||||
| } | ||||
|  | ||||
| } // namespace Tegra::Engines | ||||
|   | ||||
| @@ -3020,6 +3020,23 @@ public: | ||||
|     /// Store temporary hw register values, used by some calls to restore state after a operation | ||||
|     Regs shadow_state; | ||||
|  | ||||
|     // None Engine | ||||
|     enum class EngineHint : u32 { | ||||
|         None = 0x0, | ||||
|         OnHLEMacro = 0x1, | ||||
|     }; | ||||
|  | ||||
|     EngineHint engine_state{EngineHint::None}; | ||||
|  | ||||
|     enum class HLEReplaceName : u32 { | ||||
|         BaseVertex = 0x0, | ||||
|         BaseInstance = 0x1, | ||||
|     }; | ||||
|  | ||||
|     void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name); | ||||
|  | ||||
|     std::unordered_map<u64, HLEReplaceName> replace_table; | ||||
|  | ||||
|     static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size"); | ||||
|     static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable"); | ||||
|  | ||||
|   | ||||
| @@ -14,26 +14,29 @@ | ||||
| #include "video_core/rasterizer_interface.h" | ||||
|  | ||||
| namespace Tegra { | ||||
|  | ||||
| using Maxwell = Engines::Maxwell3D; | ||||
|  | ||||
| namespace { | ||||
|  | ||||
| bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) { | ||||
| bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) { | ||||
|     switch (topology) { | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Points: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Lines: | ||||
|     case Maxwell::Regs::PrimitiveTopology::LineLoop: | ||||
|     case Maxwell::Regs::PrimitiveTopology::LineStrip: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Triangles: | ||||
|     case Maxwell::Regs::PrimitiveTopology::TriangleStrip: | ||||
|     case Maxwell::Regs::PrimitiveTopology::TriangleFan: | ||||
|     case Maxwell::Regs::PrimitiveTopology::LinesAdjacency: | ||||
|     case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency: | ||||
|     case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency: | ||||
|     case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Patches: | ||||
|         return true; | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip: | ||||
|     case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Quads: | ||||
|     case Maxwell::Regs::PrimitiveTopology::QuadStrip: | ||||
|     case Maxwell::Regs::PrimitiveTopology::Polygon: | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| @@ -82,8 +85,7 @@ public: | ||||
|         : HLEMacroImpl(maxwell3d_), extended(extended_) {} | ||||
|  | ||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||
|         auto topology = | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||||
|         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||
|         if (!IsTopologySafe(topology)) { | ||||
|             Fallback(parameters); | ||||
|             return; | ||||
| @@ -99,18 +101,16 @@ public: | ||||
|         params.stride = 0; | ||||
|  | ||||
|         if (extended) { | ||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|             maxwell3d.CallMethod(0x8e4, parameters[4], true); | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); | ||||
|         } | ||||
|  | ||||
|         maxwell3d.draw_manager->DrawArrayIndirect(topology); | ||||
|  | ||||
|         if (extended) { | ||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|             maxwell3d.CallMethod(0x8e4, 0, true); | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|             maxwell3d.replace_table.clear(); | ||||
|         } | ||||
|         maxwell3d.regs.vertex_buffer.first = 0; | ||||
|         maxwell3d.regs.vertex_buffer.count = 0; | ||||
|     } | ||||
|  | ||||
| private: | ||||
| @@ -134,13 +134,18 @@ private: | ||||
|  | ||||
|         const u32 base_instance = parameters[4]; | ||||
|         if (extended) { | ||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|             maxwell3d.CallMethod(0x8e4, base_instance, true); | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance); | ||||
|         } | ||||
|  | ||||
|         maxwell3d.draw_manager->DrawArray( | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | ||||
|             vertex_first, vertex_count, base_instance, instance_count); | ||||
|  | ||||
|         if (extended) { | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|             maxwell3d.replace_table.clear(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     bool extended; | ||||
| @@ -151,8 +156,7 @@ public: | ||||
|     explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {} | ||||
|  | ||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||
|         auto topology = | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]); | ||||
|         auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]); | ||||
|         if (!IsTopologySafe(topology)) { | ||||
|             Fallback(parameters); | ||||
|             return; | ||||
| @@ -164,16 +168,12 @@ public: | ||||
|             minimum_limit = std::max(parameters[3], minimum_limit); | ||||
|         } | ||||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||
|         const u32 base_size = std::max(minimum_limit, estimate); | ||||
|         const u32 element_base = parameters[4]; | ||||
|         const u32 base_instance = parameters[5]; | ||||
|         maxwell3d.regs.index_buffer.first = 0; | ||||
|         maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping | ||||
|         const u32 base_size = std::max<u32>(minimum_limit, estimate); | ||||
|         maxwell3d.regs.draw.topology.Assign(topology); | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|         maxwell3d.CallMethod(0x8e4, element_base, true); | ||||
|         maxwell3d.CallMethod(0x8e5, base_instance, true); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||
|         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_indexed = true; | ||||
|         params.include_count = false; | ||||
| @@ -184,9 +184,8 @@ public: | ||||
|         params.stride = 0; | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|         maxwell3d.CallMethod(0x8e4, 0x0, true); | ||||
|         maxwell3d.CallMethod(0x8e5, 0x0, true); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|         maxwell3d.replace_table.clear(); | ||||
|     } | ||||
|  | ||||
| private: | ||||
| @@ -197,18 +196,17 @@ private: | ||||
|         const u32 base_instance = parameters[5]; | ||||
|         maxwell3d.regs.vertex_id_base = element_base; | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|         maxwell3d.CallMethod(0x8e4, element_base, true); | ||||
|         maxwell3d.CallMethod(0x8e5, base_instance, true); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||
|         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||
|  | ||||
|         maxwell3d.draw_manager->DrawIndex( | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]), | ||||
|             parameters[3], parameters[1], element_base, base_instance, instance_count); | ||||
|  | ||||
|         maxwell3d.regs.vertex_id_base = 0x0; | ||||
|         maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|         maxwell3d.CallMethod(0x8e4, 0x0, true); | ||||
|         maxwell3d.CallMethod(0x8e5, 0x0, true); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|         maxwell3d.replace_table.clear(); | ||||
|     } | ||||
|  | ||||
|     u32 minimum_limit{1 << 18}; | ||||
| @@ -238,8 +236,7 @@ public: | ||||
|         : HLEMacroImpl(maxwell3d_) {} | ||||
|  | ||||
|     void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override { | ||||
|         const auto topology = | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); | ||||
|         const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); | ||||
|         if (!IsTopologySafe(topology)) { | ||||
|             Fallback(parameters); | ||||
|             return; | ||||
| @@ -277,9 +274,6 @@ public: | ||||
|         } | ||||
|         const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize()); | ||||
|         const u32 base_size = std::max(minimum_limit, estimate); | ||||
|  | ||||
|         maxwell3d.regs.index_buffer.first = 0; | ||||
|         maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size); | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         auto& params = maxwell3d.draw_manager->GetIndirectParams(); | ||||
|         params.is_indexed = true; | ||||
| @@ -290,7 +284,12 @@ public: | ||||
|         params.max_draw_counts = draw_count; | ||||
|         params.stride = stride; | ||||
|         maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|         maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||
|         maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||
|         maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size); | ||||
|         maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|         maxwell3d.replace_table.clear(); | ||||
|     } | ||||
|  | ||||
| private: | ||||
| @@ -299,9 +298,8 @@ private: | ||||
|             // Clean everything. | ||||
|             // Clean everything. | ||||
|             maxwell3d.regs.vertex_id_base = 0x0; | ||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|             maxwell3d.CallMethod(0x8e4, 0x0, true); | ||||
|             maxwell3d.CallMethod(0x8e5, 0x0, true); | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::None; | ||||
|             maxwell3d.replace_table.clear(); | ||||
|         }); | ||||
|         maxwell3d.RefreshParameters(); | ||||
|         const u32 start_indirect = parameters[0]; | ||||
| @@ -310,8 +308,7 @@ private: | ||||
|             // Nothing to do. | ||||
|             return; | ||||
|         } | ||||
|         const auto topology = | ||||
|             static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]); | ||||
|         const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]); | ||||
|         maxwell3d.regs.draw.topology.Assign(topology); | ||||
|         const u32 padding = parameters[3]; | ||||
|         const std::size_t max_draws = parameters[4]; | ||||
| @@ -326,9 +323,9 @@ private: | ||||
|             const u32 base_vertex = parameters[base + 3]; | ||||
|             const u32 base_instance = parameters[base + 4]; | ||||
|             maxwell3d.regs.vertex_id_base = base_vertex; | ||||
|             maxwell3d.CallMethod(0x8e3, 0x640, true); | ||||
|             maxwell3d.CallMethod(0x8e4, base_vertex, true); | ||||
|             maxwell3d.CallMethod(0x8e5, base_instance, true); | ||||
|             maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro; | ||||
|             maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex); | ||||
|             maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance); | ||||
|             maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true; | ||||
|             maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base], | ||||
|                                               base_vertex, base_instance, parameters[base + 1]); | ||||
|   | ||||
| @@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const { | ||||
|     return range_so_far; | ||||
| } | ||||
|  | ||||
| size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | ||||
| size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const { | ||||
|     PTEKind base_kind = GetPageKind(gpu_addr); | ||||
|     if (base_kind == PTEKind::INVALID) { | ||||
|         return 0; | ||||
| @@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | ||||
|             return true; | ||||
|         } | ||||
|         range_so_far += copy_amount; | ||||
|         if (range_so_far >= max_size) { | ||||
|             result = true; | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     }; | ||||
|     auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) { | ||||
| @@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const { | ||||
|             return true; | ||||
|         } | ||||
|         range_so_far += copy_amount; | ||||
|         if (range_so_far >= max_size) { | ||||
|             result = true; | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     }; | ||||
|     auto check_short_pages = [&](std::size_t page_index, std::size_t offset, | ||||
|   | ||||
| @@ -118,7 +118,8 @@ public: | ||||
|  | ||||
|     PTEKind GetPageKind(GPUVAddr gpu_addr) const; | ||||
|  | ||||
|     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const; | ||||
|     size_t GetMemoryLayoutSize(GPUVAddr gpu_addr, | ||||
|                                size_t max_size = std::numeric_limits<size_t>::max()) const; | ||||
|  | ||||
| private: | ||||
|     template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> | ||||
|   | ||||
| @@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, | ||||
|     smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0); | ||||
|     alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0); | ||||
|     alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0); | ||||
|     app_stage.Assign(maxwell3d.engine_state); | ||||
|  | ||||
|     for (size_t i = 0; i < regs.rt.size(); ++i) { | ||||
|         color_formats[i] = static_cast<u8>(regs.rt[i].format); | ||||
|   | ||||
| @@ -197,6 +197,7 @@ struct FixedPipelineState { | ||||
|         BitField<14, 1, u32> smooth_lines; | ||||
|         BitField<15, 1, u32> alpha_to_coverage_enabled; | ||||
|         BitField<16, 1, u32> alpha_to_one_enabled; | ||||
|         BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage; | ||||
|     }; | ||||
|     std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||||
|  | ||||
|   | ||||
| @@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment; | ||||
| using VideoCommon::GenericEnvironment; | ||||
| using VideoCommon::GraphicsEnvironment; | ||||
|  | ||||
| constexpr u32 CACHE_VERSION = 8; | ||||
| constexpr u32 CACHE_VERSION = 9; | ||||
|  | ||||
| template <typename Container> | ||||
| auto MakeSpan(Container& container) { | ||||
|   | ||||
| @@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||
|     const u64 num_texture_types{static_cast<u64>(texture_types.size())}; | ||||
|     const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())}; | ||||
|     const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; | ||||
|     const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())}; | ||||
|  | ||||
|     file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) | ||||
|         .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) | ||||
|         .write(reinterpret_cast<const char*>(&num_texture_pixel_formats), | ||||
|                sizeof(num_texture_pixel_formats)) | ||||
|         .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||
|         .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values), | ||||
|                sizeof(num_cbuf_replacement_values)) | ||||
|         .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) | ||||
|         .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) | ||||
|         .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) | ||||
| @@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const { | ||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||
|             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||
|     } | ||||
|     for (const auto& [key, type] : cbuf_replacements) { | ||||
|         file.write(reinterpret_cast<const char*>(&key), sizeof(key)) | ||||
|             .write(reinterpret_cast<const char*>(&type), sizeof(type)); | ||||
|     } | ||||
|     if (stage == Shader::Stage::Compute) { | ||||
|         file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) | ||||
|             .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||||
| @@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|     ASSERT(local_size <= std::numeric_limits<u32>::max()); | ||||
|     local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size; | ||||
|     texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot; | ||||
|     has_hle_engine_state = | ||||
|         maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro; | ||||
| } | ||||
|  | ||||
| u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||
| @@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { | ||||
|     return value; | ||||
| } | ||||
|  | ||||
| std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank, | ||||
|                                                                                   u32 offset) { | ||||
|     if (!has_hle_engine_state) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||||
|     auto it = maxwell3d->replace_table.find(key); | ||||
|     if (it == maxwell3d->replace_table.end()) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) { | ||||
|         switch (name) { | ||||
|         case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex: | ||||
|             return Shader::ReplaceConstant::BaseVertex; | ||||
|         case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance: | ||||
|             return Shader::ReplaceConstant::BaseInstance; | ||||
|         default: | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|     }(it->second); | ||||
|     cbuf_replacements.emplace(key, converted_value); | ||||
|     return converted_value; | ||||
| } | ||||
|  | ||||
| Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { | ||||
|     const auto& regs{maxwell3d->regs}; | ||||
|     const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding}; | ||||
| @@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | ||||
|     u64 num_texture_types{}; | ||||
|     u64 num_texture_pixel_formats{}; | ||||
|     u64 num_cbuf_values{}; | ||||
|     u64 num_cbuf_replacement_values{}; | ||||
|     file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) | ||||
|         .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) | ||||
|         .read(reinterpret_cast<char*>(&num_texture_pixel_formats), | ||||
|               sizeof(num_texture_pixel_formats)) | ||||
|         .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) | ||||
|         .read(reinterpret_cast<char*>(&num_cbuf_replacement_values), | ||||
|               sizeof(num_cbuf_replacement_values)) | ||||
|         .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) | ||||
|         .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) | ||||
|         .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) | ||||
| @@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) { | ||||
|             .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||||
|         cbuf_values.emplace(key, value); | ||||
|     } | ||||
|     for (size_t i = 0; i < num_cbuf_replacement_values; ++i) { | ||||
|         u64 key; | ||||
|         Shader::ReplaceConstant value; | ||||
|         file.read(reinterpret_cast<char*>(&key), sizeof(key)) | ||||
|             .read(reinterpret_cast<char*>(&value), sizeof(value)); | ||||
|         cbuf_replacements.emplace(key, value); | ||||
|     } | ||||
|     if (stage == Shader::Stage::Compute) { | ||||
|         file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) | ||||
|             .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); | ||||
| @@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const { | ||||
|     return workgroup_size; | ||||
| } | ||||
|  | ||||
| std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank, | ||||
|                                                                               u32 offset) { | ||||
|     const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset); | ||||
|     auto it = cbuf_replacements.find(key); | ||||
|     if (it == cbuf_replacements.end()) { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|     return it->second; | ||||
| } | ||||
|  | ||||
| void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, | ||||
|                        const std::filesystem::path& filename, u32 cache_version) try { | ||||
|     std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); | ||||
|   | ||||
| @@ -60,6 +60,10 @@ public: | ||||
|  | ||||
|     void Serialize(std::ofstream& file) const; | ||||
|  | ||||
|     bool HasHLEMacroState() const override { | ||||
|         return has_hle_engine_state; | ||||
|     } | ||||
|  | ||||
| protected: | ||||
|     std::optional<u64> TryFindSize(); | ||||
|  | ||||
| @@ -73,6 +77,7 @@ protected: | ||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||
|     std::unordered_map<u64, u32> cbuf_values; | ||||
|     std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||||
|  | ||||
|     u32 local_memory_size{}; | ||||
|     u32 texture_bound{}; | ||||
| @@ -89,6 +94,7 @@ protected: | ||||
|     u32 viewport_transform_state = 1; | ||||
|  | ||||
|     bool has_unbound_instructions = false; | ||||
|     bool has_hle_engine_state = false; | ||||
| }; | ||||
|  | ||||
| class GraphicsEnvironment final : public GenericEnvironment { | ||||
| @@ -109,6 +115,8 @@ public: | ||||
|  | ||||
|     u32 ReadViewportTransformState() override; | ||||
|  | ||||
|     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override; | ||||
|  | ||||
| private: | ||||
|     Tegra::Engines::Maxwell3D* maxwell3d{}; | ||||
|     size_t stage_index{}; | ||||
| @@ -131,6 +139,11 @@ public: | ||||
|  | ||||
|     u32 ReadViewportTransformState() override; | ||||
|  | ||||
|     std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer( | ||||
|         [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override { | ||||
|         return std::nullopt; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     Tegra::Engines::KeplerCompute* kepler_compute{}; | ||||
| }; | ||||
| @@ -166,6 +179,13 @@ public: | ||||
|  | ||||
|     [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; | ||||
|  | ||||
|     [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, | ||||
|                                                                                u32 offset) override; | ||||
|  | ||||
|     [[nodiscard]] bool HasHLEMacroState() const override { | ||||
|         return cbuf_replacements.size() != 0; | ||||
|     } | ||||
|  | ||||
|     void Dump(u64 hash) override; | ||||
|  | ||||
| private: | ||||
| @@ -173,6 +193,7 @@ private: | ||||
|     std::unordered_map<u32, Shader::TextureType> texture_types; | ||||
|     std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats; | ||||
|     std::unordered_map<u64, u32> cbuf_values; | ||||
|     std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements; | ||||
|     std::array<u32, 3> workgroup_size{}; | ||||
|     u32 local_memory_size{}; | ||||
|     u32 shared_memory_size{}; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user