shader: Support SSA loops on IR
This commit is contained in:
		| @@ -32,6 +32,8 @@ add_executable(shader_recompiler | ||||
|     frontend/ir/opcodes.cpp | ||||
|     frontend/ir/opcodes.h | ||||
|     frontend/ir/opcodes.inc | ||||
|     frontend/ir/post_order.cpp | ||||
|     frontend/ir/post_order.h | ||||
|     frontend/ir/pred.h | ||||
|     frontend/ir/program.cpp | ||||
|     frontend/ir/program.h | ||||
|   | ||||
| @@ -159,10 +159,10 @@ private: | ||||
|     Id EmitWorkgroupId(EmitContext& ctx); | ||||
|     Id EmitLocalInvocationId(EmitContext& ctx); | ||||
|     Id EmitUndefU1(EmitContext& ctx); | ||||
|     void EmitUndefU8(EmitContext& ctx); | ||||
|     void EmitUndefU16(EmitContext& ctx); | ||||
|     void EmitUndefU32(EmitContext& ctx); | ||||
|     void EmitUndefU64(EmitContext& ctx); | ||||
|     Id EmitUndefU8(EmitContext& ctx); | ||||
|     Id EmitUndefU16(EmitContext& ctx); | ||||
|     Id EmitUndefU32(EmitContext& ctx); | ||||
|     Id EmitUndefU64(EmitContext& ctx); | ||||
|     void EmitLoadGlobalU8(EmitContext& ctx); | ||||
|     void EmitLoadGlobalS8(EmitContext& ctx); | ||||
|     void EmitLoadGlobalU16(EmitContext& ctx); | ||||
| @@ -297,12 +297,12 @@ private: | ||||
|     void EmitBitFieldInsert(EmitContext& ctx); | ||||
|     void EmitBitFieldSExtract(EmitContext& ctx); | ||||
|     Id EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id count); | ||||
|     void EmitSLessThan(EmitContext& ctx); | ||||
|     Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); | ||||
|     void EmitULessThan(EmitContext& ctx); | ||||
|     void EmitIEqual(EmitContext& ctx); | ||||
|     void EmitSLessThanEqual(EmitContext& ctx); | ||||
|     void EmitULessThanEqual(EmitContext& ctx); | ||||
|     void EmitSGreaterThan(EmitContext& ctx); | ||||
|     Id EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||||
|     void EmitUGreaterThan(EmitContext& ctx); | ||||
|     void EmitINotEqual(EmitContext& ctx); | ||||
|     void EmitSGreaterThanEqual(EmitContext& ctx); | ||||
|   | ||||
| @@ -73,8 +73,8 @@ Id EmitSPIRV::EmitBitFieldUExtract(EmitContext& ctx, Id base, Id offset, Id coun | ||||
|     return ctx.OpBitFieldUExtract(ctx.u32[1], base, offset, count); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitSLessThan(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| Id EmitSPIRV::EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { | ||||
|     return ctx.OpSLessThan(ctx.u1, lhs, rhs); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitULessThan(EmitContext&) { | ||||
| @@ -93,8 +93,8 @@ void EmitSPIRV::EmitULessThanEqual(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitSGreaterThan(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| Id EmitSPIRV::EmitSGreaterThan(EmitContext& ctx, Id lhs, Id rhs) { | ||||
|     return ctx.OpSGreaterThan(ctx.u1, lhs, rhs); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitUGreaterThan(EmitContext&) { | ||||
|   | ||||
| @@ -10,19 +10,19 @@ Id EmitSPIRV::EmitUndefU1(EmitContext& ctx) { | ||||
|     return ctx.OpUndef(ctx.u1); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitUndefU8(EmitContext&) { | ||||
| Id EmitSPIRV::EmitUndefU8(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitUndefU16(EmitContext&) { | ||||
| Id EmitSPIRV::EmitUndefU16(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitUndefU32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| Id EmitSPIRV::EmitUndefU32(EmitContext& ctx) { | ||||
|     return ctx.OpUndef(ctx.u32[1]); | ||||
| } | ||||
|  | ||||
| void EmitSPIRV::EmitUndefU64(EmitContext&) { | ||||
| Id EmitSPIRV::EmitUndefU64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -12,6 +12,7 @@ namespace Shader::IR { | ||||
|  | ||||
| struct Function { | ||||
|     BlockList blocks; | ||||
|     BlockList post_order_blocks; | ||||
| }; | ||||
|  | ||||
| } // namespace Shader::IR | ||||
|   | ||||
							
								
								
									
										48
									
								
								src/shader_recompiler/frontend/ir/post_order.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								src/shader_recompiler/frontend/ir/post_order.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <boost/container/flat_set.hpp> | ||||
| #include <boost/container/small_vector.hpp> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/post_order.h" | ||||
|  | ||||
| namespace Shader::IR { | ||||
|  | ||||
| BlockList PostOrder(const BlockList& blocks) { | ||||
      // Builds the post-order sequence of the blocks reachable from blocks.front() by following
      // TrueBranch()/FalseBranch(). An explicit stack is used instead of recursion.
      // NOTE(review): blocks.front() is read unconditionally below; presumably callers never
      // pass an empty list - confirm.
|     boost::container::small_vector<Block*, 16> block_stack; | ||||
|     boost::container::flat_set<Block*> visited; | ||||
|  | ||||
|     BlockList post_order_blocks; | ||||
|     post_order_blocks.reserve(blocks.size()); | ||||
|  | ||||
      // Seed the traversal with the first block of the list.
|     Block* const first_block{blocks.front()}; | ||||
|     visited.insert(first_block); | ||||
|     block_stack.push_back(first_block); | ||||
|  | ||||
      // Schedules `branch` when it is non-null and not yet in `visited`. `block` is pushed
      // underneath it, so `block` is popped again only after everything pushed above it has
      // been handled. Returns true only when something was pushed.
|     const auto visit_branch = [&](Block* block, Block* branch) { | ||||
|         if (!branch) { | ||||
|             return false; | ||||
|         } | ||||
          // insert() reports through .second whether the block was newly added.
|         if (!visited.insert(branch).second) { | ||||
|             return false; | ||||
|         } | ||||
|         // Calling push_back twice is faster than insert on msvc | ||||
|         block_stack.push_back(block); | ||||
|         block_stack.push_back(branch); | ||||
|         return true; | ||||
|     }; | ||||
|     while (!block_stack.empty()) { | ||||
|         Block* const block{block_stack.back()}; | ||||
|         block_stack.pop_back(); | ||||
|  | ||||
          // && short-circuits: the false branch is only tried when the true branch scheduled
          // nothing. The block is appended once neither branch yields a newly discovered block.
|         if (!visit_branch(block, block->TrueBranch()) && | ||||
|             !visit_branch(block, block->FalseBranch())) { | ||||
|             post_order_blocks.push_back(block); | ||||
|         } | ||||
|     } | ||||
|     return post_order_blocks; | ||||
| } | ||||
|  | ||||
| } // namespace Shader::IR | ||||
							
								
								
									
										13
									
								
								src/shader_recompiler/frontend/ir/post_order.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								src/shader_recompiler/frontend/ir/post_order.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,13 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
|  | ||||
| namespace Shader::IR { | ||||
|  | ||||
| BlockList PostOrder(const BlockList& blocks); | ||||
|  | ||||
| } // namespace Shader::IR | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <vector> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/post_order.h" | ||||
| #include "shader_recompiler/frontend/ir/structured_control_flow.h" | ||||
| #include "shader_recompiler/frontend/maxwell/program.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||||
| @@ -56,11 +57,14 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     } | ||||
|  | ||||
|     fmt::print(stdout, "No optimizations: {}", IR::DumpProgram(program)); | ||||
|     std::ranges::for_each(functions, Optimization::SsaRewritePass); | ||||
|     for (IR::Function& function : functions) { | ||||
|         Optimization::Invoke(Optimization::GlobalMemoryToStorageBufferPass, function); | ||||
|         Optimization::Invoke(Optimization::ConstantPropagationPass, function); | ||||
|         Optimization::Invoke(Optimization::DeadCodeEliminationPass, function); | ||||
|         function.post_order_blocks = PostOrder(function.blocks); | ||||
|         Optimization::SsaRewritePass(function.post_order_blocks); | ||||
|     } | ||||
|     for (IR::Function& function : functions) { | ||||
|         Optimization::PostOrderInvoke(Optimization::GlobalMemoryToStorageBufferPass, function); | ||||
|         Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function); | ||||
|         Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function); | ||||
|         Optimization::IdentityRemovalPass(function); | ||||
|         Optimization::VerificationPass(function); | ||||
|     } | ||||
|   | ||||
| @@ -13,7 +13,7 @@ namespace Shader::Optimization { | ||||
| void DeadCodeEliminationPass(IR::Block& block) { | ||||
|     // We iterate over the instructions in reverse order. | ||||
|     // This is because removing an instruction reduces the number of uses for earlier instructions. | ||||
|     for (IR::Inst& inst : std::views::reverse(block)) { | ||||
|     for (IR::Inst& inst : block | std::views::reverse) { | ||||
|         if (!inst.HasUses() && !inst.MayHaveSideEffects()) { | ||||
|             inst.Invalidate(); | ||||
|         } | ||||
|   | ||||
| @@ -4,14 +4,16 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <span> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/function.h" | ||||
|  | ||||
| namespace Shader::Optimization { | ||||
|  | ||||
| template <typename Func> | ||||
| void Invoke(Func&& func, IR::Function& function) { | ||||
|     for (const auto& block : function.blocks) { | ||||
| void PostOrderInvoke(Func&& func, IR::Function& function) { | ||||
|     for (const auto& block : function.post_order_blocks) { | ||||
|         func(*block); | ||||
|     } | ||||
| } | ||||
| @@ -20,7 +22,7 @@ void ConstantPropagationPass(IR::Block& block); | ||||
| void DeadCodeEliminationPass(IR::Block& block); | ||||
| void GlobalMemoryToStorageBufferPass(IR::Block& block); | ||||
| void IdentityRemovalPass(IR::Function& function); | ||||
| void SsaRewritePass(IR::Function& function); | ||||
| void SsaRewritePass(std::span<IR::Block* const> post_order_blocks); | ||||
| void VerificationPass(const IR::Function& function); | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
|   | ||||
| @@ -14,7 +14,13 @@ | ||||
| //      https://link.springer.com/chapter/10.1007/978-3-642-37051-9_6 | ||||
| // | ||||
|  | ||||
| #include <ranges> | ||||
| #include <span> | ||||
| #include <variant> | ||||
| #include <vector> | ||||
|  | ||||
| #include <boost/container/flat_map.hpp> | ||||
| #include <boost/container/flat_set.hpp> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/function.h" | ||||
| @@ -26,9 +32,9 @@ | ||||
|  | ||||
| namespace Shader::Optimization { | ||||
| namespace { | ||||
| using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; | ||||
|  | ||||
| struct FlagTag {}; | ||||
| struct FlagTag { | ||||
|     auto operator<=>(const FlagTag&) const noexcept = default; | ||||
| }; | ||||
| struct ZeroFlagTag : FlagTag {}; | ||||
| struct SignFlagTag : FlagTag {}; | ||||
| struct CarryFlagTag : FlagTag {}; | ||||
| @@ -38,9 +44,15 @@ struct GotoVariable : FlagTag { | ||||
|     GotoVariable() = default; | ||||
|     explicit GotoVariable(u32 index_) : index{index_} {} | ||||
|  | ||||
|     auto operator<=>(const GotoVariable&) const noexcept = default; | ||||
|  | ||||
|     u32 index; | ||||
| }; | ||||
|  | ||||
| using Variant = std::variant<IR::Reg, IR::Pred, ZeroFlagTag, SignFlagTag, CarryFlagTag, | ||||
|                              OverflowFlagTag, GotoVariable>; | ||||
| using ValueMap = boost::container::flat_map<IR::Block*, IR::Value, std::less<IR::Block*>>; | ||||
|  | ||||
| struct DefTable { | ||||
|     [[nodiscard]] ValueMap& operator[](IR::Reg variable) noexcept { | ||||
|         return regs[IR::RegIndex(variable)]; | ||||
| @@ -102,19 +114,35 @@ public: | ||||
|     } | ||||
|  | ||||
|     IR::Value ReadVariable(auto variable, IR::Block* block) { | ||||
|         auto& def{current_def[variable]}; | ||||
|         const ValueMap& def{current_def[variable]}; | ||||
|         if (const auto it{def.find(block)}; it != def.end()) { | ||||
|             return it->second; | ||||
|         } | ||||
|         return ReadVariableRecursive(variable, block); | ||||
|     } | ||||
|  | ||||
|     void SealBlock(IR::Block* block) { | ||||
          // Adds operands to every phi recorded for `block` in incomplete_phis, then records the
          // block in sealed_blocks. The incomplete_phis entry itself is not erased here.
|         const auto it{incomplete_phis.find(block)}; | ||||
|         if (it != incomplete_phis.end()) { | ||||
|             for (auto& [variant, phi] : it->second) { | ||||
                  // The variant holds the variable the phi was created for; std::visit forwards
                  // the concrete alternative to AddPhiOperands.
|                 std::visit([&](auto& variable) { AddPhiOperands(variable, *phi, block); }, variant); | ||||
|             } | ||||
|         } | ||||
          // Inserted only after the pending phis were processed, so any lookup of this block
          // made during that processing still finds it absent from sealed_blocks.
|         sealed_blocks.insert(block); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     IR::Value ReadVariableRecursive(auto variable, IR::Block* block) { | ||||
|         IR::Value val; | ||||
|         if (const std::span preds{block->ImmediatePredecessors()}; preds.size() == 1) { | ||||
|         if (!sealed_blocks.contains(block)) { | ||||
|             // Incomplete CFG | ||||
|             IR::Inst* phi{&*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||||
|             incomplete_phis[block].insert_or_assign(variable, phi); | ||||
|             val = IR::Value{&*phi}; | ||||
|         } else if (const std::span imm_preds{block->ImmediatePredecessors()}; | ||||
|                    imm_preds.size() == 1) { | ||||
|             // Optimize the common case of one predecessor: no phi needed | ||||
|             val = ReadVariable(variable, preds.front()); | ||||
|             val = ReadVariable(variable, imm_preds.front()); | ||||
|         } else { | ||||
|             // Break potential cycles with operandless phi | ||||
|             IR::Inst& phi_inst{*block->PrependNewInst(block->begin(), IR::Opcode::Phi)}; | ||||
| @@ -127,8 +155,8 @@ private: | ||||
|     } | ||||
|  | ||||
|     IR::Value AddPhiOperands(auto variable, IR::Inst& phi, IR::Block* block) { | ||||
|         for (IR::Block* const pred : block->ImmediatePredecessors()) { | ||||
|             phi.AddPhiOperand(pred, ReadVariable(variable, pred)); | ||||
|         for (IR::Block* const imm_pred : block->ImmediatePredecessors()) { | ||||
|             phi.AddPhiOperand(imm_pred, ReadVariable(variable, imm_pred)); | ||||
|         } | ||||
|         return TryRemoveTrivialPhi(phi, block, UndefOpcode(variable)); | ||||
|     } | ||||
| @@ -159,6 +187,9 @@ private: | ||||
|         return same; | ||||
|     } | ||||
|  | ||||
|     boost::container::flat_set<IR::Block*> sealed_blocks; | ||||
|     boost::container::flat_map<IR::Block*, boost::container::flat_map<Variant, IR::Inst*>> | ||||
|         incomplete_phis; | ||||
|     DefTable current_def; | ||||
| }; | ||||
|  | ||||
| @@ -218,14 +249,19 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) { | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void VisitBlock(Pass& pass, IR::Block* block) { | ||||
      // Runs VisitInst on each instruction of `block`, then seals the block in `pass`.
|     for (IR::Inst& inst : block->Instructions()) { | ||||
|         VisitInst(pass, block, inst); | ||||
|     } | ||||
      // Sealing happens only after all of the block's own instructions were visited.
|     pass.SealBlock(block); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void SsaRewritePass(IR::Function& function) { | ||||
| void SsaRewritePass(std::span<IR::Block* const> post_order_blocks) { | ||||
|     Pass pass; | ||||
|     for (IR::Block* const block : function.blocks) { | ||||
|         for (IR::Inst& inst : block->Instructions()) { | ||||
|             VisitInst(pass, block, inst); | ||||
|         } | ||||
|     for (IR::Block* const block : post_order_blocks | std::views::reverse) { | ||||
|         VisitBlock(pass, block); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -69,14 +69,12 @@ int main() { | ||||
|  | ||||
|     // FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; | ||||
|     FileEnvironment env{"D:\\Shaders\\shader.bin"}; | ||||
|     for (int i = 0; i < 1; ++i) { | ||||
|         block_pool->ReleaseContents(); | ||||
|         inst_pool->ReleaseContents(); | ||||
|         flow_block_pool->ReleaseContents(); | ||||
|         Flow::CFG cfg{env, *flow_block_pool, 0}; | ||||
|         fmt::print(stdout, "{}\n", cfg.Dot()); | ||||
|         IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; | ||||
|         fmt::print(stdout, "{}\n", IR::DumpProgram(program)); | ||||
|         Backend::SPIRV::EmitSPIRV spirv{program}; | ||||
|     } | ||||
|     block_pool->ReleaseContents(); | ||||
|     inst_pool->ReleaseContents(); | ||||
|     flow_block_pool->ReleaseContents(); | ||||
|     Flow::CFG cfg{env, *flow_block_pool, 0}; | ||||
|     fmt::print(stdout, "{}\n", cfg.Dot()); | ||||
|     IR::Program program{TranslateProgram(*inst_pool, *block_pool, env, cfg)}; | ||||
|     fmt::print(stdout, "{}\n", IR::DumpProgram(program)); | ||||
|     // Backend::SPIRV::EmitSPIRV spirv{program}; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user