shader: Implement VertexA stage
This commit is contained in:
		| @@ -162,6 +162,7 @@ add_library(shader_recompiler STATIC | ||||
|     ir_opt/collect_shader_info_pass.cpp | ||||
|     ir_opt/constant_propagation_pass.cpp | ||||
|     ir_opt/dead_code_elimination_pass.cpp | ||||
|     ir_opt/dual_vertex_pass.cpp | ||||
|     ir_opt/global_memory_to_storage_buffer_pass.cpp | ||||
|     ir_opt/identity_removal_pass.cpp | ||||
|     ir_opt/lower_fp16_to_fp32.cpp | ||||
|   | ||||
| @@ -25,6 +25,7 @@ void EmitBranchConditional(EmitContext& ctx, Id condition, Id true_label, Id fal | ||||
| void EmitLoopMerge(EmitContext& ctx, Id merge_label, Id continue_label); | ||||
| void EmitSelectionMerge(EmitContext& ctx, Id merge_label); | ||||
| void EmitReturn(EmitContext& ctx); | ||||
| void EmitJoin(EmitContext& ctx); | ||||
| void EmitUnreachable(EmitContext& ctx); | ||||
| void EmitDemoteToHelperInvocation(EmitContext& ctx, Id continue_label); | ||||
| void EmitBarrier(EmitContext& ctx); | ||||
|   | ||||
| @@ -26,6 +26,10 @@ void EmitReturn(EmitContext& ctx) { | ||||
|     ctx.OpReturn(); | ||||
| } | ||||
|  | ||||
| void EmitJoin(EmitContext&) { | ||||
|     throw NotImplementedException("Join shouldn't be emitted"); | ||||
| } | ||||
|  | ||||
| void EmitUnreachable(EmitContext& ctx) { | ||||
|     ctx.OpUnreachable(); | ||||
| } | ||||
|   | ||||
| @@ -61,6 +61,7 @@ bool Inst::MayHaveSideEffects() const noexcept { | ||||
|     case Opcode::LoopMerge: | ||||
|     case Opcode::SelectionMerge: | ||||
|     case Opcode::Return: | ||||
|     case Opcode::Join: | ||||
|     case Opcode::Unreachable: | ||||
|     case Opcode::DemoteToHelperInvocation: | ||||
|     case Opcode::Barrier: | ||||
|   | ||||
| @@ -13,6 +13,7 @@ OPCODE(BranchConditional,                                   Void,           U1, | ||||
| OPCODE(LoopMerge,                                           Void,           Label,          Label,                                                          ) | ||||
| OPCODE(SelectionMerge,                                      Void,           Label,                                                                          ) | ||||
| OPCODE(Return,                                              Void,                                                                                           ) | ||||
| OPCODE(Join,                                                Void,                                                                                           ) | ||||
| OPCODE(Unreachable,                                         Void,                                                                                           ) | ||||
| OPCODE(DemoteToHelperInvocation,                            Void,           Label,                                                                          ) | ||||
|  | ||||
|   | ||||
| @@ -150,4 +150,32 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     return program; | ||||
| } | ||||
|  | ||||
| IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||||
|                                     Environment& env2) { | ||||
|     IR::Program program{}; | ||||
|     Optimization::VertexATransformPass(vertex_a); | ||||
|     Optimization::VertexBTransformPass(vertex_b); | ||||
|     program.blocks.swap(vertex_a.blocks); | ||||
|     for (IR::Block* block : vertex_b.blocks) { | ||||
|         program.blocks.push_back(block); | ||||
|     } | ||||
|     program.stage = Stage::VertexB; | ||||
|     program.info = vertex_a.info; | ||||
|     program.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size); | ||||
|  | ||||
|     for (size_t index = 0; index < 32; index++) { | ||||
|         program.info.input_generics[index].used |= vertex_b.info.input_generics[index].used; | ||||
|         program.info.stores_generics[index] |= vertex_b.info.stores_generics[index]; | ||||
|     } | ||||
|     Optimization::JoinTextureInfo(program.info, vertex_b.info); | ||||
|     Optimization::JoinStorageInfo(program.info, vertex_b.info); | ||||
|     Optimization::DualVertexJoinPass(program); | ||||
|     program.post_order_blocks = PostOrder(program.blocks); | ||||
|     Optimization::DeadCodeEliminationPass(program); | ||||
|     Optimization::IdentityRemovalPass(program); | ||||
|     Optimization::VerificationPass(program); | ||||
|     Optimization::CollectShaderInfoPass(env2, program); | ||||
|     return program; | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
|   | ||||
| @@ -21,4 +21,6 @@ namespace Shader::Maxwell { | ||||
|                                            ObjectPool<IR::Block>& block_pool, Environment& env, | ||||
|                                            Flow::CFG& cfg); | ||||
|  | ||||
| [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||||
|                                                   Environment& env_vertex_b); | ||||
| } // namespace Shader::Maxwell | ||||
|   | ||||
							
								
								
									
										74
									
								
								src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/shader_recompiler/ir_opt/dual_vertex_pass.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,74 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <ranges> | ||||
| #include <tuple> | ||||
| #include <type_traits> | ||||
|  | ||||
| #include "common/bit_cast.h" | ||||
| #include "common/bit_util.h" | ||||
| #include "shader_recompiler/exception.h" | ||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
|  | ||||
| namespace Shader::Optimization { | ||||
|  | ||||
| void VertexATransformPass(IR::Program& program) { | ||||
|     bool replaced_join{}; | ||||
|     bool eliminated_epilogue{}; | ||||
|     for (IR::Block* const block : program.post_order_blocks) { | ||||
|         for (IR::Inst& inst : block->Instructions()) { | ||||
|             switch (inst.GetOpcode()) { | ||||
|             case IR::Opcode::Return: | ||||
|                 inst.ReplaceOpcode(IR::Opcode::Join); | ||||
|                 replaced_join = true; | ||||
|                 break; | ||||
|             case IR::Opcode::Epilogue: | ||||
|                 inst.Invalidate(); | ||||
|                 eliminated_epilogue = true; | ||||
|                 break; | ||||
|             default: | ||||
|                 break; | ||||
|             } | ||||
|             if (replaced_join && eliminated_epilogue) { | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| void VertexBTransformPass(IR::Program& program) { | ||||
|     for (IR::Block* const block : program.post_order_blocks | std::views::reverse) { | ||||
|         for (IR::Inst& inst : block->Instructions()) { | ||||
|             if (inst.GetOpcode() == IR::Opcode::Prologue) { | ||||
|                 return inst.Invalidate(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| void DualVertexJoinPass(IR::Program& program) { | ||||
|     const auto& blocks = program.blocks; | ||||
|     s64 s = static_cast<s64>(blocks.size()) - 1; | ||||
|     if (s < 1) { | ||||
|         throw NotImplementedException("Dual Vertex Join pass failed, expected atleast 2 blocks!"); | ||||
|     } | ||||
|     for (s64 index = 0; index < s; index++) { | ||||
|         IR::Block* const current_block = blocks[index]; | ||||
|         IR::Block* const next_block = blocks[index + 1]; | ||||
|         for (IR::Inst& inst : current_block->Instructions()) { | ||||
|             if (inst.GetOpcode() == IR::Opcode::Join) { | ||||
|                 IR::IREmitter ir{*current_block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|                 ir.Branch(next_block); | ||||
|                 inst.Invalidate(); | ||||
|                 // only 1 join should exist | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     throw NotImplementedException("Dual Vertex Join pass failed, no join present!"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
| @@ -499,4 +499,30 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <typename Descriptors, typename Descriptor, typename Func> | ||||
| static u32 Add(Descriptors& descriptors, const Descriptor& desc, Func&& pred) { | ||||
|     // TODO: Handle arrays | ||||
|     const auto it{std::ranges::find_if(descriptors, pred)}; | ||||
|     if (it != descriptors.end()) { | ||||
|         return static_cast<u32>(std::distance(descriptors.begin(), it)); | ||||
|     } | ||||
|     descriptors.push_back(desc); | ||||
|     return static_cast<u32>(descriptors.size()) - 1; | ||||
| } | ||||
|  | ||||
| void JoinStorageInfo(Info& base, Info& source) { | ||||
|     auto& descriptors = base.storage_buffers_descriptors; | ||||
|     for (auto& desc : source.storage_buffers_descriptors) { | ||||
|         auto it{std::ranges::find_if(descriptors, [&desc](const auto& existing) { | ||||
|             return desc.cbuf_index == existing.cbuf_index && | ||||
|                    desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count; | ||||
|         })}; | ||||
|         if (it != descriptors.end()) { | ||||
|             it->is_written |= desc.is_written; | ||||
|             continue; | ||||
|         } | ||||
|         descriptors.push_back(desc); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
|   | ||||
| @@ -22,4 +22,11 @@ void SsaRewritePass(IR::Program& program); | ||||
| void TexturePass(Environment& env, IR::Program& program); | ||||
| void VerificationPass(const IR::Program& program); | ||||
|  | ||||
| // Dual Vertex | ||||
| void VertexATransformPass(IR::Program& program); | ||||
| void VertexBTransformPass(IR::Program& program); | ||||
| void DualVertexJoinPass(IR::Program& program); | ||||
| void JoinTextureInfo(Info& base, Info& source); | ||||
| void JoinStorageInfo(Info& base, Info& source); | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
|   | ||||
| @@ -426,4 +426,25 @@ void TexturePass(Environment& env, IR::Program& program) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void JoinTextureInfo(Info& base, Info& source) { | ||||
|     Descriptors descriptors{ | ||||
|         base.texture_buffer_descriptors, | ||||
|         base.image_buffer_descriptors, | ||||
|         base.texture_descriptors, | ||||
|         base.image_descriptors, | ||||
|     }; | ||||
|     for (auto& desc : source.texture_buffer_descriptors) { | ||||
|         descriptors.Add(desc); | ||||
|     } | ||||
|     for (auto& desc : source.image_buffer_descriptors) { | ||||
|         descriptors.Add(desc); | ||||
|     } | ||||
|     for (auto& desc : source.texture_descriptors) { | ||||
|         descriptors.Add(desc); | ||||
|     } | ||||
|     for (auto& desc : source.image_descriptors) { | ||||
|         descriptors.Add(desc); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Optimization | ||||
|   | ||||
| @@ -47,6 +47,7 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); | ||||
|  | ||||
| namespace { | ||||
| using Shader::Backend::SPIRV::EmitSPIRV; | ||||
| using Shader::Maxwell::MergeDualVertexPrograms; | ||||
| using Shader::Maxwell::TranslateProgram; | ||||
| using VideoCommon::ComputeEnvironment; | ||||
| using VideoCommon::FileEnvironment; | ||||
| @@ -287,22 +288,32 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | ||||
|     LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | ||||
|     size_t env_index{0}; | ||||
|     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||||
|     bool uses_vertex_a{}; | ||||
|     std::size_t start_value_processing{}; | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         uses_vertex_a |= index == 0; | ||||
|         Shader::Environment& env{*envs[env_index]}; | ||||
|         ++env_index; | ||||
|  | ||||
|         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | ||||
|         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); | ||||
|         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||||
|         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||||
|         if (!uses_vertex_a || index != 1) { | ||||
|             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||||
|             continue; | ||||
|         } | ||||
|         Shader::IR::Program& program_va{programs[0]}; | ||||
|         Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|         programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||||
|         start_value_processing = 1; | ||||
|     } | ||||
|     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | ||||
|     std::array<vk::ShaderModule, Maxwell::MaxShaderStage> modules; | ||||
|  | ||||
|     u32 binding{0}; | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|     for (size_t index = start_value_processing; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user