shader_recompiler: Align SSBO offsets to meet host requirements
We can take advantage of SSBO addresses being passed in a constant buffer to account for the extra alignment requirements in the shader itself.
This commit is contained in:
		| @@ -223,7 +223,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | |||||||
|  |  | ||||||
|     Optimization::PositionPass(env, program); |     Optimization::PositionPass(env, program); | ||||||
|  |  | ||||||
|     Optimization::GlobalMemoryToStorageBufferPass(program); |     Optimization::GlobalMemoryToStorageBufferPass(program, host_info); | ||||||
|     Optimization::TexturePass(env, program, host_info); |     Optimization::TexturePass(env, program, host_info); | ||||||
|  |  | ||||||
|     if (Settings::values.resolution_info.active) { |     if (Settings::values.resolution_info.active) { | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ struct HostTranslateInfo { | |||||||
|     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered |     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered | ||||||
|     bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers |     bool support_snorm_render_buffer{};  ///< True when the device supports SNORM render buffers | ||||||
|     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS |     bool support_viewport_index_layer{}; ///< True when the device supports gl_Layer in VS | ||||||
|  |     u32 min_ssbo_alignment{};  ///< Minimum alignment supported by the device for SSBOs | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } // namespace Shader | } // namespace Shader | ||||||
|   | |||||||
| @@ -11,6 +11,7 @@ | |||||||
| #include "shader_recompiler/frontend/ir/breadth_first_search.h" | #include "shader_recompiler/frontend/ir/breadth_first_search.h" | ||||||
| #include "shader_recompiler/frontend/ir/ir_emitter.h" | #include "shader_recompiler/frontend/ir/ir_emitter.h" | ||||||
| #include "shader_recompiler/frontend/ir/value.h" | #include "shader_recompiler/frontend/ir/value.h" | ||||||
|  | #include "shader_recompiler/host_translate_info.h" | ||||||
| #include "shader_recompiler/ir_opt/passes.h" | #include "shader_recompiler/ir_opt/passes.h" | ||||||
|  |  | ||||||
| namespace Shader::Optimization { | namespace Shader::Optimization { | ||||||
| @@ -402,7 +403,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) | |||||||
| } | } | ||||||
|  |  | ||||||
| /// Returns the offset in indices (not bytes) for an equivalent storage instruction | /// Returns the offset in indices (not bytes) for an equivalent storage instruction | ||||||
| IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer) { | IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) { | ||||||
|     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; |     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; | ||||||
|     IR::U32 offset; |     IR::U32 offset; | ||||||
|     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { |     if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) { | ||||||
| @@ -415,7 +416,10 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer | |||||||
|     } |     } | ||||||
|     // Subtract the least significant 32 bits from the guest offset. The result is the storage |     // Subtract the least significant 32 bits from the guest offset. The result is the storage | ||||||
|     // buffer offset in bytes. |     // buffer offset in bytes. | ||||||
|     const IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; |     IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))}; | ||||||
|  |  | ||||||
|  |     // Align the offset base to match the host alignment requirements | ||||||
|  |     low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U))); | ||||||
|     return ir.ISub(offset, low_cbuf); |     return ir.ISub(offset, low_cbuf); | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -510,7 +514,7 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | |||||||
| } | } | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| void GlobalMemoryToStorageBufferPass(IR::Program& program) { | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) { | ||||||
|     StorageInfo info; |     StorageInfo info; | ||||||
|     for (IR::Block* const block : program.post_order_blocks) { |     for (IR::Block* const block : program.post_order_blocks) { | ||||||
|         for (IR::Inst& inst : block->Instructions()) { |         for (IR::Inst& inst : block->Instructions()) { | ||||||
| @@ -534,7 +538,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | |||||||
|         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; |         const IR::U32 index{IR::Value{static_cast<u32>(info.set.index_of(it))}}; | ||||||
|         IR::Block* const block{storage_inst.block}; |         IR::Block* const block{storage_inst.block}; | ||||||
|         IR::Inst* const inst{storage_inst.inst}; |         IR::Inst* const inst{storage_inst.inst}; | ||||||
|         const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer)}; |         const IR::U32 offset{StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)}; | ||||||
|         Replace(*block, *inst, index, offset); |         Replace(*block, *inst, index, offset); | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ namespace Shader::Optimization { | |||||||
| void CollectShaderInfoPass(Environment& env, IR::Program& program); | void CollectShaderInfoPass(Environment& env, IR::Program& program); | ||||||
| void ConstantPropagationPass(Environment& env, IR::Program& program); | void ConstantPropagationPass(Environment& env, IR::Program& program); | ||||||
| void DeadCodeEliminationPass(IR::Program& program); | void DeadCodeEliminationPass(IR::Program& program); | ||||||
| void GlobalMemoryToStorageBufferPass(IR::Program& program); | void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info); | ||||||
| void IdentityRemovalPass(IR::Program& program); | void IdentityRemovalPass(IR::Program& program); | ||||||
| void LowerFp16ToFp32(IR::Program& program); | void LowerFp16ToFp32(IR::Program& program); | ||||||
| void LowerInt64ToInt32(IR::Program& program); | void LowerInt64ToInt32(IR::Program& program); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user