shader_recompiler: Align SSBO offsets in GlobalMemory functions
This commit is contained in:
		| @@ -5,6 +5,7 @@ | |||||||
| #include "shader_recompiler/backend/glasm/glasm_emit_context.h" | #include "shader_recompiler/backend/glasm/glasm_emit_context.h" | ||||||
| #include "shader_recompiler/frontend/ir/program.h" | #include "shader_recompiler/frontend/ir/program.h" | ||||||
| #include "shader_recompiler/frontend/ir/value.h" | #include "shader_recompiler/frontend/ir/value.h" | ||||||
|  | #include "shader_recompiler/profile.h" | ||||||
| #include "shader_recompiler/runtime_info.h" | #include "shader_recompiler/runtime_info.h" | ||||||
|  |  | ||||||
| namespace Shader::Backend::GLASM { | namespace Shader::Backend::GLASM { | ||||||
| @@ -35,7 +36,9 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std | |||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|         const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; |         const auto& ssbo{ctx.info.storage_buffers_descriptors[index]}; | ||||||
|         ctx.Add("LDC.U64 DC.x,c{}[{}];"    // ssbo_addr |         const u64 ssbo_align_mask{~(ctx.profile.min_ssbo_alignment - 1U)}; | ||||||
|  |         ctx.Add("LDC.U64 DC.x,c{}[{}];"    // unaligned_ssbo_addr | ||||||
|  |                 "AND.U64 DC.x,DC.x,{};"    // ssbo_addr = unaligned_ssbo_addr & ssbo_align_mask | ||||||
|                 "LDC.U32 RC.x,c{}[{}];"    // ssbo_size_u32 |                 "LDC.U32 RC.x,c{}[{}];"    // ssbo_size_u32 | ||||||
|                 "CVT.U64.U32 DC.y,RC.x;"   // ssbo_size = ssbo_size_u32 |                 "CVT.U64.U32 DC.y,RC.x;"   // ssbo_size = ssbo_size_u32 | ||||||
|                 "ADD.U64 DC.y,DC.y,DC.x;"  // ssbo_end = ssbo_addr + ssbo_size |                 "ADD.U64 DC.y,DC.y,DC.x;"  // ssbo_end = ssbo_addr + ssbo_size | ||||||
| @@ -44,8 +47,8 @@ void GlobalStorageOp(EmitContext& ctx, Register address, bool pointer_based, std | |||||||
|                 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b |                 "AND.U.CC RC.x,RC.x,RC.y;" // cond = a && b | ||||||
|                 "IF NE.x;"                 // if cond |                 "IF NE.x;"                 // if cond | ||||||
|                 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr |                 "SUB.U64 DC.x,{}.x,DC.x;", // offset = input_addr - ssbo_addr | ||||||
|                 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo.cbuf_index, ssbo.cbuf_offset + 8, address, |                 ssbo.cbuf_index, ssbo.cbuf_offset, ssbo_align_mask, ssbo.cbuf_index, | ||||||
|                 address, address); |                 ssbo.cbuf_offset + 8, address, address, address); | ||||||
|         if (pointer_based) { |         if (pointer_based) { | ||||||
|             ctx.Add("PK64.U DC.y,c[{}];"      // host_ssbo = cbuf |             ctx.Add("PK64.U DC.y,c[{}];"      // host_ssbo = cbuf | ||||||
|                     "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset |                     "ADD.U64 DC.x,DC.x,DC.y;" // host_addr = host_ssbo + offset | ||||||
|   | |||||||
| @@ -601,7 +601,10 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { | |||||||
|             addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); |             addr_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, addr_loc / 16, Swizzle(addr_loc)); | ||||||
|             size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); |             size_xy[i] = fmt::format("ftou({}[{}].{})", cbuf, size_loc / 16, Swizzle(size_loc)); | ||||||
|         } |         } | ||||||
|         const auto addr_pack{fmt::format("packUint2x32(uvec2({},{}))", addr_xy[0], addr_xy[1])}; |         const u32 ssbo_align_mask{~(static_cast<u32>(profile.min_ssbo_alignment) - 1U)}; | ||||||
|  |         const auto aligned_low_addr{fmt::format("{}&{}", addr_xy[0], ssbo_align_mask)}; | ||||||
|  |         const auto aligned_addr{fmt::format("uvec2({},{})", aligned_low_addr, addr_xy[1])}; | ||||||
|  |         const auto addr_pack{fmt::format("packUint2x32({})", aligned_addr)}; | ||||||
|         const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; |         const auto addr_statment{fmt::format("uint64_t {}={};", ssbo_addr, addr_pack)}; | ||||||
|         func += addr_statment; |         func += addr_statment; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -891,7 +891,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | |||||||
|             const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, |             const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | ||||||
|                                                      zero, ssbo_size_cbuf_offset)}; |                                                      zero, ssbo_size_cbuf_offset)}; | ||||||
|  |  | ||||||
|             const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; |             const u64 ssbo_align_mask{~(profile.min_ssbo_alignment - 1U)}; | ||||||
|  |             const Id unaligned_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||||||
|  |             const Id ssbo_addr{OpBitwiseAnd(U64, unaligned_addr, Constant(U64, ssbo_align_mask))}; | ||||||
|             const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; |             const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | ||||||
|             const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; |             const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | ||||||
|             const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), |             const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | ||||||
|   | |||||||
| @@ -84,6 +84,8 @@ struct Profile { | |||||||
|  |  | ||||||
|     /// Maxwell and earlier nVidia architectures have broken robust support |     /// Maxwell and earlier nVidia architectures have broken robust support | ||||||
|     bool has_broken_robust{}; |     bool has_broken_robust{}; | ||||||
|  |  | ||||||
|  |     u64 min_ssbo_alignment{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } // namespace Shader | } // namespace Shader | ||||||
|   | |||||||
| @@ -1796,7 +1796,8 @@ Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr, u32 cbuf_index, | |||||||
|         return NULL_BINDING; |         return NULL_BINDING; | ||||||
|     } |     } | ||||||
|     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); |     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|     ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", cbuf_index); |     ASSERT_MSG(cpu_addr, "Unaligned storage buffer address not found for cbuf index {}", | ||||||
|  |                cbuf_index); | ||||||
|     // The end address used for size calculation does not need to be aligned |     // The end address used for size calculation does not need to be aligned | ||||||
|     const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); |     const VAddr cpu_end = Common::AlignUp(*cpu_addr + size, Core::Memory::YUZU_PAGESIZE); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||||||
|           .has_gl_bool_ref_bug = device.HasBoolRefBug(), |           .has_gl_bool_ref_bug = device.HasBoolRefBug(), | ||||||
|           .ignore_nan_fp_comparisons = true, |           .ignore_nan_fp_comparisons = true, | ||||||
|           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), |           .gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(), | ||||||
|  |           .min_ssbo_alignment = device.GetShaderStorageBufferAlignment(), | ||||||
|       }, |       }, | ||||||
|       host_info{ |       host_info{ | ||||||
|           .support_float64 = true, |           .support_float64 = true, | ||||||
|   | |||||||
| @@ -359,6 +359,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device | |||||||
|             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, |             driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY, | ||||||
|         .has_broken_robust = |         .has_broken_robust = | ||||||
|             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, |             device.IsNvidia() && device.GetNvidiaArch() <= NvidiaArchitecture::Arch_Pascal, | ||||||
|  |         .min_ssbo_alignment = device.GetStorageBufferAlignment(), | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     host_info = Shader::HostTranslateInfo{ |     host_info = Shader::HostTranslateInfo{ | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user