shader: Implement ATOM/S and RED
		| @@ -3,6 +3,7 @@ add_library(shader_recompiler STATIC | ||||
|     backend/spirv/emit_context.h | ||||
|     backend/spirv/emit_spirv.cpp | ||||
|     backend/spirv/emit_spirv.h | ||||
|     backend/spirv/emit_spirv_atomic.cpp | ||||
|     backend/spirv/emit_spirv_barriers.cpp | ||||
|     backend/spirv/emit_spirv_bitwise_conversion.cpp | ||||
|     backend/spirv/emit_spirv_composite.cpp | ||||
| @@ -65,6 +66,8 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/maxwell/program.h | ||||
|     frontend/maxwell/structured_control_flow.cpp | ||||
|     frontend/maxwell/structured_control_flow.h | ||||
|     frontend/maxwell/translate/impl/atomic_operations_global_memory.cpp | ||||
|     frontend/maxwell/translate/impl/atomic_operations_shared_memory.cpp | ||||
|     frontend/maxwell/translate/impl/attribute_memory_to_physical.cpp | ||||
|     frontend/maxwell/translate/impl/barrier_operations.cpp | ||||
|     frontend/maxwell/translate/impl/bitfield_extract.cpp | ||||
|   | ||||
| @@ -15,6 +15,53 @@ | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| enum class CasFunctionType { | ||||
|     Increment, | ||||
|     Decrement, | ||||
|     FPAdd, | ||||
|     FPMin, | ||||
|     FPMax, | ||||
| }; | ||||
|  | ||||
| Id CasFunction(EmitContext& ctx, CasFunctionType function_type, Id value_type) { | ||||
|     const Id func_type{ctx.TypeFunction(value_type, value_type, value_type)}; | ||||
|     const Id func{ctx.OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||||
|     const Id op_a{ctx.OpFunctionParameter(value_type)}; | ||||
|     const Id op_b{ctx.OpFunctionParameter(value_type)}; | ||||
|     ctx.AddLabel(); | ||||
|     Id result{}; | ||||
|     switch (function_type) { | ||||
|     case CasFunctionType::Increment: { | ||||
|         const Id pred{ctx.OpUGreaterThanEqual(ctx.U1, op_a, op_b)}; | ||||
|         const Id incr{ctx.OpIAdd(value_type, op_a, ctx.Constant(value_type, 1))}; | ||||
|         result = ctx.OpSelect(value_type, pred, ctx.u32_zero_value, incr); | ||||
|         break; | ||||
|     } | ||||
|     case CasFunctionType::Decrement: { | ||||
|         const Id lhs{ctx.OpIEqual(ctx.U1, op_a, ctx.Constant(value_type, 0u))}; | ||||
|         const Id rhs{ctx.OpUGreaterThan(ctx.U1, op_a, op_b)}; | ||||
|         const Id pred{ctx.OpLogicalOr(ctx.U1, lhs, rhs)}; | ||||
|         const Id decr{ctx.OpISub(value_type, op_a, ctx.Constant(value_type, 1))}; | ||||
|         result = ctx.OpSelect(value_type, pred, op_b, decr); | ||||
|         break; | ||||
|     } | ||||
|     case CasFunctionType::FPAdd: | ||||
|         result = ctx.OpFAdd(value_type, op_a, op_b); | ||||
|         break; | ||||
|     case CasFunctionType::FPMin: | ||||
|         result = ctx.OpFMin(value_type, op_a, op_b); | ||||
|         break; | ||||
|     case CasFunctionType::FPMax: | ||||
|         result = ctx.OpFMax(value_type, op_a, op_b); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|     ctx.OpReturnValue(result); | ||||
|     ctx.OpFunctionEnd(); | ||||
|     return func; | ||||
| } | ||||
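Increment and Decrement mirror the wrap-around semantics of the hardware ATOM.INC/ATOM.DEC operations rather than a plain +1/-1. A minimal standalone sketch of what the two generated functions compute per CAS iteration (function names are illustrative, not from the commit):

```cpp
#include <cstdint>

// INC: wrap to 0 once the old value reaches the limit operand.
uint32_t CasIncrement(uint32_t old_value, uint32_t limit) {
    return old_value >= limit ? 0u : old_value + 1u;
}

// DEC: wrap to the limit when the old value is 0 or already above it.
uint32_t CasDecrement(uint32_t old_value, uint32_t limit) {
    return (old_value == 0u || old_value > limit) ? limit : old_value - 1u;
}
```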
|  | ||||
| Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) { | ||||
|     const spv::ImageFormat format{spv::ImageFormat::Unknown}; | ||||
|     const Id type{ctx.F32[1]}; | ||||
| @@ -196,6 +243,56 @@ Id EmitContext::Def(const IR::Value& value) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| Id EmitContext::CasLoop(Id function, CasPointerType pointer_type, Id value_type) { | ||||
|     const Id loop_header{OpLabel()}; | ||||
|     const Id continue_block{OpLabel()}; | ||||
|     const Id merge_block{OpLabel()}; | ||||
|     const Id storage_type{pointer_type == CasPointerType::Shared ? shared_memory_u32_type | ||||
|                                                                  : storage_memory_u32}; | ||||
|     const Id func_type{TypeFunction(value_type, U32[1], value_type, storage_type)}; | ||||
|     const Id func{OpFunction(value_type, spv::FunctionControlMask::MaskNone, func_type)}; | ||||
|     const Id index{OpFunctionParameter(U32[1])}; | ||||
|     const Id op_b{OpFunctionParameter(value_type)}; | ||||
|     const Id base{OpFunctionParameter(storage_type)}; | ||||
|     AddLabel(); | ||||
|     const Id one{Constant(U32[1], 1)}; | ||||
|     OpBranch(loop_header); | ||||
|     AddLabel(loop_header); | ||||
|     OpLoopMerge(merge_block, continue_block, spv::LoopControlMask::MaskNone); | ||||
|     OpBranch(continue_block); | ||||
|  | ||||
|     AddLabel(continue_block); | ||||
|     const Id word_pointer{pointer_type == CasPointerType::Shared | ||||
|                               ? OpAccessChain(shared_u32, base, index) | ||||
|                               : OpAccessChain(storage_u32, base, u32_zero_value, index)}; | ||||
|     if (value_type.value == F32[2].value) { | ||||
|         const Id u32_value{OpLoad(U32[1], word_pointer)}; | ||||
|         const Id value{OpUnpackHalf2x16(F32[2], u32_value)}; | ||||
|         const Id new_value{OpFunctionCall(value_type, function, value, op_b)}; | ||||
|         const Id u32_new_value{OpPackHalf2x16(U32[1], new_value)}; | ||||
|         const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, | ||||
|                                                     u32_zero_value, u32_new_value, u32_value)}; | ||||
|         const Id success{OpIEqual(U1, atomic_res, u32_value)}; | ||||
|         OpBranchConditional(success, merge_block, loop_header); | ||||
|  | ||||
|         AddLabel(merge_block); | ||||
|         OpReturnValue(OpUnpackHalf2x16(F32[2], atomic_res)); | ||||
|     } else { | ||||
|         const Id value{OpLoad(U32[1], word_pointer)}; | ||||
|         const Id new_value{OpBitcast( | ||||
|             U32[1], OpFunctionCall(value_type, function, OpBitcast(value_type, value), op_b))}; | ||||
|         const Id atomic_res{OpAtomicCompareExchange(U32[1], word_pointer, one, u32_zero_value, | ||||
|                                                     u32_zero_value, new_value, value)}; | ||||
|         const Id success{OpIEqual(U1, atomic_res, value)}; | ||||
|         OpBranchConditional(success, merge_block, loop_header); | ||||
|  | ||||
|         AddLabel(merge_block); | ||||
|         OpReturnValue(OpBitcast(value_type, atomic_res)); | ||||
|     } | ||||
|     OpFunctionEnd(); | ||||
|     return func; | ||||
| } | ||||
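CasLoop above wraps one of these value functions in a compare-exchange retry loop over a single 32-bit word: when the value type is a two-component f32 vector the word is decoded with OpUnpackHalf2x16 and re-encoded with OpPackHalf2x16, otherwise it round-trips through OpBitcast. The control flow it emits corresponds to this host-side sketch (std::atomic stands in for the SPIR-V pointer, for illustration only):

```cpp
#include <atomic>
#include <cstdint>

// Retry until the compare-exchange observes the same word the new value was
// computed from; on success, return the pre-operation value like the hardware op.
uint32_t CasLoopSketch(std::atomic<uint32_t>& word, uint32_t op_b,
                       uint32_t (*function)(uint32_t, uint32_t)) {
    uint32_t expected = word.load();
    while (!word.compare_exchange_strong(expected, function(expected, op_b))) {
        // expected is refreshed with the current word on failure; loop again.
    }
    return expected;
}
```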
|  | ||||
| void EmitContext::DefineCommonTypes(const Info& info) { | ||||
|     void_id = TypeVoid(); | ||||
|  | ||||
| @@ -300,9 +397,9 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||||
|     } | ||||
|     const u32 num_elements{Common::DivCeil(program.shared_memory_size, 4U)}; | ||||
|     const Id type{TypeArray(U32[1], Constant(U32[1], num_elements))}; | ||||
| -    const Id pointer_type{TypePointer(spv::StorageClass::Workgroup, type)}; | ||||
| +    shared_memory_u32_type = TypePointer(spv::StorageClass::Workgroup, type); | ||||
|     shared_u32 = TypePointer(spv::StorageClass::Workgroup, U32[1]); | ||||
| -    shared_memory_u32 = AddGlobalVariable(pointer_type, spv::StorageClass::Workgroup); | ||||
| +    shared_memory_u32 = AddGlobalVariable(shared_memory_u32_type, spv::StorageClass::Workgroup); | ||||
|     interfaces.push_back(shared_memory_u32); | ||||
|  | ||||
|     const Id func_type{TypeFunction(void_id, U32[1], U32[1])}; | ||||
| @@ -346,6 +443,14 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||||
|     if (program.info.uses_int16) { | ||||
|         shared_store_u16_func = make_function(16, 16); | ||||
|     } | ||||
|     if (program.info.uses_shared_increment) { | ||||
|         const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; | ||||
|         increment_cas_shared = CasLoop(inc_func, CasPointerType::Shared, U32[1]); | ||||
|     } | ||||
|     if (program.info.uses_shared_decrement) { | ||||
|         const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; | ||||
|         decrement_cas_shared = CasLoop(dec_func, CasPointerType::Shared, U32[1]); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
| @@ -530,12 +635,12 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||||
|     MemberName(struct_type, 0, "data"); | ||||
|     MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); | ||||
|  | ||||
| -    const Id storage_type{TypePointer(spv::StorageClass::StorageBuffer, struct_type)}; | ||||
| +    storage_memory_u32 = TypePointer(spv::StorageClass::StorageBuffer, struct_type); | ||||
|     storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]); | ||||
|  | ||||
|     u32 index{}; | ||||
|     for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) { | ||||
| -        const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)}; | ||||
| +        const Id id{AddGlobalVariable(storage_memory_u32, spv::StorageClass::StorageBuffer)}; | ||||
|         Decorate(id, spv::Decoration::Binding, binding); | ||||
|         Decorate(id, spv::Decoration::DescriptorSet, 0U); | ||||
|         Name(id, fmt::format("ssbo{}", index)); | ||||
| @@ -546,6 +651,51 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) { | ||||
|         index += desc.count; | ||||
|         binding += desc.count; | ||||
|     } | ||||
|     if (info.uses_global_increment) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id inc_func{CasFunction(*this, CasFunctionType::Increment, U32[1])}; | ||||
|         increment_cas_ssbo = CasLoop(inc_func, CasPointerType::Ssbo, U32[1]); | ||||
|     } | ||||
|     if (info.uses_global_decrement) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id dec_func{CasFunction(*this, CasFunctionType::Decrement, U32[1])}; | ||||
|         decrement_cas_ssbo = CasLoop(dec_func, CasPointerType::Ssbo, U32[1]); | ||||
|     } | ||||
|     if (info.uses_atomic_f32_add) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[1])}; | ||||
|         f32_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[1]); | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_add) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F16[2])}; | ||||
|         f16x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F16[2]); | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_min) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id func{CasFunction(*this, CasFunctionType::FPMin, F16[2])}; | ||||
|         f16x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_max) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id func{CasFunction(*this, CasFunctionType::FPMax, F16[2])}; | ||||
|         f16x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F16[2]); | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_add) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id add_func{CasFunction(*this, CasFunctionType::FPAdd, F32[2])}; | ||||
|         f32x2_add_cas = CasLoop(add_func, CasPointerType::Ssbo, F32[2]); | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_min) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id func{CasFunction(*this, CasFunctionType::FPMin, F32[2])}; | ||||
|         f32x2_min_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_max) { | ||||
|         AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|         const Id func{CasFunction(*this, CasFunctionType::FPMax, F32[2])}; | ||||
|         f32x2_max_cas = CasLoop(func, CasPointerType::Ssbo, F32[2]); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineTextureBuffers(const Info& info, u32& binding) { | ||||
|   | ||||
| @@ -94,6 +94,7 @@ public: | ||||
|     Id output_f32{}; | ||||
|  | ||||
|     Id storage_u32{}; | ||||
|     Id storage_memory_u32{}; | ||||
|  | ||||
|     Id image_buffer_type{}; | ||||
|     Id sampled_texture_buffer_type{}; | ||||
| @@ -136,9 +137,21 @@ public: | ||||
|     Id shared_memory_u32{}; | ||||
|     Id shared_memory_u32x2{}; | ||||
|     Id shared_memory_u32x4{}; | ||||
|     Id shared_memory_u32_type{}; | ||||
|  | ||||
|     Id shared_store_u8_func{}; | ||||
|     Id shared_store_u16_func{}; | ||||
|     Id increment_cas_shared{}; | ||||
|     Id increment_cas_ssbo{}; | ||||
|     Id decrement_cas_shared{}; | ||||
|     Id decrement_cas_ssbo{}; | ||||
|     Id f32_add_cas{}; | ||||
|     Id f16x2_add_cas{}; | ||||
|     Id f16x2_min_cas{}; | ||||
|     Id f16x2_max_cas{}; | ||||
|     Id f32x2_add_cas{}; | ||||
|     Id f32x2_min_cas{}; | ||||
|     Id f32x2_max_cas{}; | ||||
|  | ||||
|     Id input_position{}; | ||||
|     std::array<Id, 32> input_generics{}; | ||||
| @@ -153,6 +166,11 @@ public: | ||||
|     std::vector<Id> interfaces; | ||||
|  | ||||
| private: | ||||
|     enum class CasPointerType { | ||||
|         Shared, | ||||
|         Ssbo, | ||||
|     }; | ||||
|  | ||||
|     void DefineCommonTypes(const Info& info); | ||||
|     void DefineCommonConstants(); | ||||
|     void DefineInterfaces(const Info& info); | ||||
| @@ -171,6 +189,8 @@ private: | ||||
|  | ||||
|     void DefineInputs(const Info& info); | ||||
|     void DefineOutputs(const Info& info); | ||||
|  | ||||
|     [[nodiscard]] Id CasLoop(Id function, CasPointerType pointer_type, Id value_type); | ||||
| }; | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -238,6 +238,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct | ||||
|             ctx.AddCapability(spv::Capability::SubgroupVoteKHR); | ||||
|         } | ||||
|     } | ||||
|     if (info.uses_64_bit_atomics && profile.support_int64_atomics) { | ||||
|         ctx.AddCapability(spv::Capability::Int64Atomics); | ||||
|     } | ||||
|     if (info.uses_typeless_image_reads && profile.support_typeless_image_loads) { | ||||
|         ctx.AddCapability(spv::Capability::StorageImageReadWithoutFormat); | ||||
|     } | ||||
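The Int64Atomics capability a few lines up is declared only when the shader uses 64-bit atomics and the profile reports driver support. That profile flag plausibly mirrors the standard Vulkan feature bit; the query below is a sketch of the assumed plumbing, not code from this commit (only the VkPhysicalDeviceShaderAtomicInt64Features struct and its fields are standard Vulkan):

```cpp
#include <vulkan/vulkan.h>

// Assumed wiring: how yuzu actually populates Profile is not shown here.
bool SupportsBufferInt64Atomics(VkPhysicalDevice physical_device) {
    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES,
    };
    VkPhysicalDeviceFeatures2 features2{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
        .pNext = &atomic_int64,
    };
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
    return atomic_int64.shaderBufferInt64Atomics == VK_TRUE;
}
```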
|   | ||||
| @@ -306,6 +306,101 @@ Id EmitUGreaterThan(EmitContext& ctx, Id lhs, Id rhs); | ||||
| Id EmitINotEqual(EmitContext& ctx, Id lhs, Id rhs); | ||||
| Id EmitSGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||||
| Id EmitUGreaterThanEqual(EmitContext& ctx, Id lhs, Id rhs); | ||||
| Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value); | ||||
| Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value); | ||||
| Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                                Id value); | ||||
| Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value); | ||||
| Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value); | ||||
| Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                                Id value); | ||||
| Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value); | ||||
| Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value); | ||||
| Id EmitGlobalAtomicIAdd32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicSMin32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicUMin32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicSMax32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicUMax32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicInc32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicDec32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicAnd32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicOr32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicXor32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicExchange32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicIAdd64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicSMin64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicUMin64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicSMax64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicUMax64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicInc64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicDec64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicAnd64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicOr64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicXor64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicExchange64(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicAddF32(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicAddF16x2(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicAddF32x2(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicMinF16x2(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicMinF32x2(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicMaxF16x2(EmitContext& ctx); | ||||
| Id EmitGlobalAtomicMaxF32x2(EmitContext& ctx); | ||||
| Id EmitLogicalOr(EmitContext& ctx, Id a, Id b); | ||||
| Id EmitLogicalAnd(EmitContext& ctx, Id a, Id b); | ||||
| Id EmitLogicalXor(EmitContext& ctx, Id a, Id b); | ||||
|   | ||||
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp (new file, 528 lines)
							| @@ -0,0 +1,528 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
|  | ||||
| Id GetSharedPointer(EmitContext& ctx, Id offset, u32 index_offset = 0) { | ||||
|     const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||||
|     const Id shifted_value{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)}; | ||||
|     const Id index{ctx.OpIAdd(ctx.U32[1], shifted_value, ctx.Constant(ctx.U32[1], index_offset))}; | ||||
|     return ctx.profile.support_explicit_workgroup_layout | ||||
|                ? ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, ctx.u32_zero_value, index) | ||||
|                : ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index); | ||||
| } | ||||
|  | ||||
| Id StorageIndex(EmitContext& ctx, const IR::Value& offset, size_t element_size) { | ||||
|     if (offset.IsImmediate()) { | ||||
|         const u32 imm_offset{static_cast<u32>(offset.U32() / element_size)}; | ||||
|         return ctx.Constant(ctx.U32[1], imm_offset); | ||||
|     } | ||||
|     const u32 shift{static_cast<u32>(std::countr_zero(element_size))}; | ||||
|     const Id index{ctx.Def(offset)}; | ||||
|     if (shift == 0) { | ||||
|         return index; | ||||
|     } | ||||
|     const Id shift_id{ctx.Constant(ctx.U32[1], shift)}; | ||||
|     return ctx.OpShiftRightLogical(ctx.U32[1], index, shift_id); | ||||
| } | ||||
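StorageIndex turns a byte offset into an index of 32-bit words, folding immediate offsets at compile time and emitting a logical right shift otherwise. Both paths compute the same quantity, equivalent to:

```cpp
#include <bit>
#include <cstddef>
#include <cstdint>

// element_size is a power of two (sizeof(u32) here), so dividing by it is
// the same right shift the non-immediate path emits.
uint32_t StorageIndexSketch(uint32_t byte_offset, std::size_t element_size) {
    return byte_offset >> std::countr_zero(element_size);
}
```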
|  | ||||
| Id GetStoragePointer(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                      u32 index_offset = 0) { | ||||
|     // TODO: Support reinterpreting bindings, guaranteed to be aligned | ||||
|     if (!binding.IsImmediate()) { | ||||
|         throw NotImplementedException("Dynamic storage buffer indexing"); | ||||
|     } | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], index_offset))}; | ||||
|     return ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index); | ||||
| } | ||||
|  | ||||
| std::pair<Id, Id> GetAtomicArgs(EmitContext& ctx) { | ||||
|     const Id scope{ctx.Constant(ctx.U32[1], static_cast<u32>(spv::Scope::Device))}; | ||||
|     const Id semantics{ctx.u32_zero_value}; | ||||
|     return {scope, semantics}; | ||||
| } | ||||
|  | ||||
| Id LoadU64(EmitContext& ctx, Id pointer_1, Id pointer_2) { | ||||
|     const Id value_1{ctx.OpLoad(ctx.U32[1], pointer_1)}; | ||||
|     const Id value_2{ctx.OpLoad(ctx.U32[1], pointer_2)}; | ||||
|     const Id original_composite{ctx.OpCompositeConstruct(ctx.U32[2], value_1, value_2)}; | ||||
|     return ctx.OpBitcast(ctx.U64, original_composite); | ||||
| } | ||||
|  | ||||
| void StoreResult(EmitContext& ctx, Id pointer_1, Id pointer_2, Id result) { | ||||
|     const Id composite{ctx.OpBitcast(ctx.U32[2], result)}; | ||||
|     ctx.OpStore(pointer_1, ctx.OpCompositeExtract(ctx.U32[1], composite, 0)); | ||||
|     ctx.OpStore(pointer_2, ctx.OpCompositeExtract(ctx.U32[1], composite, 1)); | ||||
| } | ||||
| } // Anonymous namespace | ||||
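LoadU64 and StoreResult implement the fallback used throughout this file when Int64Atomics is unsupported: the 64-bit value is split across two consecutive 32-bit words, loaded, operated on, and written back with no atomicity guarantee (hence the commented-out warnings in the emitters below). The pattern, as a plain C++ sketch:

```cpp
#include <cstdint>
#include <cstring>

// NOT atomic: racing invocations can tear across the two words. This is the
// compromise the emitters accept when the driver lacks Int64Atomics.
uint64_t LoadU64Fallback(const uint32_t* low_word, const uint32_t* high_word) {
    const uint32_t words[2]{*low_word, *high_word};
    uint64_t value;
    std::memcpy(&value, words, sizeof(value));
    return value;
}

void StoreU64Fallback(uint32_t* low_word, uint32_t* high_word, uint64_t value) {
    uint32_t words[2];
    std::memcpy(words, &value, sizeof(words));
    *low_word = words[0];
    *high_word = words[1];
}
```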
|  | ||||
| Id EmitSharedAtomicIAdd32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicSMin32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicUMin32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicSMax32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicUMax32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicInc32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||||
|     const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; | ||||
|     return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_shared, index, value, | ||||
|                               ctx.shared_memory_u32); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicDec32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id shift_id{ctx.Constant(ctx.U32[1], 2U)}; | ||||
|     const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], pointer_offset, shift_id)}; | ||||
|     return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_shared, index, value, | ||||
|                               ctx.shared_memory_u32); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicAnd32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicOr32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicXor32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicExchange32(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitSharedAtomicExchange64(EmitContext& ctx, Id pointer_offset, Id value) { | ||||
|     const Id pointer_1{GetSharedPointer(ctx, pointer_offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetSharedPointer(ctx, pointer_offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, value); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicIAdd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicIAdd(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicSMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicSMin(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicUMin32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicUMin(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicSMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicSMax(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicUMax32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicUMax(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicInc32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     return ctx.OpFunctionCall(ctx.U32[1], ctx.increment_cas_ssbo, base_index, value, ssbo); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicDec32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     return ctx.OpFunctionCall(ctx.U32[1], ctx.decrement_cas_ssbo, base_index, value, ssbo); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicAnd32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicAnd(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicOr32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicOr(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicXor32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicXor(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicExchange32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                                Id value) { | ||||
|     const Id pointer{GetStoragePointer(ctx, binding, offset)}; | ||||
|     const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|     return ctx.OpAtomicExchange(ctx.U32[1], pointer, scope, semantics, value); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicIAdd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicIAdd(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpIAdd(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicSMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicSMin(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpSMin(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicUMin64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicUMin(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpUMin(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicSMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicSMax(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpSMax(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicUMax64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicUMax(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpUMax(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicAnd64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicAnd(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpBitwiseAnd(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicOr64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicOr(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpBitwiseOr(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicXor64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                           Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicXor(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     const Id result{ctx.OpBitwiseXor(ctx.U64, value, original_value)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, result); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicExchange64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                                Id value) { | ||||
|     const Id pointer_1{GetStoragePointer(ctx, binding, offset)}; | ||||
|     if (ctx.profile.support_int64_atomics) { | ||||
|         const auto [scope, semantics]{GetAtomicArgs(ctx)}; | ||||
|         return ctx.OpAtomicExchange(ctx.U64, pointer_1, scope, semantics, value); | ||||
|     } | ||||
|     // LOG_WARNING(Render_Vulkan, "Int64 Atomics not supported, fallback to non-atomic"); | ||||
|     const Id pointer_2{GetStoragePointer(ctx, binding, offset, 1)}; | ||||
|     const Id original_value{LoadU64(ctx, pointer_1, pointer_2)}; | ||||
|     StoreResult(ctx, pointer_1, pointer_2, value); | ||||
|     return original_value; | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicAddF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     return ctx.OpFunctionCall(ctx.F32[1], ctx.f32_add_cas, base_index, value, ssbo); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicAddF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_add_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpBitcast(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicAddF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_add_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicMinF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_min_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpBitcast(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicMinF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_min_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicMaxF16x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F16[2], ctx.f16x2_max_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpBitcast(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitStorageAtomicMaxF32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                              Id value) { | ||||
|     const Id ssbo{ctx.ssbos[binding.U32()]}; | ||||
|     const Id base_index{StorageIndex(ctx, offset, sizeof(u32))}; | ||||
|     const Id result{ctx.OpFunctionCall(ctx.F32[2], ctx.f32x2_max_cas, base_index, value, ssbo)}; | ||||
|     return ctx.OpPackHalf2x16(ctx.U32[1], result); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicIAdd32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicSMin32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicUMin32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicSMax32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicUMax32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicInc32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicDec32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicAnd32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicOr32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicXor32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicExchange32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicIAdd64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicSMin64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicUMin64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicSMax64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicUMax64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicInc64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicDec64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicAnd64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicOr64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicXor64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicExchange64(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicAddF32(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicAddF16x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicAddF32x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicMinF16x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicMinF32x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicMaxF16x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| Id EmitGlobalAtomicMaxF32x2(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
| @@ -1284,6 +1284,204 @@ U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) | ||||
|     return Inst<U1>(is_signed ? Opcode::SGreaterThanEqual : Opcode::UGreaterThanEqual, lhs, rhs); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicIAdd(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicIAdd32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicSMin(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicSMin32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicUMin(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicUMin32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed) { | ||||
|     return is_signed ? SharedAtomicSMin(pointer_offset, value) | ||||
|                      : SharedAtomicUMin(pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicSMax(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicSMax32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicUMax(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicUMax32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed) { | ||||
|     return is_signed ? SharedAtomicSMax(pointer_offset, value) | ||||
|                      : SharedAtomicUMax(pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicInc(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicInc32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicDec(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicDec32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicAnd(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicAnd32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicOr(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicOr32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::SharedAtomicXor(const U32& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::SharedAtomicXor32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::SharedAtomicExchange(const U32& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::SharedAtomicExchange32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::SharedAtomicExchange64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicIAdd32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicIAdd64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicSMin32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicSMin64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicUMin32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicUMin64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||||
|     return is_signed ? GlobalAtomicSMin(pointer_offset, value) | ||||
|                      : GlobalAtomicUMin(pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicSMax32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicSMax64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicUMax32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicUMax64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, bool is_signed) { | ||||
|     return is_signed ? GlobalAtomicSMax(pointer_offset, value) | ||||
|                      : GlobalAtomicUMax(pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::GlobalAtomicInc(const U64& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::GlobalAtomicInc32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32 IREmitter::GlobalAtomicDec(const U64& pointer_offset, const U32& value) { | ||||
|     return Inst<U32>(Opcode::GlobalAtomicDec32, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicAnd32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicAnd64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicOr(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicOr32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicOr64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicXor(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicXor32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicXor64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| U32U64 IREmitter::GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::U32: | ||||
|         return Inst<U32>(Opcode::GlobalAtomicExchange32, pointer_offset, value); | ||||
|     case Type::U64: | ||||
|         return Inst<U64>(Opcode::GlobalAtomicExchange64, pointer_offset, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| F32 IREmitter::GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||||
|                                   const FpControl control) { | ||||
|     return Inst<F32>(Opcode::GlobalAtomicAddF32, Flags{control}, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| Value IREmitter::GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||||
|                                       const FpControl control) { | ||||
|     return Inst(Opcode::GlobalAtomicAddF16x2, Flags{control}, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| Value IREmitter::GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||||
|                                       const FpControl control) { | ||||
|     return Inst(Opcode::GlobalAtomicMinF16x2, Flags{control}, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| Value IREmitter::GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||||
|                                       const FpControl control) { | ||||
|     return Inst(Opcode::GlobalAtomicMaxF16x2, Flags{control}, pointer_offset, value); | ||||
| } | ||||
|  | ||||
| U1 IREmitter::LogicalOr(const U1& a, const U1& b) { | ||||
|     return Inst<U1>(Opcode::LogicalOr, a, b); | ||||
| } | ||||
| @@ -1626,7 +1824,7 @@ Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInst | ||||
| } | ||||
|  | ||||
| void IREmitter::ImageWrite(const Value& handle, const Value& coords, const Value& color, | ||||
|                             TextureInstInfo info) { | ||||
|                            TextureInstInfo info) { | ||||
|     const Opcode op{handle.IsImmediate() ? Opcode::BoundImageWrite : Opcode::BindlessImageWrite}; | ||||
|     Inst(op, Flags{info}, handle, coords, color); | ||||
| } | ||||
|   | ||||
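Note: the helpers above give translators a single call per hardware atomic; the IR type of `value` selects the 32- or 64-bit opcode. A minimal usage sketch, with hypothetical immediate operands (not code from this commit):

    // `ir` is an IR::IREmitter. Passing a U32 value selects GlobalAtomicIAdd32
    // and yields the pre-operation value stored at the address.
    const IR::U64 address{ir.Imm64(u64{0x1000})};
    const IR::U32 value{ir.Imm32(1)};
    const IR::U32U64 previous{ir.GlobalAtomicIAdd(address, value)};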
| @@ -228,6 +228,45 @@ public: | ||||
|     [[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs); | ||||
|     [[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed); | ||||
|  | ||||
|     [[nodiscard]] U32 SharedAtomicIAdd(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicSMin(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicUMin(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicIMin(const U32& pointer_offset, const U32& value, bool is_signed); | ||||
|     [[nodiscard]] U32 SharedAtomicSMax(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicUMax(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicIMax(const U32& pointer_offset, const U32& value, bool is_signed); | ||||
|     [[nodiscard]] U32 SharedAtomicInc(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicDec(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicAnd(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicOr(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 SharedAtomicXor(const U32& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32U64 SharedAtomicExchange(const U32& pointer_offset, const U32U64& value); | ||||
|  | ||||
|     [[nodiscard]] U32U64 GlobalAtomicIAdd(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicSMin(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicUMin(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicIMin(const U64& pointer_offset, const U32U64& value, | ||||
|                                           bool is_signed); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicSMax(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicUMax(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicIMax(const U64& pointer_offset, const U32U64& value, | ||||
|                                           bool is_signed); | ||||
|     [[nodiscard]] U32 GlobalAtomicInc(const U64& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32 GlobalAtomicDec(const U64& pointer_offset, const U32& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicAnd(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicOr(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicXor(const U64& pointer_offset, const U32U64& value); | ||||
|     [[nodiscard]] U32U64 GlobalAtomicExchange(const U64& pointer_offset, const U32U64& value); | ||||
|  | ||||
|     [[nodiscard]] F32 GlobalAtomicF32Add(const U64& pointer_offset, const Value& value, | ||||
|                                          const FpControl control = {}); | ||||
|     [[nodiscard]] Value GlobalAtomicF16x2Add(const U64& pointer_offset, const Value& value, | ||||
|                                              const FpControl control = {}); | ||||
|     [[nodiscard]] Value GlobalAtomicF16x2Min(const U64& pointer_offset, const Value& value, | ||||
|                                              const FpControl control = {}); | ||||
|     [[nodiscard]] Value GlobalAtomicF16x2Max(const U64& pointer_offset, const Value& value, | ||||
|                                              const FpControl control = {}); | ||||
|  | ||||
|     [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); | ||||
|     [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); | ||||
|     [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); | ||||
|   | ||||
| @@ -93,6 +93,72 @@ bool Inst::MayHaveSideEffects() const noexcept { | ||||
|     case Opcode::WriteSharedU32: | ||||
|     case Opcode::WriteSharedU64: | ||||
|     case Opcode::WriteSharedU128: | ||||
|     case Opcode::SharedAtomicIAdd32: | ||||
|     case Opcode::SharedAtomicSMin32: | ||||
|     case Opcode::SharedAtomicUMin32: | ||||
|     case Opcode::SharedAtomicSMax32: | ||||
|     case Opcode::SharedAtomicUMax32: | ||||
|     case Opcode::SharedAtomicInc32: | ||||
|     case Opcode::SharedAtomicDec32: | ||||
|     case Opcode::SharedAtomicAnd32: | ||||
|     case Opcode::SharedAtomicOr32: | ||||
|     case Opcode::SharedAtomicXor32: | ||||
|     case Opcode::SharedAtomicExchange32: | ||||
|     case Opcode::SharedAtomicExchange64: | ||||
|     case Opcode::GlobalAtomicIAdd32: | ||||
|     case Opcode::GlobalAtomicSMin32: | ||||
|     case Opcode::GlobalAtomicUMin32: | ||||
|     case Opcode::GlobalAtomicSMax32: | ||||
|     case Opcode::GlobalAtomicUMax32: | ||||
|     case Opcode::GlobalAtomicInc32: | ||||
|     case Opcode::GlobalAtomicDec32: | ||||
|     case Opcode::GlobalAtomicAnd32: | ||||
|     case Opcode::GlobalAtomicOr32: | ||||
|     case Opcode::GlobalAtomicXor32: | ||||
|     case Opcode::GlobalAtomicExchange32: | ||||
|     case Opcode::GlobalAtomicIAdd64: | ||||
|     case Opcode::GlobalAtomicSMin64: | ||||
|     case Opcode::GlobalAtomicUMin64: | ||||
|     case Opcode::GlobalAtomicSMax64: | ||||
|     case Opcode::GlobalAtomicUMax64: | ||||
|     case Opcode::GlobalAtomicAnd64: | ||||
|     case Opcode::GlobalAtomicOr64: | ||||
|     case Opcode::GlobalAtomicXor64: | ||||
|     case Opcode::GlobalAtomicExchange64: | ||||
|     case Opcode::GlobalAtomicAddF32: | ||||
|     case Opcode::GlobalAtomicAddF16x2: | ||||
|     case Opcode::GlobalAtomicAddF32x2: | ||||
|     case Opcode::GlobalAtomicMinF16x2: | ||||
|     case Opcode::GlobalAtomicMinF32x2: | ||||
|     case Opcode::GlobalAtomicMaxF16x2: | ||||
|     case Opcode::GlobalAtomicMaxF32x2: | ||||
|     case Opcode::StorageAtomicIAdd32: | ||||
|     case Opcode::StorageAtomicSMin32: | ||||
|     case Opcode::StorageAtomicUMin32: | ||||
|     case Opcode::StorageAtomicSMax32: | ||||
|     case Opcode::StorageAtomicUMax32: | ||||
|     case Opcode::StorageAtomicInc32: | ||||
|     case Opcode::StorageAtomicDec32: | ||||
|     case Opcode::StorageAtomicAnd32: | ||||
|     case Opcode::StorageAtomicOr32: | ||||
|     case Opcode::StorageAtomicXor32: | ||||
|     case Opcode::StorageAtomicExchange32: | ||||
|     case Opcode::StorageAtomicIAdd64: | ||||
|     case Opcode::StorageAtomicSMin64: | ||||
|     case Opcode::StorageAtomicUMin64: | ||||
|     case Opcode::StorageAtomicSMax64: | ||||
|     case Opcode::StorageAtomicUMax64: | ||||
|     case Opcode::StorageAtomicAnd64: | ||||
|     case Opcode::StorageAtomicOr64: | ||||
|     case Opcode::StorageAtomicXor64: | ||||
|     case Opcode::StorageAtomicExchange64: | ||||
|     case Opcode::StorageAtomicAddF32: | ||||
|     case Opcode::StorageAtomicAddF16x2: | ||||
|     case Opcode::StorageAtomicAddF32x2: | ||||
|     case Opcode::StorageAtomicMinF16x2: | ||||
|     case Opcode::StorageAtomicMinF32x2: | ||||
|     case Opcode::StorageAtomicMaxF16x2: | ||||
|     case Opcode::StorageAtomicMaxF32x2: | ||||
|     case Opcode::BindlessImageWrite: | ||||
|     case Opcode::BoundImageWrite: | ||||
|     case Opcode::ImageWrite: | ||||
|   | ||||
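Note: listing every new atomic in MayHaveSideEffects is what keeps dead-code elimination from discarding them when the returned old value goes unused, which is always the case for RED. A minimal sketch of the assumed DCE shape (not code from this commit):

    // Instructions with zero uses may only be dropped when they have no side
    // effects; atomics mutate memory, so they survive even when unused.
    for (IR::Inst& inst : block.Instructions()) {
        if (!inst.HasUses() && !inst.MayHaveSideEffects()) {
            inst.Invalidate();
        }
    }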
| @@ -321,6 +321,76 @@ OPCODE(INotEqual,                                           U1,             U32, | ||||
| OPCODE(SGreaterThanEqual,                                   U1,             U32,            U32,                                                            ) | ||||
| OPCODE(UGreaterThanEqual,                                   U1,             U32,            U32,                                                            ) | ||||
|  | ||||
| // Atomic operations | ||||
| OPCODE(SharedAtomicIAdd32,                                  U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicSMin32,                                  U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicUMin32,                                  U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicSMax32,                                  U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicUMax32,                                  U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicInc32,                                   U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicDec32,                                   U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicAnd32,                                   U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicOr32,                                    U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicXor32,                                   U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicExchange32,                              U32,            U32,            U32,                                                            ) | ||||
| OPCODE(SharedAtomicExchange64,                              U64,            U32,            U64,                                                            ) | ||||
|  | ||||
| OPCODE(GlobalAtomicIAdd32,                                  U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicSMin32,                                  U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicUMin32,                                  U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicSMax32,                                  U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicUMax32,                                  U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicInc32,                                   U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicDec32,                                   U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicAnd32,                                   U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicOr32,                                    U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicXor32,                                   U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicExchange32,                              U32,            U64,            U32,                                                            ) | ||||
| OPCODE(GlobalAtomicIAdd64,                                  U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicSMin64,                                  U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicUMin64,                                  U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicSMax64,                                  U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicUMax64,                                  U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicAnd64,                                   U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicOr64,                                    U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicXor64,                                   U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicExchange64,                              U64,            U64,            U64,                                                            ) | ||||
| OPCODE(GlobalAtomicAddF32,                                  F32,            U64,            F32,                                                            ) | ||||
| OPCODE(GlobalAtomicAddF16x2,                                U32,            U64,            F16x2,                                                          ) | ||||
| OPCODE(GlobalAtomicAddF32x2,                                U32,            U64,            F32x2,                                                          ) | ||||
| OPCODE(GlobalAtomicMinF16x2,                                U32,            U64,            F16x2,                                                          ) | ||||
| OPCODE(GlobalAtomicMinF32x2,                                U32,            U64,            F32x2,                                                          ) | ||||
| OPCODE(GlobalAtomicMaxF16x2,                                U32,            U64,            F16x2,                                                          ) | ||||
| OPCODE(GlobalAtomicMaxF32x2,                                U32,            U64,            F32x2,                                                          ) | ||||
|  | ||||
| OPCODE(StorageAtomicIAdd32,                                 U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicSMin32,                                 U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicUMin32,                                 U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicSMax32,                                 U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicUMax32,                                 U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicInc32,                                  U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicDec32,                                  U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicAnd32,                                  U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicOr32,                                   U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicXor32,                                  U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicExchange32,                             U32,            U32,            U32,            U32,                                            ) | ||||
| OPCODE(StorageAtomicIAdd64,                                 U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicSMin64,                                 U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicUMin64,                                 U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicSMax64,                                 U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicUMax64,                                 U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicAnd64,                                  U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicOr64,                                   U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicXor64,                                  U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicExchange64,                             U64,            U32,            U32,            U64,                                            ) | ||||
| OPCODE(StorageAtomicAddF32,                                 F32,            U32,            U32,            F32,                                            ) | ||||
| OPCODE(StorageAtomicAddF16x2,                               U32,            U32,            U32,            F16x2,                                          ) | ||||
| OPCODE(StorageAtomicAddF32x2,                               U32,            U32,            U32,            F32x2,                                          ) | ||||
| OPCODE(StorageAtomicMinF16x2,                               U32,            U32,            U32,            F16x2,                                          ) | ||||
| OPCODE(StorageAtomicMinF32x2,                               U32,            U32,            U32,            F32x2,                                          ) | ||||
| OPCODE(StorageAtomicMaxF16x2,                               U32,            U32,            U32,            F16x2,                                          ) | ||||
| OPCODE(StorageAtomicMaxF32x2,                               U32,            U32,            U32,            F32x2,                                          ) | ||||
|  | ||||
| // Logical operations | ||||
| OPCODE(LogicalOr,                                           U1,             U1,             U1,                                                             ) | ||||
| OPCODE(LogicalAnd,                                          U1,             U1,             U1,                                                             ) | ||||
|   | ||||
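Note: each OPCODE row lists the result type first, then the argument types, so e.g. StorageAtomicIAdd64 returns a U64 and takes a binding index (U32), a byte offset (U32) and a U64 value. Assuming the usual X-macro consumption of this table, a sketch:

    // Sketch: expanding opcodes.inc into the Opcode enumeration.
    #define OPCODE(name, result_type, ...) name,
    enum class Opcode {
    #include "opcodes.inc"
    };
    #undef OPCODE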
| @@ -0,0 +1,222 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class AtomOp : u64 { | ||||
|     ADD, | ||||
|     MIN, | ||||
|     MAX, | ||||
|     INC, | ||||
|     DEC, | ||||
|     AND, | ||||
|     OR, | ||||
|     XOR, | ||||
|     EXCH, | ||||
|     SAFEADD, | ||||
| }; | ||||
|  | ||||
| enum class AtomSize : u64 { | ||||
|     U32, | ||||
|     S32, | ||||
|     U64, | ||||
|     F32, | ||||
|     F16x2, | ||||
|     S64, | ||||
| }; | ||||
|  | ||||
| IR::U32U64 ApplyIntegerAtomOp(IR::IREmitter& ir, const IR::U32U64& offset, const IR::U32U64& op_b, | ||||
|                               AtomOp op, bool is_signed) { | ||||
|     switch (op) { | ||||
|     case AtomOp::ADD: | ||||
|         return ir.GlobalAtomicIAdd(offset, op_b); | ||||
|     case AtomOp::MIN: | ||||
|         return ir.GlobalAtomicIMin(offset, op_b, is_signed); | ||||
|     case AtomOp::MAX: | ||||
|         return ir.GlobalAtomicIMax(offset, op_b, is_signed); | ||||
|     case AtomOp::INC: | ||||
|         return ir.GlobalAtomicInc(offset, op_b); | ||||
|     case AtomOp::DEC: | ||||
|         return ir.GlobalAtomicDec(offset, op_b); | ||||
|     case AtomOp::AND: | ||||
|         return ir.GlobalAtomicAnd(offset, op_b); | ||||
|     case AtomOp::OR: | ||||
|         return ir.GlobalAtomicOr(offset, op_b); | ||||
|     case AtomOp::XOR: | ||||
|         return ir.GlobalAtomicXor(offset, op_b); | ||||
|     case AtomOp::EXCH: | ||||
|         return ir.GlobalAtomicExchange(offset, op_b); | ||||
|     default: | ||||
|         throw NotImplementedException("Integer Atom Operation {}", op); | ||||
|     } | ||||
| } | ||||
|  | ||||
| IR::Value ApplyFpAtomOp(IR::IREmitter& ir, const IR::U64& offset, const IR::Value& op_b, AtomOp op, | ||||
|                         AtomSize size) { | ||||
|     static constexpr IR::FpControl f16_control{ | ||||
|         .no_contraction{false}, | ||||
|         .rounding{IR::FpRounding::RN}, | ||||
|         .fmz_mode{IR::FmzMode::DontCare}, | ||||
|     }; | ||||
|     static constexpr IR::FpControl f32_control{ | ||||
|         .no_contraction{false}, | ||||
|         .rounding{IR::FpRounding::RN}, | ||||
|         .fmz_mode{IR::FmzMode::FTZ}, | ||||
|     }; | ||||
|     switch (op) { | ||||
|     case AtomOp::ADD: | ||||
|         return size == AtomSize::F32 ? ir.GlobalAtomicF32Add(offset, op_b, f32_control) | ||||
|                                      : ir.GlobalAtomicF16x2Add(offset, op_b, f16_control); | ||||
|     case AtomOp::MIN: | ||||
|         return ir.GlobalAtomicF16x2Min(offset, op_b, f16_control); | ||||
|     case AtomOp::MAX: | ||||
|         return ir.GlobalAtomicF16x2Max(offset, op_b, f16_control); | ||||
|     default: | ||||
|         throw NotImplementedException("FP Atom Operation {}", op); | ||||
|     } | ||||
| } | ||||
|  | ||||
| IR::U64 AtomOffset(TranslatorVisitor& v, u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<28, 20, s64> addr_offset; | ||||
|         BitField<28, 20, u64> rz_addr_offset; | ||||
|         BitField<48, 1, u64> e; | ||||
|     } const mem{insn}; | ||||
|  | ||||
|     const IR::U64 address{[&]() -> IR::U64 { | ||||
|         if (mem.e == 0) { | ||||
|             return v.ir.UConvert(64, v.X(mem.addr_reg)); | ||||
|         } | ||||
|         return v.L(mem.addr_reg); | ||||
|     }()}; | ||||
|     const u64 addr_offset{[&]() -> u64 { | ||||
|         if (mem.addr_reg == IR::Reg::RZ) { | ||||
|             // When RZ is used, the address is an absolute address | ||||
|             return static_cast<u64>(mem.rz_addr_offset.Value()); | ||||
|         } else { | ||||
|             return static_cast<u64>(mem.addr_offset.Value()); | ||||
|         } | ||||
|     }()}; | ||||
|     return v.ir.IAdd(address, v.ir.Imm64(addr_offset)); | ||||
| } | ||||
|  | ||||
| bool AtomOpNotApplicable(AtomSize size, AtomOp op) { | ||||
|     // TODO: SAFEADD | ||||
|     switch (size) { | ||||
|     case AtomSize::S32: | ||||
|     case AtomSize::U64: | ||||
|         return (op == AtomOp::INC || op == AtomOp::DEC); | ||||
|     case AtomSize::S64: | ||||
|         return !(op == AtomOp::MIN || op == AtomOp::MAX); | ||||
|     case AtomSize::F32: | ||||
|         return op != AtomOp::ADD; | ||||
|     case AtomSize::F16x2: | ||||
|         return !(op == AtomOp::ADD || op == AtomOp::MIN || op == AtomOp::MAX); | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
|  | ||||
| IR::U32U64 LoadGlobal(IR::IREmitter& ir, const IR::U64& offset, AtomSize size) { | ||||
|     switch (size) { | ||||
|     case AtomSize::U32: | ||||
|     case AtomSize::S32: | ||||
|     case AtomSize::F32: | ||||
|     case AtomSize::F16x2: | ||||
|         return ir.LoadGlobal32(offset); | ||||
|     case AtomSize::U64: | ||||
|     case AtomSize::S64: | ||||
|         return ir.PackUint2x32(ir.LoadGlobal64(offset)); | ||||
|     default: | ||||
|         throw NotImplementedException("Atom Size {}", size); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomSize size) { | ||||
|     switch (size) { | ||||
|     case AtomSize::U32: | ||||
|     case AtomSize::S32: | ||||
|     case AtomSize::F16x2: | ||||
|         return v.X(dest_reg, IR::U32{result}); | ||||
|     case AtomSize::U64: | ||||
|     case AtomSize::S64: | ||||
|         return v.L(dest_reg, IR::U64{result}); | ||||
|     case AtomSize::F32: | ||||
|         return v.F(dest_reg, IR::F32{result}); | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void TranslatorVisitor::ATOM(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<20, 8, IR::Reg> src_reg_b; | ||||
|         BitField<49, 3, AtomSize> size; | ||||
|         BitField<52, 4, AtomOp> op; | ||||
|     } const atom{insn}; | ||||
|  | ||||
|     const bool size_64{atom.size == AtomSize::U64 || atom.size == AtomSize::S64}; | ||||
|     const bool is_signed{atom.size == AtomSize::S32 || atom.size == AtomSize::S64}; | ||||
|     const bool is_integer{atom.size != AtomSize::F32 && atom.size != AtomSize::F16x2}; | ||||
|     const IR::U64 offset{AtomOffset(*this, insn)}; | ||||
|     IR::Value result; | ||||
|  | ||||
|     if (AtomOpNotApplicable(atom.size, atom.op)) { | ||||
|         result = LoadGlobal(ir, offset, atom.size); | ||||
|     } else if (!is_integer) { | ||||
|         if (atom.size == AtomSize::F32) { | ||||
|             result = ApplyFpAtomOp(ir, offset, F(atom.src_reg_b), atom.op, atom.size); | ||||
|         } else { | ||||
|             const IR::Value src_b{ir.UnpackFloat2x16(X(atom.src_reg_b))}; | ||||
|             result = ApplyFpAtomOp(ir, offset, src_b, atom.op, atom.size); | ||||
|         } | ||||
|     } else if (size_64) { | ||||
|         result = ApplyIntegerAtomOp(ir, offset, L(atom.src_reg_b), atom.op, is_signed); | ||||
|     } else { | ||||
|         result = ApplyIntegerAtomOp(ir, offset, X(atom.src_reg_b), atom.op, is_signed); | ||||
|     } | ||||
|     StoreResult(*this, atom.dest_reg, result, atom.size); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::RED(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> src_reg_b; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<20, 3, AtomSize> size; | ||||
|         BitField<23, 3, AtomOp> op; | ||||
|     } const red{insn}; | ||||
|  | ||||
|     if (AtomOpNotApplicable(red.size, red.op)) { | ||||
|         return; | ||||
|     } | ||||
|     const bool size_64{red.size == AtomSize::U64 || red.size == AtomSize::S64}; | ||||
|     const bool is_signed{red.size == AtomSize::S32 || red.size == AtomSize::S64}; | ||||
|     const bool is_integer{red.size != AtomSize::F32 && red.size != AtomSize::F16x2}; | ||||
|     const IR::U64 offset{AtomOffset(*this, insn)}; | ||||
|     if (!is_integer) { | ||||
|         if (red.size == AtomSize::F32) { | ||||
|             ApplyFpAtomOp(ir, offset, F(red.src_reg_b), red.op, red.size); | ||||
|         } else { | ||||
|             const IR::Value src_b{ir.UnpackFloat2x16(X(red.src_reg_b))}; | ||||
|             ApplyFpAtomOp(ir, offset, src_b, red.op, red.size); | ||||
|         } | ||||
|     } else if (size_64) { | ||||
|         ApplyIntegerAtomOp(ir, offset, L(red.src_reg_b), red.op, is_signed); | ||||
|     } else { | ||||
|         ApplyIntegerAtomOp(ir, offset, X(red.src_reg_b), red.op, is_signed); | ||||
|     } | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
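Note: a worked example of the ATOM path above, assuming a hypothetical encoding of `ATOM.E.ADD.U32 R0, [R2+0x10], R4` (E set, so R2:R3 holds a 64-bit address):

    // AtomOffset:          %addr = IAdd64 (L R2), Imm64(0x10)
    // ApplyIntegerAtomOp:  %old  = GlobalAtomicIAdd32 %addr, (X R4)
    // StoreResult:         X(R0, %old)

Operations rejected by AtomOpNotApplicable instead degrade to a plain LoadGlobal of the addressed value, so the destination register still receives a defined result.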
| @@ -0,0 +1,110 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class AtomOp : u64 { | ||||
|     ADD, | ||||
|     MIN, | ||||
|     MAX, | ||||
|     INC, | ||||
|     DEC, | ||||
|     AND, | ||||
|     OR, | ||||
|     XOR, | ||||
|     EXCH, | ||||
| }; | ||||
|  | ||||
| enum class AtomsSize : u64 { | ||||
|     U32, | ||||
|     S32, | ||||
|     U64, | ||||
| }; | ||||
|  | ||||
| IR::U32U64 ApplyAtomsOp(IR::IREmitter& ir, const IR::U32& offset, const IR::U32U64& op_b, AtomOp op, | ||||
|                         bool is_signed) { | ||||
|     switch (op) { | ||||
|     case AtomOp::ADD: | ||||
|         return ir.SharedAtomicIAdd(offset, op_b); | ||||
|     case AtomOp::MIN: | ||||
|         return ir.SharedAtomicIMin(offset, op_b, is_signed); | ||||
|     case AtomOp::MAX: | ||||
|         return ir.SharedAtomicIMax(offset, op_b, is_signed); | ||||
|     case AtomOp::INC: | ||||
|         return ir.SharedAtomicInc(offset, op_b); | ||||
|     case AtomOp::DEC: | ||||
|         return ir.SharedAtomicDec(offset, op_b); | ||||
|     case AtomOp::AND: | ||||
|         return ir.SharedAtomicAnd(offset, op_b); | ||||
|     case AtomOp::OR: | ||||
|         return ir.SharedAtomicOr(offset, op_b); | ||||
|     case AtomOp::XOR: | ||||
|         return ir.SharedAtomicXor(offset, op_b); | ||||
|     case AtomOp::EXCH: | ||||
|         return ir.SharedAtomicExchange(offset, op_b); | ||||
|     default: | ||||
|         throw NotImplementedException("Integer Atoms Operation {}", op); | ||||
|     } | ||||
| } | ||||
|  | ||||
| IR::U32 AtomsOffset(TranslatorVisitor& v, u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<8, 8, IR::Reg> offset_reg; | ||||
|         BitField<30, 22, u64> absolute_offset; | ||||
|         BitField<30, 22, s64> relative_offset; | ||||
|     } const encoding{insn}; | ||||
|  | ||||
|     if (encoding.offset_reg == IR::Reg::RZ) { | ||||
|         return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset << 2)); | ||||
|     } else { | ||||
|         const s32 relative{static_cast<s32>(encoding.relative_offset << 2)}; | ||||
|         return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void StoreResult(TranslatorVisitor& v, IR::Reg dest_reg, const IR::Value& result, AtomsSize size) { | ||||
|     switch (size) { | ||||
|     case AtomsSize::U32: | ||||
|     case AtomsSize::S32: | ||||
|         return v.X(dest_reg, IR::U32{result}); | ||||
|     case AtomsSize::U64: | ||||
|         return v.L(dest_reg, IR::U64{result}); | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void TranslatorVisitor::ATOMS(u64 insn) { | ||||
|     union { | ||||
|         u64 raw; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> addr_reg; | ||||
|         BitField<20, 8, IR::Reg> src_reg_b; | ||||
|         BitField<28, 2, AtomsSize> size; | ||||
|         BitField<52, 4, AtomOp> op; | ||||
|     } const atoms{insn}; | ||||
|  | ||||
|     const bool size_64{atoms.size == AtomsSize::U64}; | ||||
|     if (size_64 && atoms.op != AtomOp::EXCH) { | ||||
|         throw NotImplementedException("64-bit Atoms Operation {}", atoms.op.Value()); | ||||
|     } | ||||
|     const bool is_signed{atoms.size == AtomsSize::S32}; | ||||
|     const IR::U32 offset{AtomsOffset(*this, insn)}; | ||||
|  | ||||
|     IR::Value result; | ||||
|     if (size_64) { | ||||
|         result = ApplyAtomsOp(ir, offset, L(atoms.src_reg_b), atoms.op, is_signed); | ||||
|     } else { | ||||
|         result = ApplyAtomsOp(ir, offset, X(atoms.src_reg_b), atoms.op, is_signed); | ||||
|     } | ||||
|     StoreResult(*this, atoms.dest_reg, result, atoms.size); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Maxwell | ||||
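Note: ATOMS offsets are encoded in 32-bit words, hence the `<< 2` in AtomsOffset. A worked example with hypothetical field values: absolute_offset == 0x30 with offset_reg == RZ yields Imm32(0xC0), while relative_offset == 0x30 with offset_reg == R5 yields IAdd(X(R5), Imm32(0xC0)).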
| @@ -17,18 +17,10 @@ void TranslatorVisitor::ATOM_cas(u64) { | ||||
|     ThrowNotImplemented(Opcode::ATOM_cas); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ATOM(u64) { | ||||
|     ThrowNotImplemented(Opcode::ATOM); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ATOMS_cas(u64) { | ||||
|     ThrowNotImplemented(Opcode::ATOMS_cas); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::ATOMS(u64) { | ||||
|     ThrowNotImplemented(Opcode::ATOMS); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::B2R(u64) { | ||||
|     ThrowNotImplemented(Opcode::B2R); | ||||
| } | ||||
| @@ -241,10 +233,6 @@ void TranslatorVisitor::RAM(u64) { | ||||
|     ThrowNotImplemented(Opcode::RAM); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::RED(u64) { | ||||
|     ThrowNotImplemented(Opcode::RED); | ||||
| } | ||||
|  | ||||
| void TranslatorVisitor::RET(u64) { | ||||
|     ThrowNotImplemented(Opcode::RET); | ||||
| } | ||||
|   | ||||
| @@ -145,6 +145,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::FPOrdGreaterThanEqual16: | ||||
|     case IR::Opcode::FPUnordGreaterThanEqual16: | ||||
|     case IR::Opcode::FPIsNan16: | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|     case IR::Opcode::StorageAtomicAddF16x2: | ||||
|     case IR::Opcode::StorageAtomicMinF16x2: | ||||
|     case IR::Opcode::StorageAtomicMaxF16x2: | ||||
|         info.uses_fp16 = true; | ||||
|         break; | ||||
|     case IR::Opcode::CompositeConstructF64x2: | ||||
| @@ -310,6 +314,7 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::ConvertF16U64: | ||||
|     case IR::Opcode::ConvertF32U64: | ||||
|     case IR::Opcode::ConvertF64U64: | ||||
|     case IR::Opcode::SharedAtomicExchange64: | ||||
|         info.uses_int64 = true; | ||||
|         break; | ||||
|     default: | ||||
| @@ -444,6 +449,71 @@ void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     case IR::Opcode::FSwizzleAdd: | ||||
|         info.uses_fswzadd = true; | ||||
|         break; | ||||
|     case IR::Opcode::SharedAtomicInc32: | ||||
|         info.uses_shared_increment = true; | ||||
|         break; | ||||
|     case IR::Opcode::SharedAtomicDec32: | ||||
|         info.uses_shared_decrement = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicInc32: | ||||
|     case IR::Opcode::StorageAtomicInc32: | ||||
|         info.uses_global_increment = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicDec32: | ||||
|     case IR::Opcode::StorageAtomicDec32: | ||||
|         info.uses_global_decrement = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicAddF32: | ||||
|     case IR::Opcode::StorageAtomicAddF32: | ||||
|         info.uses_atomic_f32_add = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|     case IR::Opcode::StorageAtomicAddF16x2: | ||||
|         info.uses_atomic_f16x2_add = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicAddF32x2: | ||||
|     case IR::Opcode::StorageAtomicAddF32x2: | ||||
|         info.uses_atomic_f32x2_add = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicMinF16x2: | ||||
|     case IR::Opcode::StorageAtomicMinF16x2: | ||||
|         info.uses_atomic_f16x2_min = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicMinF32x2: | ||||
|     case IR::Opcode::StorageAtomicMinF32x2: | ||||
|         info.uses_atomic_f32x2_min = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicMaxF16x2: | ||||
|     case IR::Opcode::StorageAtomicMaxF16x2: | ||||
|         info.uses_atomic_f16x2_max = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicMaxF32x2: | ||||
|     case IR::Opcode::StorageAtomicMaxF32x2: | ||||
|         info.uses_atomic_f32x2_max = true; | ||||
|         break; | ||||
|     case IR::Opcode::GlobalAtomicIAdd64: | ||||
|     case IR::Opcode::GlobalAtomicSMin64: | ||||
|     case IR::Opcode::GlobalAtomicUMin64: | ||||
|     case IR::Opcode::GlobalAtomicSMax64: | ||||
|     case IR::Opcode::GlobalAtomicUMax64: | ||||
|     case IR::Opcode::GlobalAtomicAnd64: | ||||
|     case IR::Opcode::GlobalAtomicOr64: | ||||
|     case IR::Opcode::GlobalAtomicXor64: | ||||
|     case IR::Opcode::GlobalAtomicExchange64: | ||||
|     case IR::Opcode::StorageAtomicIAdd64: | ||||
|     case IR::Opcode::StorageAtomicSMin64: | ||||
|     case IR::Opcode::StorageAtomicUMin64: | ||||
|     case IR::Opcode::StorageAtomicSMax64: | ||||
|     case IR::Opcode::StorageAtomicUMax64: | ||||
|     case IR::Opcode::StorageAtomicAnd64: | ||||
|     case IR::Opcode::StorageAtomicOr64: | ||||
|     case IR::Opcode::StorageAtomicXor64: | ||||
|         info.uses_64_bit_atomics = true; | ||||
|         break; | ||||
|     case IR::Opcode::SharedAtomicExchange64: | ||||
|         info.uses_64_bit_atomics = true; | ||||
|         info.uses_shared_memory_u32x2 = true; | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|   | ||||
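Note: these usage flags are what the SPIR-V backend is expected to key its capability declarations on; a hedged sketch of that consumer (assumed shape, not code from this commit):

    // Only declare Int64Atomics when the shader uses 64-bit atomics and the
    // host device reports support via the profile.
    if (info.uses_64_bit_atomics && profile.support_int64_atomics) {
        ctx.AddCapability(spv::Capability::Int64Atomics);
    }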
| @@ -72,6 +72,33 @@ bool IsGlobalMemory(const IR::Inst& inst) { | ||||
|     case IR::Opcode::WriteGlobal32: | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|     case IR::Opcode::GlobalAtomicIAdd32: | ||||
|     case IR::Opcode::GlobalAtomicSMin32: | ||||
|     case IR::Opcode::GlobalAtomicUMin32: | ||||
|     case IR::Opcode::GlobalAtomicSMax32: | ||||
|     case IR::Opcode::GlobalAtomicUMax32: | ||||
|     case IR::Opcode::GlobalAtomicInc32: | ||||
|     case IR::Opcode::GlobalAtomicDec32: | ||||
|     case IR::Opcode::GlobalAtomicAnd32: | ||||
|     case IR::Opcode::GlobalAtomicOr32: | ||||
|     case IR::Opcode::GlobalAtomicXor32: | ||||
|     case IR::Opcode::GlobalAtomicExchange32: | ||||
|     case IR::Opcode::GlobalAtomicIAdd64: | ||||
|     case IR::Opcode::GlobalAtomicSMin64: | ||||
|     case IR::Opcode::GlobalAtomicUMin64: | ||||
|     case IR::Opcode::GlobalAtomicSMax64: | ||||
|     case IR::Opcode::GlobalAtomicUMax64: | ||||
|     case IR::Opcode::GlobalAtomicAnd64: | ||||
|     case IR::Opcode::GlobalAtomicOr64: | ||||
|     case IR::Opcode::GlobalAtomicXor64: | ||||
|     case IR::Opcode::GlobalAtomicExchange64: | ||||
|     case IR::Opcode::GlobalAtomicAddF32: | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|     case IR::Opcode::GlobalAtomicAddF32x2: | ||||
|     case IR::Opcode::GlobalAtomicMinF16x2: | ||||
|     case IR::Opcode::GlobalAtomicMinF32x2: | ||||
|     case IR::Opcode::GlobalAtomicMaxF16x2: | ||||
|     case IR::Opcode::GlobalAtomicMaxF32x2: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
| @@ -125,6 +152,60 @@ IR::Opcode GlobalToStorage(IR::Opcode opcode) { | ||||
|         return IR::Opcode::WriteStorage64; | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         return IR::Opcode::WriteStorage128; | ||||
|     case IR::Opcode::GlobalAtomicIAdd32: | ||||
|         return IR::Opcode::StorageAtomicIAdd32; | ||||
|     case IR::Opcode::GlobalAtomicSMin32: | ||||
|         return IR::Opcode::StorageAtomicSMin32; | ||||
|     case IR::Opcode::GlobalAtomicUMin32: | ||||
|         return IR::Opcode::StorageAtomicUMin32; | ||||
|     case IR::Opcode::GlobalAtomicSMax32: | ||||
|         return IR::Opcode::StorageAtomicSMax32; | ||||
|     case IR::Opcode::GlobalAtomicUMax32: | ||||
|         return IR::Opcode::StorageAtomicUMax32; | ||||
|     case IR::Opcode::GlobalAtomicInc32: | ||||
|         return IR::Opcode::StorageAtomicInc32; | ||||
|     case IR::Opcode::GlobalAtomicDec32: | ||||
|         return IR::Opcode::StorageAtomicDec32; | ||||
|     case IR::Opcode::GlobalAtomicAnd32: | ||||
|         return IR::Opcode::StorageAtomicAnd32; | ||||
|     case IR::Opcode::GlobalAtomicOr32: | ||||
|         return IR::Opcode::StorageAtomicOr32; | ||||
|     case IR::Opcode::GlobalAtomicXor32: | ||||
|         return IR::Opcode::StorageAtomicXor32; | ||||
|     case IR::Opcode::GlobalAtomicIAdd64: | ||||
|         return IR::Opcode::StorageAtomicIAdd64; | ||||
|     case IR::Opcode::GlobalAtomicSMin64: | ||||
|         return IR::Opcode::StorageAtomicSMin64; | ||||
|     case IR::Opcode::GlobalAtomicUMin64: | ||||
|         return IR::Opcode::StorageAtomicUMin64; | ||||
|     case IR::Opcode::GlobalAtomicSMax64: | ||||
|         return IR::Opcode::StorageAtomicSMax64; | ||||
|     case IR::Opcode::GlobalAtomicUMax64: | ||||
|         return IR::Opcode::StorageAtomicUMax64; | ||||
|     case IR::Opcode::GlobalAtomicAnd64: | ||||
|         return IR::Opcode::StorageAtomicAnd64; | ||||
|     case IR::Opcode::GlobalAtomicOr64: | ||||
|         return IR::Opcode::StorageAtomicOr64; | ||||
|     case IR::Opcode::GlobalAtomicXor64: | ||||
|         return IR::Opcode::StorageAtomicXor64; | ||||
|     case IR::Opcode::GlobalAtomicExchange32: | ||||
|         return IR::Opcode::StorageAtomicExchange32; | ||||
|     case IR::Opcode::GlobalAtomicExchange64: | ||||
|         return IR::Opcode::StorageAtomicExchange64; | ||||
|     case IR::Opcode::GlobalAtomicAddF32: | ||||
|         return IR::Opcode::StorageAtomicAddF32; | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|         return IR::Opcode::StorageAtomicAddF16x2; | ||||
|     case IR::Opcode::GlobalAtomicMinF16x2: | ||||
|         return IR::Opcode::StorageAtomicMinF16x2; | ||||
|     case IR::Opcode::GlobalAtomicMaxF16x2: | ||||
|         return IR::Opcode::StorageAtomicMaxF16x2; | ||||
|     case IR::Opcode::GlobalAtomicAddF32x2: | ||||
|         return IR::Opcode::StorageAtomicAddF32x2; | ||||
|     case IR::Opcode::GlobalAtomicMinF32x2: | ||||
|         return IR::Opcode::StorageAtomicMinF32x2; | ||||
|     case IR::Opcode::GlobalAtomicMaxF32x2: | ||||
|         return IR::Opcode::StorageAtomicMaxF32x2; | ||||
|     default: | ||||
|         throw InvalidArgument("Invalid global memory opcode {}", opcode); | ||||
|     } | ||||
| @@ -328,6 +409,16 @@ void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index | ||||
|     inst.Invalidate(); | ||||
| } | ||||
|  | ||||
| /// Replace an atomic operation on global memory instruction with its storage buffer equivalent | ||||
| void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||||
|                    const IR::U32& offset) { | ||||
|     const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())}; | ||||
|     const auto it{IR::Block::InstructionList::s_iterator_to(inst)}; | ||||
|     const IR::Value value{ | ||||
|         &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})}; | ||||
|     inst.ReplaceUsesWith(value); | ||||
| } | ||||
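Note: the rewrite keeps the atomic's value operand (inst.Arg(1)) and swaps the 64-bit pointer for a (binding, offset) pair; schematically:

    // Before:  %old = GlobalAtomicIAdd32  %addr64, %value
    // After:   %old = StorageAtomicIAdd32 %binding, %offset, %value
    // ReplaceUsesWith then redirects every consumer of %old to the new inst.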
|  | ||||
| /// Replace a global memory instruction with its storage buffer equivalent | ||||
| void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||||
|              const IR::U32& offset) { | ||||
| @@ -348,6 +439,34 @@ void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index, | ||||
|     case IR::Opcode::WriteGlobal64: | ||||
|     case IR::Opcode::WriteGlobal128: | ||||
|         return ReplaceWrite(block, inst, storage_index, offset); | ||||
|     case IR::Opcode::GlobalAtomicIAdd32: | ||||
|     case IR::Opcode::GlobalAtomicSMin32: | ||||
|     case IR::Opcode::GlobalAtomicUMin32: | ||||
|     case IR::Opcode::GlobalAtomicSMax32: | ||||
|     case IR::Opcode::GlobalAtomicUMax32: | ||||
|     case IR::Opcode::GlobalAtomicInc32: | ||||
|     case IR::Opcode::GlobalAtomicDec32: | ||||
|     case IR::Opcode::GlobalAtomicAnd32: | ||||
|     case IR::Opcode::GlobalAtomicOr32: | ||||
|     case IR::Opcode::GlobalAtomicXor32: | ||||
|     case IR::Opcode::GlobalAtomicExchange32: | ||||
|     case IR::Opcode::GlobalAtomicIAdd64: | ||||
|     case IR::Opcode::GlobalAtomicSMin64: | ||||
|     case IR::Opcode::GlobalAtomicUMin64: | ||||
|     case IR::Opcode::GlobalAtomicSMax64: | ||||
|     case IR::Opcode::GlobalAtomicUMax64: | ||||
|     case IR::Opcode::GlobalAtomicAnd64: | ||||
|     case IR::Opcode::GlobalAtomicOr64: | ||||
|     case IR::Opcode::GlobalAtomicXor64: | ||||
|     case IR::Opcode::GlobalAtomicExchange64: | ||||
|     case IR::Opcode::GlobalAtomicAddF32: | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|     case IR::Opcode::GlobalAtomicAddF32x2: | ||||
|     case IR::Opcode::GlobalAtomicMinF16x2: | ||||
|     case IR::Opcode::GlobalAtomicMinF32x2: | ||||
|     case IR::Opcode::GlobalAtomicMaxF16x2: | ||||
|     case IR::Opcode::GlobalAtomicMaxF32x2: | ||||
|         return ReplaceAtomic(block, inst, storage_index, offset); | ||||
|     default: | ||||
|         throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode()); | ||||
|     } | ||||
| @@ -364,7 +483,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||||
|             CollectStorageBuffers(*block, inst, info); | ||||
|         } | ||||
|     } | ||||
|     u32 storage_index{}; | ||||
|     for (const StorageBufferAddr& storage_buffer : info.set) { | ||||
|         program.info.storage_buffers_descriptors.push_back({ | ||||
|             .cbuf_index = storage_buffer.index, | ||||
| @@ -372,7 +490,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||||
|             .count = 1, | ||||
|             .is_written{info.writes.contains(storage_buffer)}, | ||||
|         }); | ||||
|         ++storage_index; | ||||
|     } | ||||
|     for (const StorageInst& storage_inst : info.to_replace) { | ||||
|         const StorageBufferAddr storage_buffer{storage_inst.storage_buffer}; | ||||
|   | ||||
| @@ -114,6 +114,18 @@ IR::Opcode Replace(IR::Opcode op) { | ||||
|         return IR::Opcode::ConvertF32U32; | ||||
|     case IR::Opcode::ConvertF16U64: | ||||
|         return IR::Opcode::ConvertF32U64; | ||||
|     case IR::Opcode::GlobalAtomicAddF16x2: | ||||
|         return IR::Opcode::GlobalAtomicAddF32x2; | ||||
|     case IR::Opcode::StorageAtomicAddF16x2: | ||||
|         return IR::Opcode::StorageAtomicAddF32x2; | ||||
|     case IR::Opcode::GlobalAtomicMinF16x2: | ||||
|         return IR::Opcode::GlobalAtomicMinF32x2; | ||||
|     case IR::Opcode::StorageAtomicMinF16x2: | ||||
|         return IR::Opcode::StorageAtomicMinF32x2; | ||||
|     case IR::Opcode::GlobalAtomicMaxF16x2: | ||||
|         return IR::Opcode::GlobalAtomicMaxF32x2; | ||||
|     case IR::Opcode::StorageAtomicMaxF16x2: | ||||
|         return IR::Opcode::StorageAtomicMaxF32x2; | ||||
|     default: | ||||
|         return op; | ||||
|     } | ||||
|   | ||||
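Note: this mapping demotes packed f16 atomics to their f32x2 counterparts when the host cannot use fp16. A minimal sketch of the assumed pass driver (the Inst API names here are assumptions):

    // Non-f16 opcodes map to themselves, so the rewrite is a no-op for them.
    for (IR::Block* const block : program.blocks) {
        for (IR::Inst& inst : block->Instructions()) {
            inst.ReplaceOpcode(Replace(inst.GetOpcode()));
        }
    }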
| @@ -38,6 +38,7 @@ struct Profile { | ||||
|     bool support_viewport_index_layer_non_geometry{}; | ||||
|     bool support_typeless_image_loads{}; | ||||
|     bool warp_size_potentially_larger_than_guest{}; | ||||
|     bool support_int64_atomics{}; | ||||
|  | ||||
|     // FClamp is broken and OpFMax + OpFMin should be used instead | ||||
|     bool has_broken_spirv_clamp{}; | ||||
|   | ||||
| @@ -128,6 +128,19 @@ struct Info { | ||||
|     bool uses_subgroup_mask{}; | ||||
|     bool uses_fswzadd{}; | ||||
|     bool uses_typeless_image_reads{}; | ||||
|     bool uses_shared_increment{}; | ||||
|     bool uses_shared_decrement{}; | ||||
|     bool uses_global_increment{}; | ||||
|     bool uses_global_decrement{}; | ||||
|     bool uses_atomic_f32_add{}; | ||||
|     bool uses_atomic_f16x2_add{}; | ||||
|     bool uses_atomic_f16x2_min{}; | ||||
|     bool uses_atomic_f16x2_max{}; | ||||
|     bool uses_atomic_f32x2_add{}; | ||||
|     bool uses_atomic_f32x2_min{}; | ||||
|     bool uses_atomic_f32x2_max{}; | ||||
|     bool uses_64_bit_atomics{}; | ||||
|     bool uses_shared_memory_u32x2{}; | ||||
|  | ||||
|     IR::Type used_constant_buffer_types{}; | ||||
|  | ||||
|   | ||||
| @@ -637,6 +637,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, | ||||
|             device.IsExtShaderViewportIndexLayerSupported(), | ||||
|         .support_typeless_image_loads = device.IsFormatlessImageLoadSupported(), | ||||
|         .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), | ||||
|         .support_int64_atomics = device.IsExtShaderAtomicInt64Supported(), | ||||
|         .has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR, | ||||
|         .generic_input_types{}, | ||||
|         .fixed_state_point_size{}, | ||||
|   | ||||
| @@ -681,6 +681,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|     bool has_ext_transform_feedback{}; | ||||
|     bool has_ext_custom_border_color{}; | ||||
|     bool has_ext_extended_dynamic_state{}; | ||||
|     bool has_ext_shader_atomic_int64{}; | ||||
|     for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) { | ||||
|         const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name, | ||||
|                               bool push) { | ||||
| @@ -710,6 +711,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|         test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false); | ||||
|         test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false); | ||||
|         test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false); | ||||
|         test(has_ext_shader_atomic_int64, VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME, false); | ||||
|         test(has_khr_workgroup_memory_explicit_layout, | ||||
|              VK_KHR_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_EXTENSION_NAME, false); | ||||
|         if (Settings::values.renderer_debug) { | ||||
| @@ -760,6 +762,18 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|     } else { | ||||
|         is_warp_potentially_bigger = true; | ||||
|     } | ||||
|     if (has_ext_shader_atomic_int64) { | ||||
|         VkPhysicalDeviceShaderAtomicInt64Features atomic_int64; | ||||
|         atomic_int64.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR; | ||||
|         atomic_int64.pNext = nullptr; | ||||
|         features.pNext = &atomic_int64; | ||||
|         physical.GetFeatures2KHR(features); | ||||
|  | ||||
|         if (atomic_int64.shaderBufferInt64Atomics && atomic_int64.shaderSharedInt64Atomics) { | ||||
|             extensions.push_back(VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME); | ||||
|             ext_shader_atomic_int64 = true; | ||||
|         } | ||||
|     } | ||||
|     if (has_ext_transform_feedback) { | ||||
|         VkPhysicalDeviceTransformFeedbackFeaturesEXT tfb_features; | ||||
|         tfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; | ||||
|   | ||||
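Note: querying through GetFeatures2KHR only detects support; for the atomics to actually work, the same feature struct must also be chained into VkDeviceCreateInfo::pNext when the device is created. A hedged sketch using the struct and flags from the Vulkan spec:

    VkPhysicalDeviceShaderAtomicInt64Features atomic_int64{
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR,
        .pNext = nullptr,
        .shaderBufferInt64Atomics = VK_TRUE,
        .shaderSharedInt64Atomics = VK_TRUE,
    };
    // ...then link &atomic_int64 into the pNext chain handed to vkCreateDevice.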
| @@ -229,6 +229,11 @@ public: | ||||
|         return ext_shader_stencil_export; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if the device supports VK_KHR_shader_atomic_int64. | ||||
|     bool IsExtShaderAtomicInt64Supported() const { | ||||
|         return ext_shader_atomic_int64; | ||||
|     } | ||||
|  | ||||
|     /// Returns true when a known debugging tool is attached. | ||||
|     bool HasDebuggingToolAttached() const { | ||||
|         return has_renderdoc || has_nsight_graphics; | ||||
| @@ -320,6 +325,7 @@ private: | ||||
|     bool ext_custom_border_color{};         ///< Support for VK_EXT_custom_border_color. | ||||
|     bool ext_extended_dynamic_state{};      ///< Support for VK_EXT_extended_dynamic_state. | ||||
|     bool ext_shader_stencil_export{};       ///< Support for VK_EXT_shader_stencil_export. | ||||
|     bool ext_shader_atomic_int64{};         ///< Support for VK_KHR_shader_atomic_int64. | ||||
|     bool nv_device_diagnostics_config{};    ///< Support for VK_NV_device_diagnostics_config. | ||||
|     bool has_renderdoc{};                   ///< Has RenderDoc attached | ||||
|     bool has_nsight_graphics{};             ///< Has Nsight Graphics attached | ||||
|   | ||||