shader: Add NVN storage buffer fallbacks
When the SSBO origin of a global memory instruction cannot be tracked, leave it as a global memory operation and assume the pointer falls within one of the NVN storage buffer slots, then perform a linear search over those slots at shader runtime to locate the matching buffer.
This commit is contained in:
		| @@ -411,6 +411,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program, u32& bin | ||||
|     DefineTextures(program.info, binding); | ||||
|     DefineImages(program.info, binding); | ||||
|     DefineAttributeMemAccess(program.info); | ||||
| +    DefineGlobalMemoryFunctions(program.info); | ||||
|     DefineLabels(program); | ||||
| } | ||||
|  | ||||
| @@ -762,6 +763,82 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | ||||
|     if (!info.uses_global_memory) { | ||||
|         return; | ||||
|     } | ||||
|     using DefPtr = Id StorageDefinitions::*; | ||||
|     const Id zero{u32_zero_value}; | ||||
|     const auto define_body{[&](DefPtr ssbo_member, Id addr, Id element_pointer, u32 shift, | ||||
|                                auto&& callback) { | ||||
|         AddLabel(); | ||||
|         const size_t num_buffers{info.storage_buffers_descriptors.size()}; | ||||
|         for (size_t index = 0; index < num_buffers; ++index) { | ||||
|             const auto& ssbo{info.storage_buffers_descriptors[index]}; | ||||
|             const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)}; | ||||
|             const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)}; | ||||
|             const Id ssbo_addr_pointer{OpAccessChain( | ||||
|                 uniform_types.U32x2, cbufs[ssbo.cbuf_index].U32x2, zero, ssbo_addr_cbuf_offset)}; | ||||
|             const Id ssbo_size_pointer{OpAccessChain(uniform_types.U32, cbufs[ssbo.cbuf_index].U32, | ||||
|                                                      zero, ssbo_size_cbuf_offset)}; | ||||
|  | ||||
|             const Id ssbo_addr{OpBitcast(U64, OpLoad(U32[2], ssbo_addr_pointer))}; | ||||
|             const Id ssbo_size{OpUConvert(U64, OpLoad(U32[1], ssbo_size_pointer))}; | ||||
|             const Id ssbo_end{OpIAdd(U64, ssbo_addr, ssbo_size)}; | ||||
|             const Id cond{OpLogicalAnd(U1, OpUGreaterThanEqual(U1, addr, ssbo_addr), | ||||
|                                        OpULessThan(U1, addr, ssbo_end))}; | ||||
|             const Id then_label{OpLabel()}; | ||||
|             const Id else_label{OpLabel()}; | ||||
|             OpSelectionMerge(else_label, spv::SelectionControlMask::MaskNone); | ||||
|             OpBranchConditional(cond, then_label, else_label); | ||||
|             AddLabel(then_label); | ||||
|             const Id ssbo_id{ssbos[index].*ssbo_member}; | ||||
|             const Id ssbo_offset{OpUConvert(U32[1], OpISub(U64, addr, ssbo_addr))}; | ||||
|             const Id ssbo_index{OpShiftRightLogical(U32[1], ssbo_offset, Const(shift))}; | ||||
|             const Id ssbo_pointer{OpAccessChain(element_pointer, ssbo_id, zero, ssbo_index)}; | ||||
|             callback(ssbo_pointer); | ||||
|             AddLabel(else_label); | ||||
|         } | ||||
|     }}; | ||||
|     const auto define_load{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||||
|         const Id function_type{TypeFunction(type, U64)}; | ||||
|         const Id func_id{OpFunction(type, spv::FunctionControlMask::MaskNone, function_type)}; | ||||
|         const Id addr{OpFunctionParameter(U64)}; | ||||
|         define_body(ssbo_member, addr, element_pointer, shift, | ||||
|                     [&](Id ssbo_pointer) { OpReturnValue(OpLoad(type, ssbo_pointer)); }); | ||||
|         OpReturnValue(ConstantNull(type)); | ||||
|         OpFunctionEnd(); | ||||
|         return func_id; | ||||
|     }}; | ||||
|     const auto define_write{[&](DefPtr ssbo_member, Id element_pointer, Id type, u32 shift) { | ||||
|         const Id function_type{TypeFunction(void_id, U64, type)}; | ||||
|         const Id func_id{OpFunction(void_id, spv::FunctionControlMask::MaskNone, function_type)}; | ||||
|         const Id addr{OpFunctionParameter(U64)}; | ||||
|         const Id data{OpFunctionParameter(type)}; | ||||
|         define_body(ssbo_member, addr, element_pointer, shift, [&](Id ssbo_pointer) { | ||||
|             OpStore(ssbo_pointer, data); | ||||
|             OpReturn(); | ||||
|         }); | ||||
|         OpReturn(); | ||||
|         OpFunctionEnd(); | ||||
|         return func_id; | ||||
|     }}; | ||||
|     const auto define{ | ||||
|         [&](DefPtr ssbo_member, const StorageTypeDefinition& type_def, Id type, size_t size) { | ||||
|             const Id element_type{type_def.element}; | ||||
|             const u32 shift{static_cast<u32>(std::countr_zero(size))}; | ||||
|             const Id load_func{define_load(ssbo_member, element_type, type, shift)}; | ||||
|             const Id write_func{define_write(ssbo_member, element_type, type, shift)}; | ||||
|             return std::make_pair(load_func, write_func); | ||||
|         }}; | ||||
|     std::tie(load_global_func_u32, write_global_func_u32) = | ||||
|         define(&StorageDefinitions::U32, storage_types.U32, U32[1], sizeof(u32)); | ||||
|     std::tie(load_global_func_u32x2, write_global_func_u32x2) = | ||||
|         define(&StorageDefinitions::U32x2, storage_types.U32x2, U32[2], sizeof(u32[2])); | ||||
|     std::tie(load_global_func_u32x4, write_global_func_u32x4) = | ||||
|         define(&StorageDefinitions::U32x4, storage_types.U32x4, U32[4], sizeof(u32[4])); | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) { | ||||
|     if (info.constant_buffer_descriptors.empty()) { | ||||
|         return; | ||||
|   | ||||
| @@ -224,6 +224,13 @@ public: | ||||
|     Id f32x2_min_cas{}; | ||||
|     Id f32x2_max_cas{}; | ||||
|  | ||||
|     Id load_global_func_u32{}; | ||||
|     Id load_global_func_u32x2{}; | ||||
|     Id load_global_func_u32x4{}; | ||||
|     Id write_global_func_u32{}; | ||||
|     Id write_global_func_u32x2{}; | ||||
|     Id write_global_func_u32x4{}; | ||||
|  | ||||
|     Id input_position{}; | ||||
|     std::array<Id, 32> input_generics{}; | ||||
|  | ||||
| @@ -255,6 +262,7 @@ private: | ||||
|     void DefineTextures(const Info& info, u32& binding); | ||||
|     void DefineImages(const Info& info, u32& binding); | ||||
|     void DefineAttributeMemAccess(const Info& info); | ||||
| +    void DefineGlobalMemoryFunctions(const Info& info); | ||||
|     void DefineLabels(IR::Program& program); | ||||
|  | ||||
|     void DefineInputs(const Info& info); | ||||
|   | ||||
| @@ -84,16 +84,16 @@ void EmitLoadGlobalU8(EmitContext& ctx); | ||||
| void EmitLoadGlobalS8(EmitContext& ctx); | ||||
| void EmitLoadGlobalU16(EmitContext& ctx); | ||||
| void EmitLoadGlobalS16(EmitContext& ctx); | ||||
| -void EmitLoadGlobal32(EmitContext& ctx); | ||||
| -void EmitLoadGlobal64(EmitContext& ctx); | ||||
| -void EmitLoadGlobal128(EmitContext& ctx); | ||||
| +Id EmitLoadGlobal32(EmitContext& ctx, Id address); | ||||
| +Id EmitLoadGlobal64(EmitContext& ctx, Id address); | ||||
| +Id EmitLoadGlobal128(EmitContext& ctx, Id address); | ||||
| void EmitWriteGlobalU8(EmitContext& ctx); | ||||
| void EmitWriteGlobalS8(EmitContext& ctx); | ||||
| void EmitWriteGlobalU16(EmitContext& ctx); | ||||
| void EmitWriteGlobalS16(EmitContext& ctx); | ||||
| -void EmitWriteGlobal32(EmitContext& ctx); | ||||
| -void EmitWriteGlobal64(EmitContext& ctx); | ||||
| -void EmitWriteGlobal128(EmitContext& ctx); | ||||
| +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value); | ||||
| +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value); | ||||
| +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value); | ||||
| Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| Id EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| Id EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| @@ -277,9 +277,9 @@ Id EmitFPIsNan16(EmitContext& ctx, Id value); | ||||
| Id EmitFPIsNan32(EmitContext& ctx, Id value); | ||||
| Id EmitFPIsNan64(EmitContext& ctx, Id value); | ||||
| Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b); | ||||
| -void EmitIAdd64(EmitContext& ctx); | ||||
| +Id EmitIAdd64(EmitContext& ctx, Id a, Id b); | ||||
| Id EmitISub32(EmitContext& ctx, Id a, Id b); | ||||
| -void EmitISub64(EmitContext& ctx); | ||||
| +Id EmitISub64(EmitContext& ctx, Id a, Id b); | ||||
| Id EmitIMul32(EmitContext& ctx, Id a, Id b); | ||||
| Id EmitINeg32(EmitContext& ctx, Id value); | ||||
| Id EmitINeg64(EmitContext& ctx, Id value); | ||||
|   | ||||
| @@ -55,16 +55,16 @@ Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) { | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| -void EmitIAdd64(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +Id EmitIAdd64(EmitContext& ctx, Id a, Id b) { | ||||
| +    return ctx.OpIAdd(ctx.U64, a, b); | ||||
| } | ||||
|  | ||||
| Id EmitISub32(EmitContext& ctx, Id a, Id b) { | ||||
|     return ctx.OpISub(ctx.U32[1], a, b); | ||||
| } | ||||
|  | ||||
| -void EmitISub64(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +Id EmitISub64(EmitContext& ctx, Id a, Id b) { | ||||
| +    return ctx.OpISub(ctx.U64, a, b); | ||||
| } | ||||
|  | ||||
| Id EmitIMul32(EmitContext& ctx, Id a, Id b) { | ||||
|   | ||||
| @@ -64,16 +64,16 @@ void EmitLoadGlobalS16(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| -void EmitLoadGlobal32(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +Id EmitLoadGlobal32(EmitContext& ctx, Id address) { | ||||
| +    return ctx.OpFunctionCall(ctx.U32[1], ctx.load_global_func_u32, address); | ||||
| } | ||||
|  | ||||
| -void EmitLoadGlobal64(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +Id EmitLoadGlobal64(EmitContext& ctx, Id address) { | ||||
| +    return ctx.OpFunctionCall(ctx.U32[2], ctx.load_global_func_u32x2, address); | ||||
| } | ||||
|  | ||||
| -void EmitLoadGlobal128(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +Id EmitLoadGlobal128(EmitContext& ctx, Id address) { | ||||
| +    return ctx.OpFunctionCall(ctx.U32[4], ctx.load_global_func_u32x4, address); | ||||
| } | ||||
|  | ||||
| void EmitWriteGlobalU8(EmitContext&) { | ||||
| @@ -92,16 +92,16 @@ void EmitWriteGlobalS16(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| -void EmitWriteGlobal32(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +void EmitWriteGlobal32(EmitContext& ctx, Id address, Id value) { | ||||
| +    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32, address, value); | ||||
| } | ||||
|  | ||||
| -void EmitWriteGlobal64(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +void EmitWriteGlobal64(EmitContext& ctx, Id address, Id value) { | ||||
| +    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x2, address, value); | ||||
| } | ||||
|  | ||||
| -void EmitWriteGlobal128(EmitContext&) { | ||||
| -    throw NotImplementedException("SPIR-V Instruction"); | ||||
| +void EmitWriteGlobal128(EmitContext& ctx, Id address, Id value) { | ||||
| +    ctx.OpFunctionCall(ctx.void_id, ctx.write_global_func_u32x4, address, value); | ||||
| } | ||||
|  | ||||
| Id EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user