Merge pull request #8133 from liamwhite/gl-spv-cbuf
shader_recompiler: support const buffer indirect addressing on OpenGL
This commit is contained in:
		| @@ -35,6 +35,15 @@ std::string_view OutputVertexIndex(EmitContext& ctx) { | ||||
|     return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : ""; | ||||
| } | ||||
|  | ||||
| // Builds the GLSL expression that reads one element of a constant buffer. | ||||
| // An immediate binding indexes the stage's named cbuf array directly; any other | ||||
| // binding is consumed as a variable and routed through a GetCbufIndirect call. | ||||
| std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) { | ||||
|     if (!binding.IsImmediate()) { | ||||
|         const auto binding_expr{ctx.var_alloc.Consume(binding)}; | ||||
|         return fmt::format("GetCbufIndirect({},{})", binding_expr, index); | ||||
|     } | ||||
|     return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index); | ||||
| } | ||||
|  | ||||
| void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | ||||
|              const IR::Value& offset, u32 num_bits, std::string_view cast = {}, | ||||
|              std::string_view bit_offset = {}) { | ||||
| @@ -55,8 +64,8 @@ void GetCbuf(EmitContext& ctx, std::string_view ret, const IR::Value& binding, | ||||
|     const auto swizzle{is_immediate ? fmt::format(".{}", OffsetSwizzle(offset.U32())) | ||||
|                                     : fmt::format("[({}>>2)%4]", offset_var)}; | ||||
|  | ||||
|     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||||
|     const auto cbuf_cast{fmt::format("{}({}[{}]{{}})", cast, cbuf, index)}; | ||||
|     const auto cbuf{ChooseCbuf(ctx, binding, index)}; | ||||
|     const auto cbuf_cast{fmt::format("{}({}{{}})", cast, cbuf)}; | ||||
|     const auto extraction{num_bits == 32 ? cbuf_cast | ||||
|                                          : fmt::format("bitfieldExtract({},int({}),{})", cbuf_cast, | ||||
|                                                        bit_offset, num_bits)}; | ||||
| @@ -140,9 +149,9 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||
|  | ||||
| void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, | ||||
|                       const IR::Value& offset) { | ||||
|     const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||||
|     const auto cast{ctx.profile.has_gl_cbuf_ftou_bug ? "" : "ftou"}; | ||||
|     if (offset.IsImmediate()) { | ||||
|         const auto cbuf{fmt::format("{}_cbuf{}", ctx.stage_name, binding.U32())}; | ||||
|         static constexpr u32 cbuf_size{0x10000}; | ||||
|         const u32 u32_offset{offset.U32()}; | ||||
|         const s32 signed_offset{static_cast<s32>(offset.U32())}; | ||||
| @@ -162,17 +171,17 @@ void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding | ||||
|         return; | ||||
|     } | ||||
|     const auto offset_var{ctx.var_alloc.Consume(offset)}; | ||||
|     const auto cbuf{ChooseCbuf(ctx, binding, fmt::format("{}>>4", offset_var))}; | ||||
|     if (!ctx.profile.has_gl_component_indexing_bug) { | ||||
|         ctx.AddU32x2("{}=uvec2({}({}[{}>>4][({}>>2)%4]),{}({}[({}+4)>>4][(({}+4)>>2)%4]));", inst, | ||||
|                      cast, cbuf, offset_var, offset_var, cast, cbuf, offset_var, offset_var); | ||||
|         ctx.AddU32x2("{}=uvec2({}({}[({}>>2)%4]),{}({}[(({}+4)>>2)%4]));", inst, cast, cbuf, | ||||
|                      offset_var, cast, cbuf, offset_var); | ||||
|         return; | ||||
|     } | ||||
|     const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32x2)}; | ||||
|     const auto cbuf_offset{fmt::format("{}>>2", offset_var)}; | ||||
|     for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||||
|         ctx.Add("if(({}&3)=={}){}=uvec2({}({}[{}>>4].{}),{}({}[({}+4)>>4].{}));", cbuf_offset, | ||||
|                 swizzle, ret, cast, cbuf, offset_var, "xyzw"[swizzle], cast, cbuf, offset_var, | ||||
|                 "xyzw"[(swizzle + 1) % 4]); | ||||
|         ctx.Add("if(({}&3)=={}){}=uvec2({}({}.{}),{}({}.{}));", cbuf_offset, swizzle, ret, cast, | ||||
|                 cbuf, "xyzw"[swizzle], cast, cbuf, "xyzw"[(swizzle + 1) % 4]); | ||||
|     } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -359,6 +359,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | ||||
|         header += "layout(location=0) uniform vec4 scaling;"; | ||||
|     } | ||||
|     DefineConstantBuffers(bindings); | ||||
|     DefineConstantBufferIndirect(); | ||||
|     DefineStorageBuffers(bindings); | ||||
|     SetupImages(bindings); | ||||
|     SetupTextures(bindings); | ||||
| @@ -436,6 +437,24 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Appends the GLSL helper GetCbufIndirect(binding, offset) to the shader header. | ||||
| // Nothing is emitted unless the shader info reports indirect constant buffer use. | ||||
| void EmitContext::DefineConstantBufferIndirect() { | ||||
|     if (info.uses_cbuf_indirect) { | ||||
|         // The helper's return type follows the has_gl_cbuf_ftou_bug profile flag. | ||||
|         const char* const return_type{profile.has_gl_cbuf_ftou_bug ? "uvec4 " : "vec4 "}; | ||||
|         header += return_type; | ||||
|         header += "GetCbufIndirect(uint binding, uint offset){" | ||||
|                   "switch(binding){" | ||||
|                   "default:"; | ||||
|         // One case per declared descriptor; the leading default label is emitted | ||||
|         // directly in front of the first case. | ||||
|         for (const auto& cbuf_desc : info.constant_buffer_descriptors) { | ||||
|             header += fmt::format("case {}:return {}_cbuf{}[offset];", cbuf_desc.index, | ||||
|                                   stage_name, cbuf_desc.index); | ||||
|         } | ||||
|         // Close the switch and the function body. | ||||
|         header += "}}"; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineStorageBuffers(Bindings& bindings) { | ||||
|     if (info.storage_buffers_descriptors.empty()) { | ||||
|         return; | ||||
|   | ||||
| @@ -162,6 +162,7 @@ public: | ||||
| private: | ||||
|     void SetupExtensions(); | ||||
|     void DefineConstantBuffers(Bindings& bindings); | ||||
|     void DefineConstantBufferIndirect(); | ||||
|     void DefineStorageBuffers(Bindings& bindings); | ||||
|     void DefineGenericOutput(size_t index, u32 invocations); | ||||
|     void DefineHelperFunctions(); | ||||
|   | ||||
| @@ -1043,15 +1043,15 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) { | ||||
|         const Id merge_label{OpLabel()}; | ||||
|         const Id uniform_type{uniform_types.*member_ptr}; | ||||
|  | ||||
|         std::array<Id, Info::MAX_CBUFS> buf_labels; | ||||
|         std::array<Sirit::Literal, Info::MAX_CBUFS> buf_literals; | ||||
|         for (u32 i = 0; i < Info::MAX_CBUFS; i++) { | ||||
|         std::array<Id, Info::MAX_INDIRECT_CBUFS> buf_labels; | ||||
|         std::array<Sirit::Literal, Info::MAX_INDIRECT_CBUFS> buf_literals; | ||||
|         for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { | ||||
|             buf_labels[i] = OpLabel(); | ||||
|             buf_literals[i] = Sirit::Literal{i}; | ||||
|         } | ||||
|         OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone); | ||||
|         OpSwitch(binding, buf_labels[0], buf_literals, buf_labels); | ||||
|         for (u32 i = 0; i < Info::MAX_CBUFS; i++) { | ||||
|         for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { | ||||
|             AddLabel(buf_labels[i]); | ||||
|             const Id cbuf{cbufs[i].*member_ptr}; | ||||
|             const Id access_chain{OpAccessChain(uniform_type, cbuf, u32_zero_value, offset)}; | ||||
| @@ -1064,22 +1064,23 @@ void EmitContext::DefineConstantBufferIndirectFunctions(const Info& info) { | ||||
|         return func; | ||||
|     }}; | ||||
|     IR::Type types{info.used_indirect_cbuf_types}; | ||||
|     if (True(types & IR::Type::U8)) { | ||||
|     bool supports_aliasing = profile.support_descriptor_aliasing; | ||||
|     if (supports_aliasing && True(types & IR::Type::U8)) { | ||||
|         load_const_func_u8 = make_accessor(U8, &UniformDefinitions::U8); | ||||
|     } | ||||
|     if (True(types & IR::Type::U16)) { | ||||
|     if (supports_aliasing && True(types & IR::Type::U16)) { | ||||
|         load_const_func_u16 = make_accessor(U16, &UniformDefinitions::U16); | ||||
|     } | ||||
|     if (True(types & IR::Type::F32)) { | ||||
|     if (supports_aliasing && True(types & IR::Type::F32)) { | ||||
|         load_const_func_f32 = make_accessor(F32[1], &UniformDefinitions::F32); | ||||
|     } | ||||
|     if (True(types & IR::Type::U32)) { | ||||
|     if (supports_aliasing && True(types & IR::Type::U32)) { | ||||
|         load_const_func_u32 = make_accessor(U32[1], &UniformDefinitions::U32); | ||||
|     } | ||||
|     if (True(types & IR::Type::U32x2)) { | ||||
|     if (supports_aliasing && True(types & IR::Type::U32x2)) { | ||||
|         load_const_func_u32x2 = make_accessor(U32[2], &UniformDefinitions::U32x2); | ||||
|     } | ||||
|     if (True(types & IR::Type::U32x4)) { | ||||
|     if (!supports_aliasing || True(types & IR::Type::U32x4)) { | ||||
|         load_const_func_u32x4 = make_accessor(U32[4], &UniformDefinitions::U32x4); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -32,13 +32,8 @@ void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | ||||
| void AddRegisterIndexedLdc(Info& info) { | ||||
|     info.uses_cbuf_indirect = true; | ||||
|  | ||||
|     // The shader can use any possible constant buffer | ||||
|     info.constant_buffer_mask = (1 << Info::MAX_CBUFS) - 1; | ||||
|  | ||||
|     auto& cbufs{info.constant_buffer_descriptors}; | ||||
|     cbufs.clear(); | ||||
|     for (u32 i = 0; i < Info::MAX_CBUFS; i++) { | ||||
|         cbufs.push_back(ConstantBufferDescriptor{.index = i, .count = 1}); | ||||
|     for (u32 i = 0; i < Info::MAX_INDIRECT_CBUFS; i++) { | ||||
|         AddConstantBufferDescriptor(info, i, 1); | ||||
|  | ||||
|         // The shader can use any possible access size | ||||
|         info.constant_buffer_used_sizes[i] = 0x10'000; | ||||
|   | ||||
| @@ -105,6 +105,7 @@ struct ImageDescriptor { | ||||
| using ImageDescriptors = boost::container::small_vector<ImageDescriptor, 4>; | ||||
|  | ||||
| struct Info { | ||||
|     static constexpr size_t MAX_INDIRECT_CBUFS{14}; | ||||
|     static constexpr size_t MAX_CBUFS{18}; | ||||
|     static constexpr size_t MAX_SSBOS{32}; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user