glsl: WIP var forward declaration
to fix Loop control flow.
This commit is contained in:
		| @@ -29,7 +29,10 @@ std::string_view SamplerType(TextureType type) { | ||||
|         return "sampler2DArray"; | ||||
|     case TextureType::Color3D: | ||||
|         return "sampler3D"; | ||||
|     case TextureType::ColorCube: | ||||
|         return "samplerCube"; | ||||
|     default: | ||||
|         fmt::print("Texture type: {}", type); | ||||
|         throw NotImplementedException("Texture type: {}", type); | ||||
|     } | ||||
| } | ||||
| @@ -39,7 +42,6 @@ std::string_view SamplerType(TextureType type) { | ||||
| EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||||
|                          const RuntimeInfo& runtime_info_) | ||||
|     : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { | ||||
|     std::string header = ""; | ||||
|     SetupExtensions(header); | ||||
|     stage = program.stage; | ||||
|     switch (program.stage) { | ||||
| @@ -67,24 +69,23 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | ||||
|                               program.workgroup_size[2]); | ||||
|         break; | ||||
|     } | ||||
|     code += header; | ||||
|     const std::string_view attr_stage{stage == Stage::Fragment ? "fragment" : "vertex"}; | ||||
|     for (size_t index = 0; index < info.input_generics.size(); ++index) { | ||||
|         const auto& generic{info.input_generics[index]}; | ||||
|         if (generic.used) { | ||||
|             Add("layout(location={}) {} in vec4 in_attr{};", index, | ||||
|                 InterpDecorator(generic.interpolation), index); | ||||
|             header += fmt::format("layout(location={}) {} in vec4 in_attr{};", index, | ||||
|                                   InterpDecorator(generic.interpolation), index); | ||||
|         } | ||||
|     } | ||||
|     for (size_t index = 0; index < info.stores_frag_color.size(); ++index) { | ||||
|         if (!info.stores_frag_color[index]) { | ||||
|             continue; | ||||
|         } | ||||
|         Add("layout(location={})out vec4 frag_color{};", index, index); | ||||
|         header += fmt::format("layout(location={})out vec4 frag_color{};", index, index); | ||||
|     } | ||||
|     for (size_t index = 0; index < info.stores_generics.size(); ++index) { | ||||
|         if (info.stores_generics[index]) { | ||||
|             Add("layout(location={}) out vec4 out_attr{};", index, index); | ||||
|             header += fmt::format("layout(location={}) out vec4 out_attr{};", index, index); | ||||
|         } | ||||
|     } | ||||
|     DefineConstantBuffers(bindings); | ||||
| @@ -92,14 +93,15 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | ||||
|     SetupImages(bindings); | ||||
|     DefineHelperFunctions(); | ||||
|  | ||||
|     Add("void main(){{"); | ||||
|     header += "void main(){\n"; | ||||
|     if (stage == Stage::VertexA || stage == Stage::VertexB) { | ||||
|         Add("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::SetupExtensions(std::string& header) { | ||||
| void EmitContext::SetupExtensions(std::string&) { | ||||
|     header += "#extension GL_ARB_separate_shader_objects : enable\n"; | ||||
|     // header += "#extension GL_ARB_texture_cube_map_array : enable\n"; | ||||
|     if (info.uses_int64) { | ||||
|         header += "#extension GL_ARB_gpu_shader_int64 : enable\n"; | ||||
|     } | ||||
| @@ -127,7 +129,8 @@ void EmitContext::DefineConstantBuffers(Bindings& bindings) { | ||||
|         return; | ||||
|     } | ||||
|     for (const auto& desc : info.constant_buffer_descriptors) { | ||||
|         Add("layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | ||||
|         header += fmt::format( | ||||
|             "layout(std140,binding={}) uniform {}_cbuf_{}{{vec4 {}_cbuf{}[{}];}};", | ||||
|             bindings.uniform_buffer, stage_name, desc.index, stage_name, desc.index, 4 * 1024); | ||||
|         bindings.uniform_buffer += desc.count; | ||||
|     } | ||||
| @@ -138,53 +141,53 @@ void EmitContext::DefineStorageBuffers(Bindings& bindings) { | ||||
|         return; | ||||
|     } | ||||
|     for (const auto& desc : info.storage_buffers_descriptors) { | ||||
|         Add("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", bindings.storage_buffer, | ||||
|             bindings.storage_buffer, desc.cbuf_index); | ||||
|         header += fmt::format("layout(std430,binding={}) buffer ssbo_{}{{uint ssbo{}[];}};", | ||||
|                               bindings.storage_buffer, bindings.storage_buffer, desc.cbuf_index); | ||||
|         bindings.storage_buffer += desc.count; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineHelperFunctions() { | ||||
|     if (info.uses_global_increment) { | ||||
|         code += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; | ||||
|         header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n"; | ||||
|     } | ||||
|     if (info.uses_global_decrement) { | ||||
|         code += | ||||
|         header += | ||||
|             "uint CasDecrement(uint op_a,uint op_b){return(op_a==0||op_a>op_b)?op_b:(op_a-1u);}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f32_add) { | ||||
|         code += "uint CasFloatAdd(uint op_a,float op_b){return " | ||||
|                 "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; | ||||
|         header += "uint CasFloatAdd(uint op_a,float op_b){return " | ||||
|                   "floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_add) { | ||||
|         code += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " | ||||
|                 "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; | ||||
|         header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return " | ||||
|                   "packHalf2x16(unpackHalf2x16(op_a)+op_b);}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_min) { | ||||
|         code += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " | ||||
|                 "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; | ||||
|         header += "uint CasFloatMin32x2(uint op_a,vec2 op_b){return " | ||||
|                   "packHalf2x16(min(unpackHalf2x16(op_a),op_b));}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f32x2_max) { | ||||
|         code += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " | ||||
|                 "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; | ||||
|         header += "uint CasFloatMax32x2(uint op_a,vec2 op_b){return " | ||||
|                   "packHalf2x16(max(unpackHalf2x16(op_a),op_b));}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_add) { | ||||
|         code += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " | ||||
|                 "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; | ||||
|         header += "uint CasFloatAdd16x2(uint op_a,f16vec2 op_b){return " | ||||
|                   "packFloat2x16(unpackFloat2x16(op_a)+op_b);}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_min) { | ||||
|         code += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " | ||||
|                 "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; | ||||
|         header += "uint CasFloatMin16x2(uint op_a,f16vec2 op_b){return " | ||||
|                   "packFloat2x16(min(unpackFloat2x16(op_a),op_b));}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_max) { | ||||
|         code += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " | ||||
|                 "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; | ||||
|         header += "uint CasFloatMax16x2(uint op_a,f16vec2 op_b){return " | ||||
|                   "packFloat2x16(max(unpackFloat2x16(op_a),op_b));}\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_s32_min) { | ||||
|         code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | ||||
|         header += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; | ||||
|     } | ||||
|     if (info.uses_atomic_s32_max) { | ||||
|         code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; | ||||
|         header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -215,7 +218,8 @@ void EmitContext::SetupImages(Bindings& bindings) { | ||||
|         texture_bindings.push_back(bindings.texture); | ||||
|         const auto indices{bindings.texture + desc.count}; | ||||
|         for (u32 index = bindings.texture; index < indices; ++index) { | ||||
|             Add("layout(binding={}) uniform {} tex{};", bindings.texture, sampler_type, index); | ||||
|             header += fmt::format("layout(binding={}) uniform {} tex{};", bindings.texture, | ||||
|                                   sampler_type, index); | ||||
|         } | ||||
|         bindings.texture += desc.count; | ||||
|     } | ||||
|   | ||||
| @@ -119,6 +119,7 @@ public: | ||||
|         code += '\n'; | ||||
|     } | ||||
|  | ||||
|     std::string header; | ||||
|     std::string code; | ||||
|     RegAlloc reg_alloc; | ||||
|     const Info& info; | ||||
|   | ||||
| @@ -83,6 +83,7 @@ void Invoke(EmitContext& ctx, IR::Inst* inst) { | ||||
| } | ||||
|  | ||||
| void EmitInst(EmitContext& ctx, IR::Inst* inst) { | ||||
|     // ctx.Add("/* {} */", inst->GetOpcode()); | ||||
|     switch (inst->GetOpcode()) { | ||||
| #define OPCODE(name, result_type, ...)                                                             \ | ||||
|     case IR::Opcode::name:                                                                         \ | ||||
| @@ -108,12 +109,9 @@ void PrecolorInst(IR::Inst& phi) { | ||||
|         if (arg.IsImmediate()) { | ||||
|             ir.PhiMove(phi, arg); | ||||
|         } else { | ||||
|             ir.PhiMove(phi, IR::Value{&*arg.InstRecursive()}); | ||||
|             ir.PhiMove(phi, IR::Value{arg.InstRecursive()}); | ||||
|         } | ||||
|     } | ||||
|     for (size_t i = 0; i < num_args; ++i) { | ||||
|         IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void Precolor(const IR::Program& program) { | ||||
| @@ -144,10 +142,7 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||||
|                     ctx.Add("break;"); | ||||
|                 } | ||||
|             } else { | ||||
|                 // TODO: implement this | ||||
|                 ctx.Add("MOV.S.CC RC,{};" | ||||
|                         "BRK (NE.x);", | ||||
|                         0); | ||||
|                 ctx.Add("if({}){{break;}}", ctx.reg_alloc.Consume(node.data.break_node.cond)); | ||||
|             } | ||||
|             break; | ||||
|         case IR::AbstractSyntaxNode::Type::Return: | ||||
| @@ -155,10 +150,12 @@ void EmitCode(EmitContext& ctx, const IR::Program& program) { | ||||
|             ctx.Add("return;"); | ||||
|             break; | ||||
|         case IR::AbstractSyntaxNode::Type::Loop: | ||||
|             ctx.Add("do{{"); | ||||
|             ctx.Add("for(;;){{"); | ||||
|             break; | ||||
|         case IR::AbstractSyntaxNode::Type::Repeat: | ||||
|             ctx.Add("}}while({});", ctx.reg_alloc.Consume(node.data.repeat.cond)); | ||||
|             ctx.Add("if({}){{", ctx.reg_alloc.Consume(node.data.repeat.cond)); | ||||
|             ctx.Add("continue;\n}}else{{"); | ||||
|             ctx.Add("break;\n}}\n}}"); | ||||
|             break; | ||||
|         default: | ||||
|             fmt::print("{}", node.type); | ||||
| @@ -182,7 +179,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR | ||||
|     Precolor(program); | ||||
|     EmitCode(ctx, program); | ||||
|     const std::string version{fmt::format("#version 450{}\n", GlslVersionSpecifier(ctx))}; | ||||
|     ctx.code.insert(0, version); | ||||
|     ctx.header.insert(0, version); | ||||
|     for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { | ||||
|         ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); | ||||
|     } | ||||
|     ctx.code.insert(0, ctx.header); | ||||
|     ctx.code += "}"; | ||||
|     fmt::print("\n{}\n", ctx.code); | ||||
|     return ctx.code; | ||||
|   | ||||
| @@ -28,8 +28,8 @@ void SetSignFlag(EmitContext& ctx, IR::Inst& inst, std::string_view result) { | ||||
| } | ||||
| } // Anonymous namespace | ||||
| void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { | ||||
|     const auto result{ctx.reg_alloc.Define(inst)}; | ||||
|     ctx.Add("uint {}={}+{};", result, a, b); | ||||
|     const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; | ||||
|     ctx.Add("{}={}+{};", result, a, b); | ||||
|     SetZeroFlag(ctx, inst, result); | ||||
|     SetSignFlag(ctx, inst, result); | ||||
| } | ||||
| @@ -120,8 +120,8 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, std::string_view bas | ||||
|  | ||||
| void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, std::string_view base, | ||||
|                           std::string_view offset, std::string_view count) { | ||||
|     const auto result{ctx.reg_alloc.Define(inst)}; | ||||
|     ctx.Add("uint {}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); | ||||
|     const auto result{ctx.reg_alloc.Define(inst, Type::U32)}; | ||||
|     ctx.Add("{}=bitfieldExtract({},int({}),int({}));", result, base, offset, count); | ||||
|     SetZeroFlag(ctx, inst, result); | ||||
|     SetSignFlag(ctx, inst, result); | ||||
| } | ||||
|   | ||||
| @@ -74,7 +74,9 @@ std::string RegAlloc::Define(IR::Inst& inst, Type type) { | ||||
|     std::string type_str = ""; | ||||
|     if (!register_defined[id.index]) { | ||||
|         register_defined[id.index] = true; | ||||
|         type_str = GetGlslType(type); | ||||
|         // type_str = GetGlslType(type); | ||||
|         reg_types.push_back(GetGlslType(type)); | ||||
|         ++num_used_registers; | ||||
|     } | ||||
|     inst.SetDefinition<Id>(id); | ||||
|     return type_str + Representation(id); | ||||
|   | ||||
| @@ -5,6 +5,7 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <bitset> | ||||
| #include <vector> | ||||
|  | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| @@ -61,19 +62,21 @@ public: | ||||
|     std::string Define(IR::Inst& inst, IR::Type type); | ||||
|  | ||||
|     std::string Consume(const IR::Value& value); | ||||
|     std::string Consume(IR::Inst& inst); | ||||
|  | ||||
|     std::string GetGlslType(Type type); | ||||
|     std::string GetGlslType(IR::Type type); | ||||
|  | ||||
|     size_t num_used_registers{}; | ||||
|     std::vector<std::string> reg_types; | ||||
|  | ||||
| private: | ||||
|     static constexpr size_t NUM_REGS = 4096; | ||||
|     static constexpr size_t NUM_ELEMENTS = 4; | ||||
|  | ||||
|     std::string Consume(IR::Inst& inst); | ||||
|     Type RegType(IR::Type type); | ||||
|     Id Alloc(); | ||||
|     void Free(Id id); | ||||
|  | ||||
|     size_t num_used_registers{}; | ||||
|     std::bitset<NUM_REGS> register_use{}; | ||||
|     std::bitset<NUM_REGS> register_defined{}; | ||||
| }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user