glsl: Address rest of feedback
This commit is contained in:
		| @@ -148,6 +148,16 @@ std::string_view ImageFormatString(ImageFormat format) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| std::string_view ImageAccessQualifier(bool is_written, bool is_read) { | ||||
|     if (is_written && !is_read) { | ||||
|         return "writeonly "; | ||||
|     } | ||||
|     if (is_read && !is_written) { | ||||
|         return "readonly "; | ||||
|     } | ||||
|     return ""; | ||||
| } | ||||
|  | ||||
| std::string_view GetTessMode(TessPrimitive primitive) { | ||||
|     switch (primitive) { | ||||
|     case TessPrimitive::Triangles: | ||||
| @@ -262,7 +272,9 @@ void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) { | ||||
| EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_, | ||||
|                          const RuntimeInfo& runtime_info_) | ||||
|     : info{program.info}, profile{profile_}, runtime_info{runtime_info_} { | ||||
|     if (profile.need_fastmath_off) { | ||||
|         header += "#pragma optionNV(fastmath off)\n"; | ||||
|     } | ||||
|     SetupExtensions(); | ||||
|     stage = program.stage; | ||||
|     switch (program.stage) { | ||||
| @@ -335,7 +347,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | ||||
|     } | ||||
|     for (size_t index = 0; index < info.stores_generics.size(); ++index) { | ||||
|         // TODO: Properly resolve attribute issues | ||||
|         if (info.stores_generics[index] || stage == Stage::VertexA || stage == Stage::VertexB) { | ||||
|         if (info.stores_generics[index] || StageInitializesVaryings()) { | ||||
|             DefineGenericOutput(index, program.invocations); | ||||
|         } | ||||
|     } | ||||
| @@ -347,6 +359,17 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile | ||||
|     DefineConstants(); | ||||
| } | ||||
|  | ||||
| bool EmitContext::StageInitializesVaryings() const noexcept { | ||||
|     switch (stage) { | ||||
|     case Stage::VertexA: | ||||
|     case Stage::VertexB: | ||||
|     case Stage::Geometry: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::SetupExtensions() { | ||||
|     if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) { | ||||
|         header += "#extension GL_EXT_texture_shadow_lod : enable\n"; | ||||
| @@ -361,7 +384,7 @@ void EmitContext::SetupExtensions() { | ||||
|         header += "#extension GL_NV_shader_atomic_float : enable\n"; | ||||
|     } | ||||
|     if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { | ||||
|         header += "#extension NV_shader_atomic_fp16_vector : enable\n"; | ||||
|         header += "#extension GL_NV_shader_atomic_fp16_vector : enable\n"; | ||||
|     } | ||||
|     if (info.uses_fp16) { | ||||
|         if (profile.support_gl_nv_gpu_shader_5) { | ||||
| @@ -392,7 +415,7 @@ void EmitContext::SetupExtensions() { | ||||
|     if (info.stores_viewport_mask && profile.support_viewport_mask) { | ||||
|         header += "#extension GL_NV_viewport_array2 : enable\n"; | ||||
|     } | ||||
|     if (info.uses_typeless_image_reads || info.uses_typeless_image_writes) { | ||||
|     if (info.uses_typeless_image_reads) { | ||||
|         header += "#extension GL_EXT_shader_image_load_formatted : enable\n"; | ||||
|     } | ||||
|     if (info.uses_derivatives && profile.support_gl_derivative_control) { | ||||
| @@ -593,9 +616,9 @@ std::string EmitContext::DefineGlobalMemoryFunctions() { | ||||
|                     "return uvec4({0}[uint(addr-{1})>>2],{0}[uint(addr-{1}+4)>>2],{0}[" | ||||
|                     "uint(addr-{1}+8)>>2],{0}[uint(addr-{1}+12)>>2]);}}"); | ||||
|     } | ||||
|     write_func += "}"; | ||||
|     write_func_64 += "}"; | ||||
|     write_func_128 += "}"; | ||||
|     write_func += '}'; | ||||
|     write_func_64 += '}'; | ||||
|     write_func_128 += '}'; | ||||
|     load_func += "return 0u;}"; | ||||
|     load_func_64 += "return uvec2(0);}"; | ||||
|     load_func_128 += "return uvec4(0);}"; | ||||
| @@ -607,9 +630,10 @@ void EmitContext::SetupImages(Bindings& bindings) { | ||||
|     for (const auto& desc : info.image_buffer_descriptors) { | ||||
|         image_buffers.push_back({bindings.image, desc.count}); | ||||
|         const auto format{ImageFormatString(desc.format)}; | ||||
|         const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||||
|         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||||
|         header += fmt::format("layout(binding={}{}) uniform uimageBuffer img{}{};", bindings.image, | ||||
|                               format, bindings.image, array_decorator); | ||||
|         header += fmt::format("layout(binding={}{}) uniform {}uimageBuffer img{}{};", | ||||
|                               bindings.image, format, qualifier, bindings.image, array_decorator); | ||||
|         bindings.image += desc.count; | ||||
|     } | ||||
|     images.reserve(info.image_descriptors.size()); | ||||
| @@ -617,7 +641,7 @@ void EmitContext::SetupImages(Bindings& bindings) { | ||||
|         images.push_back({bindings.image, desc.count}); | ||||
|         const auto format{ImageFormatString(desc.format)}; | ||||
|         const auto image_type{ImageType(desc.type)}; | ||||
|         const auto qualifier{desc.is_written ? "" : "readonly "}; | ||||
|         const auto qualifier{ImageAccessQualifier(desc.is_written, desc.is_read)}; | ||||
|         const auto array_decorator{desc.count > 1 ? fmt::format("[{}]", desc.count) : ""}; | ||||
|         header += fmt::format("layout(binding={}{})uniform {}{} img{}{};", bindings.image, format, | ||||
|                               qualifier, image_type, bindings.image, array_decorator); | ||||
|   | ||||
| @@ -136,6 +136,8 @@ public: | ||||
|         code += '\n'; | ||||
|     } | ||||
|  | ||||
|     [[nodiscard]] bool StageInitializesVaryings() const noexcept; | ||||
|  | ||||
|     std::string header; | ||||
|     std::string code; | ||||
|     VarAlloc var_alloc; | ||||
|   | ||||
| @@ -329,7 +329,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val | ||||
|         ctx.Add("gl_BackSecondaryColor.{}={};", swizzle, value); | ||||
|         break; | ||||
|     case IR::Attribute::FogCoordinate: | ||||
|         ctx.Add("gl_FogFragCoord.x={};", value); | ||||
|         ctx.Add("gl_FogFragCoord={};", value); | ||||
|         break; | ||||
|     case IR::Attribute::ClipDistance0: | ||||
|     case IR::Attribute::ClipDistance1: | ||||
|   | ||||
| @@ -10,6 +10,17 @@ | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
|  | ||||
| namespace Shader::Backend::GLSL { | ||||
| namespace { | ||||
| void InitializeVaryings(EmitContext& ctx) { | ||||
|     ctx.Add("gl_Position=vec4(0,0,0,1);"); | ||||
|     // TODO: Properly resolve attribute issues | ||||
|     for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { | ||||
|         if (!ctx.info.stores_generics[index]) { | ||||
|             ctx.Add("out_attr{}=vec4(0,0,0,1);", index); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void EmitPhi(EmitContext& ctx, IR::Inst& phi) { | ||||
|     const size_t num_args{phi.NumArgs()}; | ||||
| @@ -44,14 +55,8 @@ void EmitPhiMove(EmitContext& ctx, const IR::Value& phi_value, const IR::Value& | ||||
| } | ||||
|  | ||||
| void EmitPrologue(EmitContext& ctx) { | ||||
|     if (ctx.stage == Stage::VertexA || ctx.stage == Stage::VertexB) { | ||||
|         ctx.Add("gl_Position=vec4(0.0f, 0.0f, 0.0f, 1.0f);"); | ||||
|         // TODO: Properly resolve attribute issues | ||||
|         for (size_t index = 0; index < ctx.info.stores_generics.size() / 2; ++index) { | ||||
|             if (!ctx.info.stores_generics[index]) { | ||||
|                 ctx.Add("out_attr{}=vec4(0,0,0,1);", index); | ||||
|             } | ||||
|         } | ||||
|     if (ctx.StageInitializesVaryings()) { | ||||
|         InitializeVaryings(ctx); | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -59,6 +64,7 @@ void EmitEpilogue(EmitContext&) {} | ||||
|  | ||||
| void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||||
|     ctx.Add("EmitStreamVertex(int({}));", ctx.var_alloc.Consume(stream)); | ||||
|     InitializeVaryings(ctx); | ||||
| } | ||||
|  | ||||
| void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||||
|   | ||||
| @@ -312,11 +312,14 @@ public: | ||||
|     } | ||||
|  | ||||
|     u32 Add(const ImageBufferDescriptor& desc) { | ||||
|         return Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { | ||||
|         const u32 index{Add(image_buffer_descriptors, desc, [&desc](const auto& existing) { | ||||
|             return desc.format == existing.format && desc.cbuf_index == existing.cbuf_index && | ||||
|                    desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count && | ||||
|                    desc.size_shift == existing.size_shift; | ||||
|         }); | ||||
|         })}; | ||||
|         image_buffer_descriptors[index].is_written |= desc.is_written; | ||||
|         image_buffer_descriptors[index].is_read |= desc.is_read; | ||||
|         return index; | ||||
|     } | ||||
|  | ||||
|     u32 Add(const TextureDescriptor& desc) { | ||||
| @@ -339,6 +342,7 @@ public: | ||||
|                    desc.size_shift == existing.size_shift; | ||||
|         })}; | ||||
|         image_descriptors[index].is_written |= desc.is_written; | ||||
|         image_descriptors[index].is_read |= desc.is_read; | ||||
|         return index; | ||||
|     } | ||||
|  | ||||
| @@ -430,10 +434,12 @@ void TexturePass(Environment& env, IR::Program& program) { | ||||
|                 throw NotImplementedException("Unexpected separate sampler"); | ||||
|             } | ||||
|             const bool is_written{inst->GetOpcode() != IR::Opcode::ImageRead}; | ||||
|             const bool is_read{inst->GetOpcode() == IR::Opcode::ImageRead}; | ||||
|             if (flags.type == TextureType::Buffer) { | ||||
|                 index = descriptors.Add(ImageBufferDescriptor{ | ||||
|                     .format = flags.image_format, | ||||
|                     .is_written = is_written, | ||||
|                     .is_read = is_read, | ||||
|                     .cbuf_index = cbuf.index, | ||||
|                     .cbuf_offset = cbuf.offset, | ||||
|                     .count = cbuf.count, | ||||
| @@ -444,6 +450,7 @@ void TexturePass(Environment& env, IR::Program& program) { | ||||
|                     .type = flags.type, | ||||
|                     .format = flags.image_format, | ||||
|                     .is_written = is_written, | ||||
|                     .is_read = is_read, | ||||
|                     .cbuf_index = cbuf.index, | ||||
|                     .cbuf_offset = cbuf.offset, | ||||
|                     .count = cbuf.count, | ||||
|   | ||||
| @@ -97,6 +97,8 @@ struct Profile { | ||||
|     /// Fragment outputs have to be declared even if they are not written to avoid undefined values. | ||||
|     /// See Ori and the Blind Forest's main menu for reference. | ||||
|     bool need_declared_frag_colors{}; | ||||
|     /// Prevents fast math optimizations that may cause inaccuracies | ||||
|     bool need_fastmath_off{}; | ||||
|  | ||||
|     /// OpFClamp is broken and OpFMax + OpFMin should be used instead | ||||
|     bool has_broken_spirv_clamp{}; | ||||
|   | ||||
| @@ -75,6 +75,7 @@ using TextureBufferDescriptors = boost::container::small_vector<TextureBufferDes | ||||
| struct ImageBufferDescriptor { | ||||
|     ImageFormat format; | ||||
|     bool is_written; | ||||
|     bool is_read; | ||||
|     u32 cbuf_index; | ||||
|     u32 cbuf_offset; | ||||
|     u32 count; | ||||
| @@ -99,6 +100,7 @@ struct ImageDescriptor { | ||||
|     TextureType type; | ||||
|     ImageFormat format; | ||||
|     bool is_written; | ||||
|     bool is_read; | ||||
|     u32 cbuf_index; | ||||
|     u32 cbuf_offset; | ||||
|     u32 count; | ||||
|   | ||||
| @@ -162,6 +162,7 @@ Device::Device() { | ||||
|     has_amd_shader_half_float = GLAD_GL_AMD_gpu_shader_half_float; | ||||
|     has_sparse_texture_2 = GLAD_GL_ARB_sparse_texture2; | ||||
|     warp_size_potentially_larger_than_guest = !is_nvidia && !is_intel; | ||||
|     need_fastmath_off = is_nvidia; | ||||
|  | ||||
|     // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive | ||||
|     // uniform buffers as "push constants" | ||||
|   | ||||
| @@ -136,6 +136,10 @@ public: | ||||
|         return warp_size_potentially_larger_than_guest; | ||||
|     } | ||||
|  | ||||
|     bool NeedsFastmathOff() const { | ||||
|         return need_fastmath_off; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     static bool TestVariableAoffi(); | ||||
|     static bool TestPreciseBug(); | ||||
| @@ -171,6 +175,7 @@ private: | ||||
|     bool has_amd_shader_half_float{}; | ||||
|     bool has_sparse_texture_2{}; | ||||
|     bool warp_size_potentially_larger_than_guest{}; | ||||
|     bool need_fastmath_off{}; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -132,28 +132,23 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | ||||
|     std::ranges::transform(infos, stage_infos.begin(), | ||||
|                            [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | ||||
|     auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { | ||||
|         if (device.UseAssemblyShaders()) { | ||||
|         if (!device.UseAssemblyShaders()) { | ||||
|             program.handle = glCreateProgram(); | ||||
|         } | ||||
|         for (size_t stage = 0; stage < 5; ++stage) { | ||||
|             const auto code{sources[stage]}; | ||||
|             if (code.empty()) { | ||||
|                 continue; | ||||
|             } | ||||
|             if (device.UseAssemblyShaders()) { | ||||
|                 assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); | ||||
|                 enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; | ||||
|             } | ||||
|             } else { | ||||
|             program.handle = glCreateProgram(); | ||||
|             for (size_t stage = 0; stage < 5; ++stage) { | ||||
|                 const auto code{sources[stage]}; | ||||
|                 if (code.empty()) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 AttachShader(Stage(stage), program.handle, code); | ||||
|             } | ||||
|             LinkProgram(program.handle); | ||||
|         } | ||||
|         if (shader_notify) { | ||||
|             shader_notify->MarkShaderComplete(); | ||||
|         if (!device.UseAssemblyShaders()) { | ||||
|             LinkProgram(program.handle); | ||||
|         } | ||||
|         u32 num_textures{}; | ||||
|         u32 num_images{}; | ||||
| @@ -198,6 +193,9 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c | ||||
|         if (assembly_shaders && xfb_state) { | ||||
|             GenerateTransformFeedbackState(*xfb_state); | ||||
|         } | ||||
|         if (shader_notify) { | ||||
|             shader_notify->MarkShaderComplete(); | ||||
|         } | ||||
|         is_built.store(true, std::memory_order_relaxed); | ||||
|     }}; | ||||
|     if (thread_worker) { | ||||
|   | ||||
| @@ -193,6 +193,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | ||||
|  | ||||
|           .lower_left_origin_mode = true, | ||||
|           .need_declared_frag_colors = true, | ||||
|           .need_fastmath_off = device.NeedsFastmathOff(), | ||||
|  | ||||
|           .has_broken_spirv_clamp = true, | ||||
|           .has_broken_unsigned_image_offsets = true, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user