Merge pull request #1987 from ReinUsesLisp/explicit-shader-ldg
gl_shader_cache: Use explicit bindings
This commit is contained in:
		| @@ -297,11 +297,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Shader); | ||||
|     auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||||
|  | ||||
|     // Next available bindpoints to use when uploading the const buffers and textures to the GLSL | ||||
|     // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. | ||||
|     u32 current_constbuffer_bindpoint = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; | ||||
|     u32 current_gmem_bindpoint = 0; | ||||
|     u32 current_texture_bindpoint = 0; | ||||
|     BaseBindings base_bindings; | ||||
|     std::array<bool, Maxwell::NumClipDistances> clip_distances{}; | ||||
|  | ||||
|     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
| @@ -325,47 +321,35 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | ||||
|         const GLintptr offset = buffer_cache.UploadHostMemory( | ||||
|             &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); | ||||
|  | ||||
|         // Bind the buffer | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(stage), buffer_cache.GetHandle(), | ||||
|                           offset, static_cast<GLsizeiptr>(sizeof(ubo))); | ||||
|         // Bind the emulation info buffer | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, base_bindings.cbuf, buffer_cache.GetHandle(), offset, | ||||
|                           static_cast<GLsizeiptr>(sizeof(ubo))); | ||||
|  | ||||
|         Shader shader{shader_cache.GetStageProgram(program)}; | ||||
|         const auto [program_handle, next_bindings] = | ||||
|             shader->GetProgramHandle(primitive_mode, base_bindings); | ||||
|  | ||||
|         switch (program) { | ||||
|         case Maxwell::ShaderProgram::VertexA: | ||||
|         case Maxwell::ShaderProgram::VertexB: { | ||||
|             shader_program_manager->UseProgrammableVertexShader( | ||||
|                 shader->GetProgramHandle(primitive_mode)); | ||||
|         case Maxwell::ShaderProgram::VertexB: | ||||
|             shader_program_manager->UseProgrammableVertexShader(program_handle); | ||||
|             break; | ||||
|         } | ||||
|         case Maxwell::ShaderProgram::Geometry: { | ||||
|             shader_program_manager->UseProgrammableGeometryShader( | ||||
|                 shader->GetProgramHandle(primitive_mode)); | ||||
|         case Maxwell::ShaderProgram::Geometry: | ||||
|             shader_program_manager->UseProgrammableGeometryShader(program_handle); | ||||
|             break; | ||||
|         } | ||||
|         case Maxwell::ShaderProgram::Fragment: { | ||||
|             shader_program_manager->UseProgrammableFragmentShader( | ||||
|                 shader->GetProgramHandle(primitive_mode)); | ||||
|         case Maxwell::ShaderProgram::Fragment: | ||||
|             shader_program_manager->UseProgrammableFragmentShader(program_handle); | ||||
|             break; | ||||
|         } | ||||
|         default: | ||||
|             LOG_CRITICAL(HW_GPU, "Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, | ||||
|                          shader_config.enable.Value(), shader_config.offset); | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|  | ||||
|         // Configure the const buffers for this shader stage. | ||||
|         current_constbuffer_bindpoint = | ||||
|             SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage), shader, primitive_mode, | ||||
|                               current_constbuffer_bindpoint); | ||||
|  | ||||
|         // Configure global memory regions for this shader stage. | ||||
|         current_gmem_bindpoint = SetupGlobalRegions(static_cast<Maxwell::ShaderStage>(stage), | ||||
|                                                     shader, primitive_mode, current_gmem_bindpoint); | ||||
|  | ||||
|         // Configure the textures for this shader stage. | ||||
|         current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, | ||||
|                                                   primitive_mode, current_texture_bindpoint); | ||||
|         const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | ||||
|         SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); | ||||
|         SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); | ||||
|         SetupTextures(stage_enum, shader, program_handle, base_bindings); | ||||
|  | ||||
|         // Workaround for Intel drivers. | ||||
|         // When a clip distance is enabled but not set in the shader it crops parts of the screen | ||||
| @@ -380,6 +364,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | ||||
|             // VertexB was combined with VertexA, so we skip the VertexB iteration | ||||
|             index++; | ||||
|         } | ||||
|  | ||||
|         base_bindings = next_bindings; | ||||
|     } | ||||
|  | ||||
|     SyncClipEnabled(clip_distances); | ||||
| @@ -929,8 +915,9 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr | ||||
|     } | ||||
| } | ||||
|  | ||||
| u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader, | ||||
|                                         GLenum primitive_mode, u32 current_bindpoint) { | ||||
| void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                          const Shader& shader, GLuint program_handle, | ||||
|                                          BaseBindings base_bindings) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& gpu = Core::System::GetInstance().GPU(); | ||||
|     const auto& maxwell3d = gpu.Maxwell3D(); | ||||
| @@ -978,92 +965,73 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shad | ||||
|         size = Common::AlignUp(size, sizeof(GLvec4)); | ||||
|         ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); | ||||
|  | ||||
|         GLintptr const_buffer_offset = buffer_cache.UploadMemory( | ||||
|         const GLintptr const_buffer_offset = buffer_cache.UploadMemory( | ||||
|             buffer.address, size, static_cast<std::size_t>(uniform_buffer_alignment)); | ||||
|  | ||||
|         // Now configure the bindpoint of the buffer inside the shader | ||||
|         glUniformBlockBinding(shader->GetProgramHandle(primitive_mode), | ||||
|                               shader->GetProgramResourceIndex(used_buffer), | ||||
|                               current_bindpoint + bindpoint); | ||||
|  | ||||
|         // Prepare values for multibind | ||||
|         bind_buffers[bindpoint] = buffer_cache.GetHandle(); | ||||
|         bind_offsets[bindpoint] = const_buffer_offset; | ||||
|         bind_sizes[bindpoint] = size; | ||||
|     } | ||||
|  | ||||
|     glBindBuffersRange(GL_UNIFORM_BUFFER, current_bindpoint, static_cast<GLsizei>(entries.size()), | ||||
|     // The first binding is reserved for emulation values | ||||
|     const GLuint ubo_base_binding = base_bindings.cbuf + 1; | ||||
|     glBindBuffersRange(GL_UNIFORM_BUFFER, ubo_base_binding, static_cast<GLsizei>(entries.size()), | ||||
|                        bind_buffers.data(), bind_offsets.data(), bind_sizes.data()); | ||||
|  | ||||
|     return current_bindpoint + static_cast<u32>(entries.size()); | ||||
| } | ||||
|  | ||||
| u32 RasterizerOpenGL::SetupGlobalRegions(Maxwell::ShaderStage stage, Shader& shader, | ||||
|                                          GLenum primitive_mode, u32 current_bindpoint) { | ||||
|     for (const auto& global_region : shader->GetShaderEntries().global_memory_entries) { | ||||
|         const auto& region = | ||||
|             global_cache.GetGlobalRegion(global_region, static_cast<Maxwell::ShaderStage>(stage)); | ||||
|         const GLuint block_index{shader->GetProgramResourceIndex(global_region)}; | ||||
|         ASSERT(block_index != GL_INVALID_INDEX); | ||||
| void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                           const Shader& shader, GLenum primitive_mode, | ||||
|                                           BaseBindings base_bindings) { | ||||
|     // TODO(Rodrigo): Use ARB_multi_bind here | ||||
|     const auto& entries = shader->GetShaderEntries().global_memory_entries; | ||||
|  | ||||
|     for (u32 bindpoint = 0; bindpoint < static_cast<u32>(entries.size()); ++bindpoint) { | ||||
|         const auto& entry = entries[bindpoint]; | ||||
|         const u32 current_bindpoint = base_bindings.gmem + bindpoint; | ||||
|         const auto& region = global_cache.GetGlobalRegion(entry, stage); | ||||
|  | ||||
|         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, current_bindpoint, region->GetBufferHandle()); | ||||
|         glShaderStorageBlockBinding(shader->GetProgramHandle(primitive_mode), block_index, | ||||
|                                     current_bindpoint); | ||||
|         ++current_bindpoint; | ||||
|     } | ||||
|  | ||||
|     return current_bindpoint; | ||||
| } | ||||
|  | ||||
| u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, | ||||
|                                     GLenum primitive_mode, u32 current_unit) { | ||||
| void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | ||||
|                                      GLuint program_handle, BaseBindings base_bindings) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Texture); | ||||
|     const auto& gpu = Core::System::GetInstance().GPU(); | ||||
|     const auto& maxwell3d = gpu.Maxwell3D(); | ||||
|     const auto& entries = shader->GetShaderEntries().samplers; | ||||
|  | ||||
|     ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units), | ||||
|     ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.texture_units), | ||||
|                "Exceeded the number of active textures."); | ||||
|  | ||||
|     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { | ||||
|         const auto& entry = entries[bindpoint]; | ||||
|         const u32 current_bindpoint = current_unit + bindpoint; | ||||
|  | ||||
|         // Bind the uniform to the sampler. | ||||
|  | ||||
|         glProgramUniform1i(shader->GetProgramHandle(primitive_mode), | ||||
|                            shader->GetUniformLocation(entry), current_bindpoint); | ||||
|         const u32 current_bindpoint = base_bindings.sampler + bindpoint; | ||||
|         auto& unit = state.texture_units[current_bindpoint]; | ||||
|  | ||||
|         const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset()); | ||||
|  | ||||
|         if (!texture.enabled) { | ||||
|             state.texture_units[current_bindpoint].texture = 0; | ||||
|             unit.texture = 0; | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc); | ||||
|  | ||||
|         Surface surface = res_cache.GetTextureSurface(texture, entry); | ||||
|         if (surface != nullptr) { | ||||
|             const GLuint handle = | ||||
|             unit.texture = | ||||
|                 entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle; | ||||
|             const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); | ||||
|             state.texture_units[current_bindpoint].texture = handle; | ||||
|             state.texture_units[current_bindpoint].target = target; | ||||
|             state.texture_units[current_bindpoint].swizzle.r = | ||||
|                 MaxwellToGL::SwizzleSource(texture.tic.x_source); | ||||
|             state.texture_units[current_bindpoint].swizzle.g = | ||||
|                 MaxwellToGL::SwizzleSource(texture.tic.y_source); | ||||
|             state.texture_units[current_bindpoint].swizzle.b = | ||||
|                 MaxwellToGL::SwizzleSource(texture.tic.z_source); | ||||
|             state.texture_units[current_bindpoint].swizzle.a = | ||||
|                 MaxwellToGL::SwizzleSource(texture.tic.w_source); | ||||
|             unit.target = entry.IsArray() ? surface->TargetLayer() : surface->Target(); | ||||
|             unit.swizzle.r = MaxwellToGL::SwizzleSource(texture.tic.x_source); | ||||
|             unit.swizzle.g = MaxwellToGL::SwizzleSource(texture.tic.y_source); | ||||
|             unit.swizzle.b = MaxwellToGL::SwizzleSource(texture.tic.z_source); | ||||
|             unit.swizzle.a = MaxwellToGL::SwizzleSource(texture.tic.w_source); | ||||
|         } else { | ||||
|             // Can occur when texture addr is null or its memory is unmapped/invalid | ||||
|             state.texture_units[current_bindpoint].texture = 0; | ||||
|             unit.texture = 0; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return current_unit + static_cast<u32>(entries.size()); | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { | ||||
|   | ||||
| @@ -127,35 +127,18 @@ private: | ||||
|                                bool using_depth_fb = true, bool preserve_contents = true, | ||||
|                                std::optional<std::size_t> single_color_target = {}); | ||||
|  | ||||
|     /** | ||||
|      * Configures the current constbuffers to use for the draw command. | ||||
|      * @param stage The shader stage to configure buffers for. | ||||
|      * @param shader The shader object that contains the specified stage. | ||||
|      * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | ||||
|      * @returns The next available bindpoint for use in the next shader stage. | ||||
|      */ | ||||
|     u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||||
|                           GLenum primitive_mode, u32 current_bindpoint); | ||||
|     /// Configures the current constbuffers to use for the draw command. | ||||
|     void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, | ||||
|                            GLuint program_handle, BaseBindings base_bindings); | ||||
|  | ||||
|     /** | ||||
|      * Configures the current global memory regions to use for the draw command. | ||||
|      * @param stage The shader stage to configure buffers for. | ||||
|      * @param shader The shader object that contains the specified stage. | ||||
|      * @param current_bindpoint The offset at which to start counting new buffer bindpoints. | ||||
|      * @returns The next available bindpoint for use in the next shader stage. | ||||
|      */ | ||||
|     u32 SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||||
|                            GLenum primitive_mode, u32 current_bindpoint); | ||||
|     /// Configures the current global memory entries to use for the draw command. | ||||
|     void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                             const Shader& shader, GLenum primitive_mode, | ||||
|                             BaseBindings base_bindings); | ||||
|  | ||||
|     /** | ||||
|      * Configures the current textures to use for the draw command. | ||||
|      * @param stage The shader stage to configure textures for. | ||||
|      * @param shader The shader object that contains the specified stage. | ||||
|      * @param current_unit The offset at which to start counting unused texture units. | ||||
|      * @returns The next available bindpoint for use in the next shader stage. | ||||
|      */ | ||||
|     u32 SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader, | ||||
|                       GLenum primitive_mode, u32 current_unit); | ||||
|     /// Configures the current textures to use for the draw command. | ||||
|     void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, | ||||
|                        GLuint program_handle, BaseBindings base_bindings); | ||||
|  | ||||
|     /// Syncs the viewport and depth range to match the guest state | ||||
|     void SyncViewport(OpenGLState& current_state); | ||||
|   | ||||
| @@ -34,36 +34,25 @@ static ProgramCode GetShaderCode(VAddr addr) { | ||||
|     return program_code; | ||||
| } | ||||
|  | ||||
| /// Helper function to set shader uniform block bindings for a single shader stage | ||||
| static void SetShaderUniformBlockBinding(GLuint shader, const char* name, | ||||
|                                          Maxwell::ShaderStage binding, std::size_t expected_size) { | ||||
|     const GLuint ub_index = glGetUniformBlockIndex(shader, name); | ||||
|     if (ub_index == GL_INVALID_INDEX) { | ||||
|         return; | ||||
| /// Gets the shader type from a Maxwell program type | ||||
| constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | ||||
|     switch (program_type) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
|     case Maxwell::ShaderProgram::VertexB: | ||||
|         return GL_VERTEX_SHADER; | ||||
|     case Maxwell::ShaderProgram::Geometry: | ||||
|         return GL_GEOMETRY_SHADER; | ||||
|     case Maxwell::ShaderProgram::Fragment: | ||||
|         return GL_FRAGMENT_SHADER; | ||||
|     default: | ||||
|         return GL_NONE; | ||||
|     } | ||||
|  | ||||
|     GLint ub_size = 0; | ||||
|     glGetActiveUniformBlockiv(shader, ub_index, GL_UNIFORM_BLOCK_DATA_SIZE, &ub_size); | ||||
|     ASSERT_MSG(static_cast<std::size_t>(ub_size) == expected_size, | ||||
|                "Uniform block size did not match! Got {}, expected {}", ub_size, expected_size); | ||||
|     glUniformBlockBinding(shader, ub_index, static_cast<GLuint>(binding)); | ||||
| } | ||||
|  | ||||
| /// Sets shader uniform block bindings for an entire shader program | ||||
| static void SetShaderUniformBlockBindings(GLuint shader) { | ||||
|     SetShaderUniformBlockBinding(shader, "vs_config", Maxwell::ShaderStage::Vertex, | ||||
|                                  sizeof(GLShader::MaxwellUniformData)); | ||||
|     SetShaderUniformBlockBinding(shader, "gs_config", Maxwell::ShaderStage::Geometry, | ||||
|                                  sizeof(GLShader::MaxwellUniformData)); | ||||
|     SetShaderUniformBlockBinding(shader, "fs_config", Maxwell::ShaderStage::Fragment, | ||||
|                                  sizeof(GLShader::MaxwellUniformData)); | ||||
| } | ||||
|  | ||||
| CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | ||||
|     : addr{addr}, program_type{program_type}, setup{GetShaderCode(addr)} { | ||||
|  | ||||
|     GLShader::ProgramResult program_result; | ||||
|     GLenum gl_type{}; | ||||
|  | ||||
|     switch (program_type) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
| @@ -74,17 +63,14 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | ||||
|     case Maxwell::ShaderProgram::VertexB: | ||||
|         CalculateProperties(); | ||||
|         program_result = GLShader::GenerateVertexShader(setup); | ||||
|         gl_type = GL_VERTEX_SHADER; | ||||
|         break; | ||||
|     case Maxwell::ShaderProgram::Geometry: | ||||
|         CalculateProperties(); | ||||
|         program_result = GLShader::GenerateGeometryShader(setup); | ||||
|         gl_type = GL_GEOMETRY_SHADER; | ||||
|         break; | ||||
|     case Maxwell::ShaderProgram::Fragment: | ||||
|         CalculateProperties(); | ||||
|         program_result = GLShader::GenerateFragmentShader(setup); | ||||
|         gl_type = GL_FRAGMENT_SHADER; | ||||
|         break; | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | ||||
| @@ -92,71 +78,105 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type) | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     code = program_result.first; | ||||
|     entries = program_result.second; | ||||
|     shader_length = entries.shader_length; | ||||
| } | ||||
|  | ||||
|     if (program_type != Maxwell::ShaderProgram::Geometry) { | ||||
|         OGLShader shader; | ||||
|         shader.Create(program_result.first.c_str(), gl_type); | ||||
|         program.Create(true, shader.handle); | ||||
|         SetShaderUniformBlockBindings(program.handle); | ||||
|         LabelGLObject(GL_PROGRAM, program.handle, addr); | ||||
| std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive_mode, | ||||
|                                                                 BaseBindings base_bindings) { | ||||
|     GLuint handle{}; | ||||
|     if (program_type == Maxwell::ShaderProgram::Geometry) { | ||||
|         handle = GetGeometryShader(primitive_mode, base_bindings); | ||||
|     } else { | ||||
|         // Store shader's code to lazily build it on draw | ||||
|         geometry_programs.code = program_result.first; | ||||
|         const auto [entry, is_cache_miss] = programs.try_emplace(base_bindings); | ||||
|         auto& program = entry->second; | ||||
|         if (is_cache_miss) { | ||||
|             std::string source = AllocateBindings(base_bindings); | ||||
|             source += code; | ||||
|  | ||||
|             OGLShader shader; | ||||
|             shader.Create(source.c_str(), GetShaderType(program_type)); | ||||
|             program.Create(true, shader.handle); | ||||
|             LabelGLObject(GL_PROGRAM, program.handle, addr); | ||||
|         } | ||||
|  | ||||
|         handle = program.handle; | ||||
|     } | ||||
|  | ||||
|     // Add const buffer and samplers offset reserved by this shader. One UBO binding is reserved for | ||||
|     // emulation values | ||||
|     base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + 1; | ||||
|     base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||||
|     base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||||
|  | ||||
|     return {handle, base_bindings}; | ||||
| } | ||||
|  | ||||
| /// Builds the GLSL preamble for this shader: the "#version" line followed by one "#define" per | ||||
| /// resource, each naming the binding slot assigned to that resource. | ||||
| /// Slots are handed out consecutively starting at the values carried by base_bindings. The first | ||||
| /// uniform buffer slot goes to the emulation UBO and the constant buffers follow it. | ||||
| std::string CachedShader::AllocateBindings(BaseBindings base_bindings) { | ||||
|     // base_bindings is received by value, so advancing these counters never affects the caller. | ||||
|     u32 next_cbuf = base_bindings.cbuf; | ||||
|     u32 next_gmem = base_bindings.gmem; | ||||
|     u32 next_sampler = base_bindings.sampler; | ||||
|  | ||||
|     std::string preamble = "#version 430 core\n"; | ||||
|  | ||||
|     preamble += fmt::format("#define EMULATION_UBO_BINDING {}\n", next_cbuf); | ||||
|     ++next_cbuf; | ||||
|  | ||||
|     for (const auto& cbuf : entries.const_buffers) { | ||||
|         preamble += fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), next_cbuf); | ||||
|         ++next_cbuf; | ||||
|     } | ||||
|  | ||||
|     for (const auto& gmem : entries.global_memory_entries) { | ||||
|         preamble += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), | ||||
|                                 gmem.GetCbufOffset(), next_gmem); | ||||
|         ++next_gmem; | ||||
|     } | ||||
|  | ||||
|     for (const auto& sampler : entries.samplers) { | ||||
|         preamble += | ||||
|             fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), next_sampler); | ||||
|         ++next_sampler; | ||||
|     } | ||||
|  | ||||
|     return preamble; | ||||
| } | ||||
|  | ||||
| /// Returns the handle of the geometry program variant that matches the input topology class of | ||||
| /// primitive_mode, taken from the set of variants kept for base_bindings. | ||||
| GLuint CachedShader::GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings) { | ||||
|     // try_emplace default-constructs the per-topology program set the first time these base | ||||
|     // bindings are seen and returns the already stored set otherwise. | ||||
|     GeometryPrograms& variants = geometry_programs.try_emplace(base_bindings).first->second; | ||||
|  | ||||
|     // Every case forwards the same base bindings; only the slot, topology and label differ. | ||||
|     const auto select = [this, base_bindings](OGLProgram& slot, const char* glsl_topology, | ||||
|                                               u32 max_vertices, const char* debug_name) { | ||||
|         return LazyGeometryProgram(slot, base_bindings, glsl_topology, max_vertices, debug_name); | ||||
|     }; | ||||
|  | ||||
|     switch (primitive_mode) { | ||||
|     case GL_POINTS: | ||||
|         return select(variants.points, "points", 1, "ShaderPoints"); | ||||
|     case GL_LINES: | ||||
|     case GL_LINE_STRIP: | ||||
|         return select(variants.lines, "lines", 2, "ShaderLines"); | ||||
|     case GL_LINES_ADJACENCY: | ||||
|     case GL_LINE_STRIP_ADJACENCY: | ||||
|         return select(variants.lines_adjacency, "lines_adjacency", 4, "ShaderLinesAdjacency"); | ||||
|     case GL_TRIANGLES: | ||||
|     case GL_TRIANGLE_STRIP: | ||||
|     case GL_TRIANGLE_FAN: | ||||
|         return select(variants.triangles, "triangles", 3, "ShaderTriangles"); | ||||
|     case GL_TRIANGLES_ADJACENCY: | ||||
|     case GL_TRIANGLE_STRIP_ADJACENCY: | ||||
|         return select(variants.triangles_adjacency, "triangles_adjacency", 6, | ||||
|                       "ShaderTrianglesAdjacency"); | ||||
|     default: | ||||
|         // Unknown modes are reported and then treated like point input. | ||||
|         UNREACHABLE_MSG("Unknown primitive mode."); | ||||
|         return select(variants.points, "points", 1, "ShaderPoints"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the GL_UNIFORM_BLOCK resource index of the given const buffer in `program`. | ||||
| /// The index is queried from GL the first time a buffer hash is seen and served from | ||||
| /// cbuf_resource_cache afterwards. | ||||
| GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) { | ||||
|     const auto key = buffer.GetHash(); | ||||
|     if (const auto cached = cbuf_resource_cache.find(key); cached != cbuf_resource_cache.end()) { | ||||
|         return cached->second; | ||||
|     } | ||||
|  | ||||
|     // Cache miss: ask the driver once and remember the answer under the buffer's hash. | ||||
|     const GLuint block_index = | ||||
|         glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str()); | ||||
|     cbuf_resource_cache.emplace(key, block_index); | ||||
|     return block_index; | ||||
| } | ||||
|  | ||||
| /// Returns the GL_SHADER_STORAGE_BLOCK resource index of the given global memory entry in | ||||
| /// `program`. The index is queried from GL the first time an entry hash is seen and served from | ||||
| /// gmem_resource_cache afterwards. | ||||
| GLuint CachedShader::GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem) { | ||||
|     const auto key = global_mem.GetHash(); | ||||
|     if (const auto cached = gmem_resource_cache.find(key); cached != gmem_resource_cache.end()) { | ||||
|         return cached->second; | ||||
|     } | ||||
|  | ||||
|     // Cache miss: ask the driver once and remember the answer under the entry's hash. | ||||
|     const GLuint block_index = glGetProgramResourceIndex(program.handle, GL_SHADER_STORAGE_BLOCK, | ||||
|                                                          global_mem.GetName().c_str()); | ||||
|     gmem_resource_cache.emplace(key, block_index); | ||||
|     return block_index; | ||||
| } | ||||
|  | ||||
| /// Returns the uniform location of the given sampler in `program`. | ||||
| /// The location is queried from GL the first time a sampler hash is seen and served from | ||||
| /// uniform_cache afterwards. | ||||
| GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) { | ||||
|     const auto key = sampler.GetHash(); | ||||
|     if (const auto cached = uniform_cache.find(key); cached != uniform_cache.end()) { | ||||
|         return cached->second; | ||||
|     } | ||||
|  | ||||
|     // Cache miss: ask the driver once and remember the answer under the sampler's hash. | ||||
|     const GLint location = glGetUniformLocation(program.handle, sampler.GetName().c_str()); | ||||
|     uniform_cache.emplace(key, location); | ||||
|     return location; | ||||
| } | ||||
|  | ||||
| GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, | ||||
| GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, | ||||
|                                          const std::string& glsl_topology, u32 max_vertices, | ||||
|                                          const std::string& debug_name) { | ||||
|     if (target_program.handle != 0) { | ||||
|         return target_program.handle; | ||||
|     } | ||||
|     std::string source = "#version 430 core\n"; | ||||
|     std::string source = AllocateBindings(base_bindings); | ||||
|     source += "layout (" + glsl_topology + ") in;\n"; | ||||
|     source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | ||||
|     source += geometry_programs.code; | ||||
|     source += code; | ||||
|  | ||||
|     OGLShader shader; | ||||
|     shader.Create(source.c_str(), GL_GEOMETRY_SHADER); | ||||
|     target_program.Create(true, shader.handle); | ||||
|     SetShaderUniformBlockBindings(target_program.handle); | ||||
|     LabelGLObject(GL_PROGRAM, target_program.handle, addr, debug_name); | ||||
|     return target_program.handle; | ||||
| }; | ||||
|   | ||||
| @@ -7,6 +7,9 @@ | ||||
| #include <array> | ||||
| #include <map> | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
|  | ||||
| #include <glad/glad.h> | ||||
|  | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| @@ -23,6 +26,16 @@ class RasterizerOpenGL; | ||||
| using Shader = std::shared_ptr<CachedShader>; | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
|  | ||||
| /// Starting binding slot for each kind of shader resource: uniform buffers (cbuf), global memory | ||||
| /// regions (gmem) and samplers. All slots default to zero. | ||||
| struct BaseBindings { | ||||
|     u32 cbuf{}; | ||||
|     u32 gmem{}; | ||||
|     u32 sampler{}; | ||||
|  | ||||
|     /// Lexicographic ordering over (cbuf, gmem, sampler). | ||||
|     bool operator<(const BaseBindings& rhs) const { | ||||
|         const auto lhs_key = std::tie(cbuf, gmem, sampler); | ||||
|         const auto rhs_key = std::tie(rhs.cbuf, rhs.gmem, rhs.sampler); | ||||
|         return lhs_key < rhs_key; | ||||
|     } | ||||
| }; | ||||
|  | ||||
| class CachedShader final : public RasterizerCacheObject { | ||||
| public: | ||||
|     CachedShader(VAddr addr, Maxwell::ShaderProgram program_type); | ||||
| @@ -44,71 +57,42 @@ public: | ||||
|     } | ||||
|  | ||||
|     /// Gets the GL program handle for the shader | ||||
|     GLuint GetProgramHandle(GLenum primitive_mode) { | ||||
|         if (program_type != Maxwell::ShaderProgram::Geometry) { | ||||
|             return program.handle; | ||||
|         } | ||||
|         switch (primitive_mode) { | ||||
|         case GL_POINTS: | ||||
|             return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||||
|         case GL_LINES: | ||||
|         case GL_LINE_STRIP: | ||||
|             return LazyGeometryProgram(geometry_programs.lines, "lines", 2, "ShaderLines"); | ||||
|         case GL_LINES_ADJACENCY: | ||||
|         case GL_LINE_STRIP_ADJACENCY: | ||||
|             return LazyGeometryProgram(geometry_programs.lines_adjacency, "lines_adjacency", 4, | ||||
|                                        "ShaderLinesAdjacency"); | ||||
|         case GL_TRIANGLES: | ||||
|         case GL_TRIANGLE_STRIP: | ||||
|         case GL_TRIANGLE_FAN: | ||||
|             return LazyGeometryProgram(geometry_programs.triangles, "triangles", 3, | ||||
|                                        "ShaderTriangles"); | ||||
|         case GL_TRIANGLES_ADJACENCY: | ||||
|         case GL_TRIANGLE_STRIP_ADJACENCY: | ||||
|             return LazyGeometryProgram(geometry_programs.triangles_adjacency, "triangles_adjacency", | ||||
|                                        6, "ShaderTrianglesAdjacency"); | ||||
|         default: | ||||
|             UNREACHABLE_MSG("Unknown primitive mode."); | ||||
|             return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints"); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Gets the GL program resource location for the specified resource, caching as needed | ||||
|     GLuint GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer); | ||||
|  | ||||
|     /// Gets the GL program resource location for the specified resource, caching as needed | ||||
|     GLuint GetProgramResourceIndex(const GLShader::GlobalMemoryEntry& global_mem); | ||||
|  | ||||
|     /// Gets the GL uniform location for the specified resource, caching as needed | ||||
|     GLint GetUniformLocation(const GLShader::SamplerEntry& sampler); | ||||
|     std::tuple<GLuint, BaseBindings> GetProgramHandle(GLenum primitive_mode, | ||||
|                                                       BaseBindings base_bindings); | ||||
|  | ||||
| private: | ||||
|     /// Generates a geometry shader or returns one that already exists. | ||||
|     GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology, | ||||
|                                u32 max_vertices, const std::string& debug_name); | ||||
|  | ||||
|     void CalculateProperties(); | ||||
|  | ||||
|     VAddr addr; | ||||
|     std::size_t shader_length; | ||||
|     Maxwell::ShaderProgram program_type; | ||||
|     GLShader::ShaderSetup setup; | ||||
|     GLShader::ShaderEntries entries; | ||||
|  | ||||
|     // Non-geometry program. | ||||
|     OGLProgram program; | ||||
|  | ||||
|     // Geometry programs. These are needed because GLSL needs an input topology but it's not | ||||
|     // declared by the hardware. Work around this issue by generating a different shader per input | ||||
|     // topology class. | ||||
|     struct { | ||||
|         std::string code; | ||||
|     struct GeometryPrograms { | ||||
|         OGLProgram points; | ||||
|         OGLProgram lines; | ||||
|         OGLProgram lines_adjacency; | ||||
|         OGLProgram triangles; | ||||
|         OGLProgram triangles_adjacency; | ||||
|     } geometry_programs; | ||||
|     }; | ||||
|  | ||||
|     std::string AllocateBindings(BaseBindings base_bindings); | ||||
|  | ||||
|     GLuint GetGeometryShader(GLenum primitive_mode, BaseBindings base_bindings); | ||||
|  | ||||
|     /// Generates a geometry shader or returns one that already exists. | ||||
|     GLuint LazyGeometryProgram(OGLProgram& target_program, BaseBindings base_bindings, | ||||
|                                const std::string& glsl_topology, u32 max_vertices, | ||||
|                                const std::string& debug_name); | ||||
|  | ||||
|     void CalculateProperties(); | ||||
|  | ||||
|     VAddr addr{}; | ||||
|     std::size_t shader_length{}; | ||||
|     Maxwell::ShaderProgram program_type{}; | ||||
|     GLShader::ShaderSetup setup; | ||||
|     GLShader::ShaderEntries entries; | ||||
|  | ||||
|     std::string code; | ||||
|  | ||||
|     std::map<BaseBindings, OGLProgram> programs; | ||||
|     std::map<BaseBindings, GeometryPrograms> geometry_programs; | ||||
|  | ||||
|     std::map<u32, GLuint> cbuf_resource_cache; | ||||
|     std::map<u32, GLuint> gmem_resource_cache; | ||||
|   | ||||
| @@ -374,7 +374,8 @@ private: | ||||
|     void DeclareConstantBuffers() { | ||||
|         for (const auto& entry : ir.GetConstantBuffers()) { | ||||
|             const auto [index, size] = entry; | ||||
|             code.AddLine("layout (std140) uniform " + GetConstBufferBlock(index) + " {"); | ||||
|             code.AddLine("layout (std140, binding = CBUF_BINDING_" + std::to_string(index) + | ||||
|                          ") uniform " + GetConstBufferBlock(index) + " {"); | ||||
|             code.AddLine("    vec4 " + GetConstBuffer(index) + "[MAX_CONSTBUFFER_ELEMENTS];"); | ||||
|             code.AddLine("};"); | ||||
|             code.AddNewLine(); | ||||
| @@ -383,7 +384,10 @@ private: | ||||
|  | ||||
|     void DeclareGlobalMemory() { | ||||
|         for (const auto& entry : ir.GetGlobalMemoryBases()) { | ||||
|             code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {"); | ||||
|             const std::string binding = | ||||
|                 fmt::format("GMEM_BINDING_{}_{}", entry.cbuf_index, entry.cbuf_offset); | ||||
|             code.AddLine("layout (std430, binding = " + binding + ") buffer " + | ||||
|                          GetGlobalMemoryBlock(entry) + " {"); | ||||
|             code.AddLine("    float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];"); | ||||
|             code.AddLine("};"); | ||||
|             code.AddNewLine(); | ||||
| @@ -413,7 +417,8 @@ private: | ||||
|             if (sampler.IsShadow()) | ||||
|                 sampler_type += "Shadow"; | ||||
|  | ||||
|             code.AddLine("uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); | ||||
|             code.AddLine("layout (binding = SAMPLER_BINDING_" + std::to_string(sampler.GetIndex()) + | ||||
|                          ") uniform " + sampler_type + ' ' + GetSampler(sampler) + ';'); | ||||
|         } | ||||
|         if (!samplers.empty()) | ||||
|             code.AddNewLine(); | ||||
|   | ||||
| @@ -38,10 +38,6 @@ public: | ||||
|         return index; | ||||
|     } | ||||
|  | ||||
|     /// Builds a 32-bit key for this entry: the stage value shifted left by 16 bits, OR'd with | ||||
|     /// the index. | ||||
|     u32 GetHash() const { | ||||
|         const u32 stage_bits = static_cast<u32>(stage) << 16; | ||||
|         return stage_bits | index; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     std::string name; | ||||
|     Maxwell::ShaderStage stage{}; | ||||
| @@ -62,10 +58,6 @@ public: | ||||
|         return stage; | ||||
|     } | ||||
|  | ||||
|     /// Builds a 32-bit key for this entry: the stage value shifted left by 16 bits, OR'd with | ||||
|     /// the value of GetIndex() converted to u32. | ||||
|     u32 GetHash() const { | ||||
|         const u32 stage_bits = static_cast<u32>(stage) << 16; | ||||
|         const u32 index_bits = static_cast<u32>(GetIndex()); | ||||
|         return stage_bits | index_bits; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     std::string name; | ||||
|     Maxwell::ShaderStage stage{}; | ||||
| @@ -93,10 +85,6 @@ public: | ||||
|         return stage; | ||||
|     } | ||||
|  | ||||
|     /// Builds a 32-bit key for this entry by OR-ing three fields together: the stage value | ||||
|     /// shifted left by 24 bits, cbuf_index shifted left by 16 bits, and cbuf_offset unshifted. | ||||
|     u32 GetHash() const { | ||||
|         const u32 stage_bits = static_cast<u32>(stage) << 24; | ||||
|         const u32 index_bits = cbuf_index << 16; | ||||
|         return stage_bits | index_bits | cbuf_offset; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     u32 cbuf_index{}; | ||||
|     u32 cbuf_offset{}; | ||||
|   | ||||
| @@ -20,15 +20,14 @@ static constexpr u32 PROGRAM_OFFSET{10}; | ||||
| ProgramResult GenerateVertexShader(const ShaderSetup& setup) { | ||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||||
|  | ||||
|     std::string out = "#version 430 core\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     out += "// Shader Unique Id: VS" + id + "\n\n"; | ||||
|     out += GetCommonDeclarations(); | ||||
|  | ||||
|     out += R"( | ||||
| layout (location = 0) out vec4 position; | ||||
|  | ||||
| layout(std140) uniform vs_config { | ||||
| layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | ||||
|     vec4 viewport_flip; | ||||
|     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | ||||
|     uvec4 alpha_test; | ||||
| @@ -78,7 +77,6 @@ void main() { | ||||
| } | ||||
|  | ||||
| ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | ||||
|     // Version is intentionally skipped in shader generation, it's added by the lazy compilation. | ||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||||
|  | ||||
|     std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
| @@ -89,7 +87,7 @@ ProgramResult GenerateGeometryShader(const ShaderSetup& setup) { | ||||
| layout (location = 0) in vec4 gs_position[]; | ||||
| layout (location = 0) out vec4 position; | ||||
|  | ||||
| layout (std140) uniform gs_config { | ||||
| layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | ||||
|     vec4 viewport_flip; | ||||
|     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | ||||
|     uvec4 alpha_test; | ||||
| @@ -112,8 +110,7 @@ void main() { | ||||
| ProgramResult GenerateFragmentShader(const ShaderSetup& setup) { | ||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||||
|  | ||||
|     std::string out = "#version 430 core\n"; | ||||
|     out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n"; | ||||
|     out += "// Shader Unique Id: FS" + id + "\n\n"; | ||||
|     out += GetCommonDeclarations(); | ||||
|  | ||||
| @@ -129,7 +126,7 @@ layout (location = 7) out vec4 FragColor7; | ||||
|  | ||||
| layout (location = 0) in vec4 position; | ||||
|  | ||||
| layout (std140) uniform fs_config { | ||||
| layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | ||||
|     vec4 viewport_flip; | ||||
|     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding | ||||
|     uvec4 alpha_test; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user