Merge pull request #2512 from ReinUsesLisp/comp-indexing
gl_shader_decompiler: Pessimize uniform buffer access on AMD's proprietary driver
This commit is contained in:
		| @@ -2,11 +2,14 @@ | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <glad/glad.h> | ||||
|  | ||||
| #include "common/logging/log.h" | ||||
| #include "common/scope_exit.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| @@ -24,6 +27,7 @@ Device::Device() { | ||||
|     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | ||||
|     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); | ||||
|     has_variable_aoffi = TestVariableAoffi(); | ||||
|     has_component_indexing_bug = TestComponentIndexingBug(); | ||||
| } | ||||
|  | ||||
| Device::Device(std::nullptr_t) { | ||||
| @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { | ||||
|     max_vertex_attributes = 16; | ||||
|     max_varyings = 15; | ||||
|     has_variable_aoffi = true; | ||||
|     has_component_indexing_bug = false; | ||||
| } | ||||
|  | ||||
| bool Device::TestVariableAoffi() { | ||||
| @@ -52,4 +57,53 @@ void main() { | ||||
|     return supported; | ||||
| } | ||||
|  | ||||
| /// Probes the driver for wrong results when a uniform buffer vector is indexed with a component | ||||
| /// chosen at runtime (i.e. `buffer[idx >> 2][idx & 3]`). | ||||
| /// | ||||
| /// A single point is drawn with a vertex shader that copies one dynamically indexed component of a | ||||
| /// uniform buffer into a shader storage buffer. The stored value is read back and compared with | ||||
| /// the data that was uploaded. | ||||
| /// | ||||
| /// @returns true as soon as a read back value differs from the uploaded one, false when every | ||||
| ///          tested component matches. | ||||
| bool Device::TestComponentIndexingBug() { | ||||
|     constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; | ||||
|     const GLchar* COMPONENT_TEST = R"(#version 430 core | ||||
| layout (std430, binding = 0) buffer OutputBuffer { | ||||
|     uint output_value; | ||||
| }; | ||||
| layout (std140, binding = 0) uniform InputBuffer { | ||||
|     uvec4 input_value[4096]; | ||||
| }; | ||||
| layout (location = 0) uniform uint idx; | ||||
| void main() { | ||||
|     output_value = input_value[idx >> 2][idx & 3]; | ||||
| })"; | ||||
|     const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; | ||||
|     SCOPE_EXIT({ glDeleteProgram(shader); }); | ||||
|     glUseProgram(shader); | ||||
|  | ||||
|     OGLVertexArray vao; | ||||
|     vao.Create(); | ||||
|     glBindVertexArray(vao.handle); | ||||
|  | ||||
|     // The first four entries fill input_value[0]; the last four are the components of | ||||
|     // input_value[1], which are the ones the loop below reads back. | ||||
|     constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; | ||||
|     OGLBuffer ubo; | ||||
|     ubo.Create(); | ||||
|     glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); | ||||
|     glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); | ||||
|  | ||||
|     OGLBuffer ssbo; | ||||
|     ssbo.Create(); | ||||
|     glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); | ||||
|  | ||||
|     for (GLuint index = 4; index < 8; ++index) { | ||||
|         glInvalidateBufferData(ssbo.handle); | ||||
|         glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); | ||||
|  | ||||
|         glProgramUniform1ui(shader, 0, index); | ||||
|         glDrawArrays(GL_POINTS, 0, 1); | ||||
|  | ||||
|         // Shader writes to storage buffers are incoherent. Without this barrier the readback | ||||
|         // below is not guaranteed to observe the value written by the draw call. | ||||
|         glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); | ||||
|  | ||||
|         // Zero-initialized so a failed readback compares against a known value instead of an | ||||
|         // indeterminate one. | ||||
|         GLuint result{}; | ||||
|         glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); | ||||
|         if (result != values.at(index)) { | ||||
|             LOG_INFO(Render_OpenGL, log_message, true); | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|     LOG_INFO(Render_OpenGL, log_message, false); | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -30,13 +30,19 @@ public: | ||||
|         return has_variable_aoffi; | ||||
|     } | ||||
|  | ||||
|     /// Returns the cached flag that tells whether variable component indexing of uniform | ||||
|     /// buffers has to be worked around on this device. | ||||
|     bool HasComponentIndexingBug() const { | ||||
|         return has_component_indexing_bug; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     static bool TestVariableAoffi(); | ||||
|     static bool TestComponentIndexingBug(); | ||||
|  | ||||
|     std::size_t uniform_buffer_alignment{}; | ||||
|     u32 max_vertex_attributes{}; | ||||
|     u32 max_varyings{}; | ||||
|     bool has_variable_aoffi{}; | ||||
|     bool has_component_indexing_bug{}; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
|   | ||||
| @@ -577,9 +577,26 @@ private: | ||||
|             if (std::holds_alternative<OperationNode>(*offset)) { | ||||
|                 // Indirect access | ||||
|                 const std::string final_offset = code.GenerateTemporary(); | ||||
|                 code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); | ||||
|                 return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), | ||||
|                                    final_offset, final_offset); | ||||
|                 code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); | ||||
|  | ||||
|                 if (!device.HasComponentIndexingBug()) { | ||||
|                     return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), | ||||
|                                        final_offset, final_offset); | ||||
|                 } | ||||
|  | ||||
|                 // AMD's proprietary GLSL compiler emits incorrect code for variable component access. | ||||
|                 // To work around this driver bug, generate 4 ifs, one per component. | ||||
|                 const std::string pack = code.GenerateTemporary(); | ||||
|                 code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), | ||||
|                              final_offset); | ||||
|  | ||||
|                 const std::string result = code.GenerateTemporary(); | ||||
|                 code.AddLine("float {};", result); | ||||
|                 for (u32 swizzle = 0; swizzle < 4; ++swizzle) { | ||||
|                     code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, | ||||
|                                  pack, GetSwizzle(swizzle)); | ||||
|                 } | ||||
|                 return result; | ||||
|             } | ||||
|  | ||||
|             UNREACHABLE_MSG("Unmanaged offset node type"); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user