From 459fce3a8f26241ff2a68c323e75fb70e7e1ba79 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 25 Jun 2019 07:57:32 -0400
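Subject: [PATCH] shader_ir: propagate shader size to the IR

CreateProgram now computes the program sizes once with
CalculateProgramSize and stores them in ShaderSetup (size_a/size_b).
They are passed to GetUniqueIdentifier, which only recomputes a size
when it is given as 0, and to the ShaderIR constructor.

ShaderIR keeps the size as program_size. When flow analysis fails, the
brute-force fallback now decodes up to program_size / sizeof(u64)
instead of MAX_PROGRAM_LENGTH, and ScanFlow is given
program_code.size() instead of MAX_PROGRAM_LENGTH.

The "Flow Analysis failed" message is also lowered from LOG_CRITICAL
to LOG_WARNING.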

---
 .../renderer_opengl/gl_shader_cache.cpp       | 22 +++++++++++++------
 .../renderer_opengl/gl_shader_gen.cpp         |  8 +++----
 .../renderer_opengl/gl_shader_gen.h           |  2 ++
 src/video_core/shader/decode.cpp              |  6 ++---
 src/video_core/shader/shader_ir.cpp           |  4 ++--
 src/video_core/shader/shader_ir.h             |  3 ++-
 6 files changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f9b2b03a0..5d76ee12d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
 
 /// Hashes one (or two) program streams
 u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code,
-                        const ProgramCode& code_b) {
-    u64 unique_identifier =
-        Common::CityHash64(reinterpret_cast<const char*>(code.data()), CalculateProgramSize(code));
+                        const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
+    if (size_a == 0) {
+        size_a = CalculateProgramSize(code);
+    }
+    u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
     if (program_type != Maxwell::ShaderProgram::VertexA) {
         return unique_identifier;
     }
@@ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
     std::size_t seed = 0;
     boost::hash_combine(seed, unique_identifier);
 
-    const u64 identifier_b = Common::CityHash64(reinterpret_cast<const char*>(code_b.data()),
-                                                CalculateProgramSize(code_b));
+    if (size_b == 0) {
+        size_b = CalculateProgramSize(code_b);
+    }
+    const u64 identifier_b =
+        Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
     boost::hash_combine(seed, identifier_b);
     return static_cast<u64>(seed);
 }
@@ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode&
 GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type,
                                       ProgramCode program_code, ProgramCode program_code_b) {
     GLShader::ShaderSetup setup(program_code);
+    setup.program.size_a = CalculateProgramSize(program_code);
+    setup.program.size_b = 0;
     if (program_type == Maxwell::ShaderProgram::VertexA) {
         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
         // Conventional HW does not support this, so we combine VertexA and VertexB into one
         // stage here.
         setup.SetProgramB(program_code_b);
+        setup.program.size_b = CalculateProgramSize(program_code_b);
     }
-    setup.program.unique_identifier =
-        GetUniqueIdentifier(program_type, program_code, program_code_b);
+    setup.program.unique_identifier = GetUniqueIdentifier(
+        program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
 
     switch (program_type) {
     case Maxwell::ShaderProgram::VertexA:
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 9148629ec..f9ee8429e 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex");
 
     out += program.first;
 
     if (setup.IsDualProgram()) {
-        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET);
+        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
         ProgramResult program_b =
             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b");
 
@@ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry");
     out += program.first;
@@ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
     ProgramResult program =
         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment");
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 0536c8a03..7cbc590f8 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -27,6 +27,8 @@ struct ShaderSetup {
         ProgramCode code;
         ProgramCode code_b; // Used for dual vertex shaders
         u64 unique_identifier;
+        std::size_t size_a;
+        std::size_t size_b;
     } program;
 
     /// Used in scenarios where we have a dual vertex shaders
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 65029d35e..09f55bd21 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -39,7 +39,7 @@ void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
     ShaderCharacteristics shader_info{};
-    bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info);
+    bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
     if (can_proceed) {
         coverage_begin = shader_info.start;
         coverage_end = shader_info.end;
@@ -52,12 +52,12 @@ void ShaderIR::Decode() {
         }
         return;
     }
-    LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
+    LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
 
     // Now we need to deal with an undecompilable shader. We need to brute force
     // a shader that captures every position.
     coverage_begin = shader_info.start;
-    const u32 shader_end = static_cast<u32>(MAX_PROGRAM_LENGTH);
+    const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
     coverage_end = shader_end;
     for (u32 label = main_offset; label < shader_end; label++) {
         basic_blocks.insert({label, DecodeRange(label, label + 1)});
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 11b545cca..5994bfc4e 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset)
-    : program_code{program_code}, main_offset{main_offset} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
+    : program_code{program_code}, main_offset{main_offset}, program_size{size} {
     Decode();
 }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index e71462e02..a67d4f390 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -65,7 +65,7 @@ struct GlobalMemoryUsage {
 
 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset);
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
     ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -316,6 +316,7 @@ private:
 
     const ProgramCode& program_code;
     const u32 main_offset;
+    const std::size_t program_size;
 
     u32 coverage_begin{};
     u32 coverage_end{};