Merge pull request #2976 from FernandoS27/cache-fast-brx-rebased
Implement Fast BRX, fix TXQ and adapt the Shader Cache for it
This commit is contained in:
		| @@ -85,10 +85,12 @@ set(HASH_FILES | |||||||
|     "${VIDEO_CORE}/shader/decode/xmad.cpp" |     "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||||||
|     "${VIDEO_CORE}/shader/ast.cpp" |     "${VIDEO_CORE}/shader/ast.cpp" | ||||||
|     "${VIDEO_CORE}/shader/ast.h" |     "${VIDEO_CORE}/shader/ast.h" | ||||||
|     "${VIDEO_CORE}/shader/control_flow.cpp" |  | ||||||
|     "${VIDEO_CORE}/shader/control_flow.h" |  | ||||||
|     "${VIDEO_CORE}/shader/compiler_settings.cpp" |     "${VIDEO_CORE}/shader/compiler_settings.cpp" | ||||||
|     "${VIDEO_CORE}/shader/compiler_settings.h" |     "${VIDEO_CORE}/shader/compiler_settings.h" | ||||||
|  |     "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||||||
|  |     "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||||||
|  |     "${VIDEO_CORE}/shader/control_flow.cpp" | ||||||
|  |     "${VIDEO_CORE}/shader/control_flow.h" | ||||||
|     "${VIDEO_CORE}/shader/decode.cpp" |     "${VIDEO_CORE}/shader/decode.cpp" | ||||||
|     "${VIDEO_CORE}/shader/expr.cpp" |     "${VIDEO_CORE}/shader/expr.cpp" | ||||||
|     "${VIDEO_CORE}/shader/expr.h" |     "${VIDEO_CORE}/shader/expr.h" | ||||||
|   | |||||||
| @@ -74,10 +74,12 @@ add_custom_command(OUTPUT scm_rev.cpp | |||||||
|       "${VIDEO_CORE}/shader/decode/xmad.cpp" |       "${VIDEO_CORE}/shader/decode/xmad.cpp" | ||||||
|       "${VIDEO_CORE}/shader/ast.cpp" |       "${VIDEO_CORE}/shader/ast.cpp" | ||||||
|       "${VIDEO_CORE}/shader/ast.h" |       "${VIDEO_CORE}/shader/ast.h" | ||||||
|       "${VIDEO_CORE}/shader/control_flow.cpp" |  | ||||||
|       "${VIDEO_CORE}/shader/control_flow.h" |  | ||||||
|       "${VIDEO_CORE}/shader/compiler_settings.cpp" |       "${VIDEO_CORE}/shader/compiler_settings.cpp" | ||||||
|       "${VIDEO_CORE}/shader/compiler_settings.h" |       "${VIDEO_CORE}/shader/compiler_settings.h" | ||||||
|  |       "${VIDEO_CORE}/shader/const_buffer_locker.cpp" | ||||||
|  |       "${VIDEO_CORE}/shader/const_buffer_locker.h" | ||||||
|  |       "${VIDEO_CORE}/shader/control_flow.cpp" | ||||||
|  |       "${VIDEO_CORE}/shader/control_flow.h" | ||||||
|       "${VIDEO_CORE}/shader/decode.cpp" |       "${VIDEO_CORE}/shader/decode.cpp" | ||||||
|       "${VIDEO_CORE}/shader/expr.cpp" |       "${VIDEO_CORE}/shader/expr.cpp" | ||||||
|       "${VIDEO_CORE}/shader/expr.h" |       "${VIDEO_CORE}/shader/expr.h" | ||||||
|   | |||||||
| @@ -6,6 +6,8 @@ | |||||||
|  |  | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include <utility> | ||||||
|  | #include <boost/functional/hash.hpp> | ||||||
| #include "common/cityhash.h" | #include "common/cityhash.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  |  | ||||||
| @@ -68,4 +70,13 @@ struct HashableStruct { | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | struct PairHash { | ||||||
|  |     template <class T1, class T2> | ||||||
|  |     std::size_t operator()(const std::pair<T1, T2>& pair) const noexcept { | ||||||
|  |         std::size_t seed = std::hash<T1>()(pair.first); | ||||||
|  |         boost::hash_combine(seed, std::hash<T2>()(pair.second)); | ||||||
|  |         return seed; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  |  | ||||||
| } // namespace Common | } // namespace Common | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ add_library(video_core STATIC | |||||||
|     dma_pusher.h |     dma_pusher.h | ||||||
|     debug_utils/debug_utils.cpp |     debug_utils/debug_utils.cpp | ||||||
|     debug_utils/debug_utils.h |     debug_utils/debug_utils.h | ||||||
|  |     engines/const_buffer_engine_interface.h | ||||||
|     engines/const_buffer_info.h |     engines/const_buffer_info.h | ||||||
|     engines/engine_upload.cpp |     engines/engine_upload.cpp | ||||||
|     engines/engine_upload.h |     engines/engine_upload.h | ||||||
| @@ -107,10 +108,12 @@ add_library(video_core STATIC | |||||||
|     shader/decode/other.cpp |     shader/decode/other.cpp | ||||||
|     shader/ast.cpp |     shader/ast.cpp | ||||||
|     shader/ast.h |     shader/ast.h | ||||||
|     shader/control_flow.cpp |  | ||||||
|     shader/control_flow.h |  | ||||||
|     shader/compiler_settings.cpp |     shader/compiler_settings.cpp | ||||||
|     shader/compiler_settings.h |     shader/compiler_settings.h | ||||||
|  |     shader/const_buffer_locker.cpp | ||||||
|  |     shader/const_buffer_locker.h | ||||||
|  |     shader/control_flow.cpp | ||||||
|  |     shader/control_flow.h | ||||||
|     shader/decode.cpp |     shader/decode.cpp | ||||||
|     shader/expr.cpp |     shader/expr.cpp | ||||||
|     shader/expr.h |     shader/expr.h | ||||||
|   | |||||||
							
								
								
									
										119
									
								
								src/video_core/engines/const_buffer_engine_interface.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								src/video_core/engines/const_buffer_engine_interface.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | |||||||
|  | // Copyright 2019 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <type_traits> | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/shader_bytecode.h" | ||||||
|  | #include "video_core/textures/texture.h" | ||||||
|  |  | ||||||
|  | namespace Tegra::Engines { | ||||||
|  |  | ||||||
|  | enum class ShaderType : u32 { | ||||||
|  |     Vertex = 0, | ||||||
|  |     TesselationControl = 1, | ||||||
|  |     TesselationEval = 2, | ||||||
|  |     Geometry = 3, | ||||||
|  |     Fragment = 4, | ||||||
|  |     Compute = 5, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct SamplerDescriptor { | ||||||
|  |     union { | ||||||
|  |         BitField<0, 20, Tegra::Shader::TextureType> texture_type; | ||||||
|  |         BitField<20, 1, u32> is_array; | ||||||
|  |         BitField<21, 1, u32> is_buffer; | ||||||
|  |         BitField<22, 1, u32> is_shadow; | ||||||
|  |         u32 raw{}; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     bool operator==(const SamplerDescriptor& rhs) const noexcept { | ||||||
|  |         return raw == rhs.raw; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool operator!=(const SamplerDescriptor& rhs) const noexcept { | ||||||
|  |         return !operator==(rhs); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) { | ||||||
|  |         SamplerDescriptor result; | ||||||
|  |         switch (tic_texture_type) { | ||||||
|  |         case Tegra::Texture::TextureType::Texture1D: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture2D: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture3D: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture3D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::TextureCubemap: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture1DArray: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||||||
|  |             result.is_array.Assign(1); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture2DArray: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||||||
|  |             result.is_array.Assign(1); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture1DBuffer: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture1D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(1); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::Texture2DNoMipmap: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         case Tegra::Texture::TextureType::TextureCubeArray: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::TextureCube); | ||||||
|  |             result.is_array.Assign(1); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         default: | ||||||
|  |             result.texture_type.Assign(Tegra::Shader::TextureType::Texture2D); | ||||||
|  |             result.is_array.Assign(0); | ||||||
|  |             result.is_buffer.Assign(0); | ||||||
|  |             result.is_shadow.Assign(0); | ||||||
|  |             return result; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(std::is_trivially_copyable_v<SamplerDescriptor>); | ||||||
|  |  | ||||||
|  | class ConstBufferEngineInterface { | ||||||
|  | public: | ||||||
|  |     virtual ~ConstBufferEngineInterface() = default; | ||||||
|  |     virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0; | ||||||
|  |     virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; | ||||||
|  |     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|  |                                                     u64 offset) const = 0; | ||||||
|  |     virtual u32 GetBoundBuffer() const = 0; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace Tegra::Engines | ||||||
| @@ -70,13 +70,31 @@ Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHan | |||||||
|                                     GetTSCEntry(tex_handle.tsc_id)}; |                                     GetTSCEntry(tex_handle.tsc_id)}; | ||||||
| } | } | ||||||
|  |  | ||||||
| u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const { | u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | ||||||
|  |     ASSERT(stage == ShaderType::Compute); | ||||||
|     const auto& buffer = launch_description.const_buffer_config[const_buffer]; |     const auto& buffer = launch_description.const_buffer_config[const_buffer]; | ||||||
|     u32 result; |     u32 result; | ||||||
|     std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); |     std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32)); | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||||||
|  |     return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|  |                                                        u64 offset) const { | ||||||
|  |     ASSERT(stage == ShaderType::Compute); | ||||||
|  |     const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; | ||||||
|  |     const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; | ||||||
|  |  | ||||||
|  |     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||||||
|  |     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||||||
|  |     SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||||||
|  |     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
| void KeplerCompute::ProcessLaunch() { | void KeplerCompute::ProcessLaunch() { | ||||||
|     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); |     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||||||
|     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, |     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ | |||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/textures/texture.h" | #include "video_core/textures/texture.h" | ||||||
| @@ -37,7 +38,7 @@ namespace Tegra::Engines { | |||||||
| #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \ | ||||||
|     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) | ||||||
|  |  | ||||||
| class KeplerCompute final { | class KeplerCompute final : public ConstBufferEngineInterface { | ||||||
| public: | public: | ||||||
|     explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |     explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|                            MemoryManager& memory_manager); |                            MemoryManager& memory_manager); | ||||||
| @@ -201,7 +202,16 @@ public: | |||||||
|     Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, |     Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle, | ||||||
|                                             std::size_t offset) const; |                                             std::size_t offset) const; | ||||||
|  |  | ||||||
|     u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const; |     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | ||||||
|  |  | ||||||
|  |     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||||||
|  |  | ||||||
|  |     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|  |                                             u64 offset) const override; | ||||||
|  |  | ||||||
|  |     u32 GetBoundBuffer() const override { | ||||||
|  |         return regs.tex_cb_index; | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|   | |||||||
| @@ -847,7 +847,8 @@ void Maxwell3D::ProcessClearBuffers() { | |||||||
|     rasterizer.Clear(); |     rasterizer.Clear(); | ||||||
| } | } | ||||||
|  |  | ||||||
| u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const { | u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { | ||||||
|  |     ASSERT(stage != ShaderType::Compute); | ||||||
|     const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; |     const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)]; | ||||||
|     const auto& buffer = shader_stage.const_buffers[const_buffer]; |     const auto& buffer = shader_stage.const_buffers[const_buffer]; | ||||||
|     u32 result; |     u32 result; | ||||||
| @@ -855,4 +856,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6 | |||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const { | ||||||
|  |     return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|  |                                                    u64 offset) const { | ||||||
|  |     ASSERT(stage != ShaderType::Compute); | ||||||
|  |     const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; | ||||||
|  |     const auto& tex_info_buffer = shader.const_buffers[const_buffer]; | ||||||
|  |     const GPUVAddr tex_info_address = tex_info_buffer.address + offset; | ||||||
|  |  | ||||||
|  |     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; | ||||||
|  |     const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle, offset); | ||||||
|  |     SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value()); | ||||||
|  |     result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace Tegra::Engines | } // namespace Tegra::Engines | ||||||
|   | |||||||
| @@ -15,6 +15,7 @@ | |||||||
| #include "common/common_funcs.h" | #include "common/common_funcs.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/math_util.h" | #include "common/math_util.h" | ||||||
|  | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
| #include "video_core/engines/const_buffer_info.h" | #include "video_core/engines/const_buffer_info.h" | ||||||
| #include "video_core/engines/engine_upload.h" | #include "video_core/engines/engine_upload.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| @@ -44,7 +45,7 @@ namespace Tegra::Engines { | |||||||
| #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | #define MAXWELL3D_REG_INDEX(field_name)                                                            \ | ||||||
|     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) |     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32)) | ||||||
|  |  | ||||||
| class Maxwell3D final { | class Maxwell3D final : public ConstBufferEngineInterface { | ||||||
| public: | public: | ||||||
|     explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |     explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|                        MemoryManager& memory_manager); |                        MemoryManager& memory_manager); | ||||||
| @@ -1257,7 +1258,16 @@ public: | |||||||
|     /// Returns the texture information for a specific texture in a specific shader stage. |     /// Returns the texture information for a specific texture in a specific shader stage. | ||||||
|     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; |     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const; | ||||||
|  |  | ||||||
|     u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const; |     u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override; | ||||||
|  |  | ||||||
|  |     SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override; | ||||||
|  |  | ||||||
|  |     SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, | ||||||
|  |                                             u64 offset) const override; | ||||||
|  |  | ||||||
|  |     u32 GetBoundBuffer() const override { | ||||||
|  |         return regs.tex_cb_index; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than |     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than | ||||||
|     /// we've seen used. |     /// we've seen used. | ||||||
|   | |||||||
| @@ -975,7 +975,8 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag | |||||||
|             } |             } | ||||||
|             const auto cbuf = entry.GetBindlessCBuf(); |             const auto cbuf = entry.GetBindlessCBuf(); | ||||||
|             Tegra::Texture::TextureHandle tex_handle; |             Tegra::Texture::TextureHandle tex_handle; | ||||||
|             tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second); |             Tegra::Engines::ShaderType shader_type = static_cast<Tegra::Engines::ShaderType>(stage); | ||||||
|  |             tex_handle.raw = maxwell3d.AccessConstBuffer32(shader_type, cbuf.first, cbuf.second); | ||||||
|             return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); |             return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset()); | ||||||
|         }(); |         }(); | ||||||
|  |  | ||||||
| @@ -1005,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) | |||||||
|             } |             } | ||||||
|             const auto cbuf = entry.GetBindlessCBuf(); |             const auto cbuf = entry.GetBindlessCBuf(); | ||||||
|             Tegra::Texture::TextureHandle tex_handle; |             Tegra::Texture::TextureHandle tex_handle; | ||||||
|             tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); |             tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, | ||||||
|  |                                                          cbuf.first, cbuf.second); | ||||||
|             return compute.GetTextureInfo(tex_handle, entry.GetOffset()); |             return compute.GetTextureInfo(tex_handle, entry.GetOffset()); | ||||||
|         }(); |         }(); | ||||||
|  |  | ||||||
| @@ -1050,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { | |||||||
|             } |             } | ||||||
|             const auto cbuf = entry.GetBindlessCBuf(); |             const auto cbuf = entry.GetBindlessCBuf(); | ||||||
|             Tegra::Texture::TextureHandle tex_handle; |             Tegra::Texture::TextureHandle tex_handle; | ||||||
|             tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second); |             tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, | ||||||
|  |                                                          cbuf.first, cbuf.second); | ||||||
|             return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; |             return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic; | ||||||
|         }(); |         }(); | ||||||
|         SetupImage(bindpoint, tic, entry); |         SetupImage(bindpoint, tic, entry); | ||||||
|   | |||||||
| @@ -3,13 +3,16 @@ | |||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #include <mutex> | #include <mutex> | ||||||
|  | #include <optional> | ||||||
|  | #include <string> | ||||||
| #include <thread> | #include <thread> | ||||||
|  | #include <unordered_set> | ||||||
| #include <boost/functional/hash.hpp> | #include <boost/functional/hash.hpp> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/hash.h" |  | ||||||
| #include "common/scope_exit.h" | #include "common/scope_exit.h" | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/frontend/emu_window.h" | #include "core/frontend/emu_window.h" | ||||||
|  | #include "video_core/engines/kepler_compute.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||||
| @@ -21,18 +24,20 @@ | |||||||
|  |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
|  |  | ||||||
|  | using Tegra::Engines::ShaderType; | ||||||
|  | using VideoCommon::Shader::ConstBufferLocker; | ||||||
| using VideoCommon::Shader::ProgramCode; | using VideoCommon::Shader::ProgramCode; | ||||||
|  | using VideoCommon::Shader::ShaderIR; | ||||||
|  |  | ||||||
|  | namespace { | ||||||
|  |  | ||||||
| // One UBO is always reserved for emulation values on staged shaders | // One UBO is always reserved for emulation values on staged shaders | ||||||
| constexpr u32 STAGE_RESERVED_UBOS = 1; | constexpr u32 STAGE_RESERVED_UBOS = 1; | ||||||
|  |  | ||||||
| struct UnspecializedShader { | constexpr u32 STAGE_MAIN_OFFSET = 10; | ||||||
|     std::string code; | constexpr u32 KERNEL_MAIN_OFFSET = 0; | ||||||
|     GLShader::ShaderEntries entries; |  | ||||||
|     ProgramType program_type; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| namespace { | constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; | ||||||
|  |  | ||||||
| /// Gets the address for the specified shader stage program | /// Gets the address for the specified shader stage program | ||||||
| GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { | ||||||
| @@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) | |||||||
|     return gpu.regs.code_address.CodeAddress() + shader_config.offset; |     return gpu.regs.code_address.CodeAddress() + shader_config.offset; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Gets if the current instruction offset is a scheduler instruction | ||||||
|  | constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { | ||||||
|  |     // Sched instructions appear once every 4 instructions. | ||||||
|  |     constexpr std::size_t SchedPeriod = 4; | ||||||
|  |     const std::size_t absolute_offset = offset - main_offset; | ||||||
|  |     return (absolute_offset % SchedPeriod) == 0; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Calculates the size of a program stream | ||||||
|  | std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||||||
|  |     constexpr std::size_t start_offset = 10; | ||||||
|  |     // This is the encoded version of BRA that jumps to itself. All Nvidia | ||||||
|  |     // shaders end with one. | ||||||
|  |     constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; | ||||||
|  |     constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; | ||||||
|  |     std::size_t offset = start_offset; | ||||||
|  |     while (offset < program.size()) { | ||||||
|  |         const u64 instruction = program[offset]; | ||||||
|  |         if (!IsSchedInstruction(offset, start_offset)) { | ||||||
|  |             if ((instruction & mask) == self_jumping_branch) { | ||||||
|  |                 // End on Maxwell's "nop" instruction | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             if (instruction == 0) { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         offset++; | ||||||
|  |     } | ||||||
|  |     // The last instruction is included in the program size | ||||||
|  |     return std::min(offset + 1, program.size()); | ||||||
|  | } | ||||||
|  |  | ||||||
| /// Gets the shader program code from memory for the specified address | /// Gets the shader program code from memory for the specified address | ||||||
| ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr, | ||||||
|                           const u8* host_ptr) { |                           const u8* host_ptr) { | ||||||
| @@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||||||
|     }); |     }); | ||||||
|     memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), |     memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(), | ||||||
|                                    program_code.size() * sizeof(u64)); |                                    program_code.size() * sizeof(u64)); | ||||||
|  |     program_code.resize(CalculateProgramSize(program_code)); | ||||||
|     return program_code; |     return program_code; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Gets if the current instruction offset is a scheduler instruction |  | ||||||
| constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { |  | ||||||
|     // Sched instructions appear once every 4 instructions. |  | ||||||
|     constexpr std::size_t SchedPeriod = 4; |  | ||||||
|     const std::size_t absolute_offset = offset - main_offset; |  | ||||||
|     return (absolute_offset % SchedPeriod) == 0; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Describes primitive behavior on geometry shaders | /// Describes primitive behavior on geometry shaders | ||||||
| constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { | ||||||
|     switch (primitive_mode) { |     switch (primitive_mode) { | ||||||
| @@ -121,110 +152,142 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) { | |||||||
|     return {}; |     return {}; | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Calculates the size of a program stream |  | ||||||
| std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { |  | ||||||
|     constexpr std::size_t start_offset = 10; |  | ||||||
|     // This is the encoded version of BRA that jumps to itself. All Nvidia |  | ||||||
|     // shaders end with one. |  | ||||||
|     constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL; |  | ||||||
|     constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL; |  | ||||||
|     std::size_t offset = start_offset; |  | ||||||
|     std::size_t size = start_offset * sizeof(u64); |  | ||||||
|     while (offset < program.size()) { |  | ||||||
|         const u64 instruction = program[offset]; |  | ||||||
|         if (!IsSchedInstruction(offset, start_offset)) { |  | ||||||
|             if ((instruction & mask) == self_jumping_branch) { |  | ||||||
|                 // End on Maxwell's "nop" instruction |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|             if (instruction == 0) { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|         size += sizeof(u64); |  | ||||||
|         offset++; |  | ||||||
|     } |  | ||||||
|     // The last instruction is included in the program size |  | ||||||
|     return std::min(size + sizeof(u64), program.size() * sizeof(u64)); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// Hashes one (or two) program streams | /// Hashes one (or two) program streams | ||||||
| u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | ||||||
|                         const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { |                         const ProgramCode& code_b) { | ||||||
|     if (size_a == 0) { |     u64 unique_identifier = boost::hash_value(code); | ||||||
|         size_a = CalculateProgramSize(code); |     if (program_type == ProgramType::VertexA) { | ||||||
|  |         // VertexA programs include two programs | ||||||
|  |         boost::hash_combine(unique_identifier, boost::hash_value(code_b)); | ||||||
|     } |     } | ||||||
|     u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); |     return unique_identifier; | ||||||
|     if (program_type != ProgramType::VertexA) { |  | ||||||
|         return unique_identifier; |  | ||||||
|     } |  | ||||||
|     // VertexA programs include two programs |  | ||||||
|  |  | ||||||
|     std::size_t seed = 0; |  | ||||||
|     boost::hash_combine(seed, unique_identifier); |  | ||||||
|  |  | ||||||
|     if (size_b == 0) { |  | ||||||
|         size_b = CalculateProgramSize(code_b); |  | ||||||
|     } |  | ||||||
|     const u64 identifier_b = |  | ||||||
|         Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b); |  | ||||||
|     boost::hash_combine(seed, identifier_b); |  | ||||||
|     return static_cast<u64>(seed); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Creates an unspecialized program from code streams | /// Creates an unspecialized program from code streams | ||||||
| GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, | std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir, | ||||||
|                                       ProgramCode program_code, ProgramCode program_code_b) { |                          const std::optional<ShaderIR>& ir_b) { | ||||||
|     GLShader::ShaderSetup setup(program_code); |  | ||||||
|     setup.program.size_a = CalculateProgramSize(program_code); |  | ||||||
|     setup.program.size_b = 0; |  | ||||||
|     if (program_type == ProgramType::VertexA) { |  | ||||||
|         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. |  | ||||||
|         // Conventional HW does not support this, so we combine VertexA and VertexB into one |  | ||||||
|         // stage here. |  | ||||||
|         setup.SetProgramB(program_code_b); |  | ||||||
|         setup.program.size_b = CalculateProgramSize(program_code_b); |  | ||||||
|     } |  | ||||||
|     setup.program.unique_identifier = GetUniqueIdentifier( |  | ||||||
|         program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); |  | ||||||
|  |  | ||||||
|     switch (program_type) { |     switch (program_type) { | ||||||
|     case ProgramType::VertexA: |     case ProgramType::VertexA: | ||||||
|     case ProgramType::VertexB: |     case ProgramType::VertexB: | ||||||
|         return GLShader::GenerateVertexShader(device, setup); |         return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); | ||||||
|     case ProgramType::Geometry: |     case ProgramType::Geometry: | ||||||
|         return GLShader::GenerateGeometryShader(device, setup); |         return GLShader::GenerateGeometryShader(device, ir); | ||||||
|     case ProgramType::Fragment: |     case ProgramType::Fragment: | ||||||
|         return GLShader::GenerateFragmentShader(device, setup); |         return GLShader::GenerateFragmentShader(device, ir); | ||||||
|     case ProgramType::Compute: |     case ProgramType::Compute: | ||||||
|         return GLShader::GenerateComputeShader(device, setup); |         return GLShader::GenerateComputeShader(device, ir); | ||||||
|     default: |     default: | ||||||
|         UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); |         UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | constexpr const char* GetProgramTypeName(ProgramType program_type) { | ||||||
|                                ProgramType program_type, const ProgramVariant& variant, |     switch (program_type) { | ||||||
|                                bool hint_retrievable = false) { |     case ProgramType::VertexA: | ||||||
|  |     case ProgramType::VertexB: | ||||||
|  |         return "VS"; | ||||||
|  |     case ProgramType::TessellationControl: | ||||||
|  |         return "TCS"; | ||||||
|  |     case ProgramType::TessellationEval: | ||||||
|  |         return "TES"; | ||||||
|  |     case ProgramType::Geometry: | ||||||
|  |         return "GS"; | ||||||
|  |     case ProgramType::Fragment: | ||||||
|  |         return "FS"; | ||||||
|  |     case ProgramType::Compute: | ||||||
|  |         return "CS"; | ||||||
|  |     } | ||||||
|  |     return "UNK"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) { | ||||||
|  |     switch (program_type) { | ||||||
|  |     case ProgramType::VertexA: | ||||||
|  |     case ProgramType::VertexB: | ||||||
|  |         return Tegra::Engines::ShaderType::Vertex; | ||||||
|  |     case ProgramType::TessellationControl: | ||||||
|  |         return Tegra::Engines::ShaderType::TesselationControl; | ||||||
|  |     case ProgramType::TessellationEval: | ||||||
|  |         return Tegra::Engines::ShaderType::TesselationEval; | ||||||
|  |     case ProgramType::Geometry: | ||||||
|  |         return Tegra::Engines::ShaderType::Geometry; | ||||||
|  |     case ProgramType::Fragment: | ||||||
|  |         return Tegra::Engines::ShaderType::Fragment; | ||||||
|  |     case ProgramType::Compute: | ||||||
|  |         return Tegra::Engines::ShaderType::Compute; | ||||||
|  |     } | ||||||
|  |     UNREACHABLE(); | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string GetShaderId(u64 unique_identifier, ProgramType program_type) { | ||||||
|  |     return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface( | ||||||
|  |     Core::System& system, ProgramType program_type) { | ||||||
|  |     if (program_type == ProgramType::Compute) { | ||||||
|  |         return system.GPU().KeplerCompute(); | ||||||
|  |     } else { | ||||||
|  |         return system.GPU().Maxwell3D(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) { | ||||||
|  |     return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type), | ||||||
|  |                                                GetConstBufferEngineInterface(system, program_type)); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { | ||||||
|  |     for (const auto& key : usage.keys) { | ||||||
|  |         const auto [buffer, offset] = key.first; | ||||||
|  |         locker.InsertKey(buffer, offset, key.second); | ||||||
|  |     } | ||||||
|  |     for (const auto& [offset, sampler] : usage.bound_samplers) { | ||||||
|  |         locker.InsertBoundSampler(offset, sampler); | ||||||
|  |     } | ||||||
|  |     for (const auto& [key, sampler] : usage.bindless_samplers) { | ||||||
|  |         const auto [buffer, offset] = key; | ||||||
|  |         locker.InsertBindlessSampler(buffer, offset, sampler); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type, | ||||||
|  |                           const ProgramCode& program_code, const ProgramCode& program_code_b, | ||||||
|  |                           const ProgramVariant& variant, ConstBufferLocker& locker, | ||||||
|  |                           bool hint_retrievable = false) { | ||||||
|  |     LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type)); | ||||||
|  |  | ||||||
|  |     const bool is_compute = program_type == ProgramType::Compute; | ||||||
|  |     const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||||||
|  |     const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker); | ||||||
|  |     std::optional<ShaderIR> ir_b; | ||||||
|  |     if (!program_code_b.empty()) { | ||||||
|  |         ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker); | ||||||
|  |     } | ||||||
|  |     const auto entries = GLShader::GetEntries(ir); | ||||||
|  |  | ||||||
|     auto base_bindings{variant.base_bindings}; |     auto base_bindings{variant.base_bindings}; | ||||||
|     const auto primitive_mode{variant.primitive_mode}; |     const auto primitive_mode{variant.primitive_mode}; | ||||||
|     const auto texture_buffer_usage{variant.texture_buffer_usage}; |     const auto texture_buffer_usage{variant.texture_buffer_usage}; | ||||||
|  |  | ||||||
|     std::string source = R"(#version 430 core |     std::string source = fmt::format(R"(// {} | ||||||
|  | #version 430 core | ||||||
| #extension GL_ARB_separate_shader_objects : enable | #extension GL_ARB_separate_shader_objects : enable | ||||||
| #extension GL_ARB_shader_viewport_layer_array : enable | #extension GL_ARB_shader_viewport_layer_array : enable | ||||||
| #extension GL_EXT_shader_image_load_formatted : enable | #extension GL_EXT_shader_image_load_formatted : enable | ||||||
| #extension GL_NV_gpu_shader5 : enable | #extension GL_NV_gpu_shader5 : enable | ||||||
| #extension GL_NV_shader_thread_group : enable | #extension GL_NV_shader_thread_group : enable | ||||||
| #extension GL_NV_shader_thread_shuffle : enable | #extension GL_NV_shader_thread_shuffle : enable | ||||||
| )"; | )", | ||||||
|     if (program_type == ProgramType::Compute) { |                                      GetShaderId(unique_identifier, program_type)); | ||||||
|  |     if (is_compute) { | ||||||
|         source += "#extension GL_ARB_compute_variable_group_size : require\n"; |         source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||||||
|     } |     } | ||||||
|     source += '\n'; |     source += '\n'; | ||||||
|  |  | ||||||
|     if (program_type != ProgramType::Compute) { |     if (!is_compute) { | ||||||
|         source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); |         source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -268,7 +331,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     source += '\n'; |     source += '\n'; | ||||||
|     source += code; |     source += GenerateGLSL(device, program_type, ir, ir_b); | ||||||
|  |  | ||||||
|     OGLShader shader; |     OGLShader shader; | ||||||
|     shader.Create(source.c_str(), GetShaderType(program_type)); |     shader.Create(source.c_str(), GetShaderType(program_type)); | ||||||
| @@ -278,85 +341,97 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||||||
|     return program; |     return program; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::set<GLenum> GetSupportedFormats() { | std::unordered_set<GLenum> GetSupportedFormats() { | ||||||
|     std::set<GLenum> supported_formats; |  | ||||||
|  |  | ||||||
|     GLint num_formats{}; |     GLint num_formats{}; | ||||||
|     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); |     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); | ||||||
|  |  | ||||||
|     std::vector<GLint> formats(num_formats); |     std::vector<GLint> formats(num_formats); | ||||||
|     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); |     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data()); | ||||||
|  |  | ||||||
|     for (const GLint format : formats) |     std::unordered_set<GLenum> supported_formats; | ||||||
|  |     for (const GLint format : formats) { | ||||||
|         supported_formats.insert(static_cast<GLenum>(format)); |         supported_formats.insert(static_cast<GLenum>(format)); | ||||||
|  |     } | ||||||
|     return supported_formats; |     return supported_formats; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | ||||||
|                            GLShader::ProgramResult result) |                            GLShader::ShaderEntries entries, ProgramCode program_code, | ||||||
|     : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr}, |                            ProgramCode program_code_b) | ||||||
|       unique_identifier{params.unique_identifier}, program_type{program_type}, |     : RasterizerCacheObject{params.host_ptr}, system{params.system}, | ||||||
|       disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs}, |       disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, | ||||||
|       entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {} |       unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries}, | ||||||
|  |       program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} { | ||||||
|  |     if (!params.precompiled_variants) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |     for (const auto& pair : *params.precompiled_variants) { | ||||||
|  |         auto locker = MakeLocker(system, program_type); | ||||||
|  |         const auto& usage = pair->first; | ||||||
|  |         FillLocker(*locker, usage); | ||||||
|  |  | ||||||
|  |         std::unique_ptr<LockerVariant>* locker_variant = nullptr; | ||||||
|  |         const auto it = | ||||||
|  |             std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { | ||||||
|  |                 return variant->locker->HasEqualKeys(*locker); | ||||||
|  |             }); | ||||||
|  |         if (it == locker_variants.end()) { | ||||||
|  |             locker_variant = &locker_variants.emplace_back(); | ||||||
|  |             *locker_variant = std::make_unique<LockerVariant>(); | ||||||
|  |             locker_variant->get()->locker = std::move(locker); | ||||||
|  |         } else { | ||||||
|  |             locker_variant = &*it; | ||||||
|  |         } | ||||||
|  |         locker_variant->get()->programs.emplace(usage.variant, pair->second); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | ||||||
|                                            Maxwell::ShaderProgram program_type, |                                            Maxwell::ShaderProgram program_type, | ||||||
|                                            ProgramCode&& program_code, |                                            ProgramCode program_code, ProgramCode program_code_b) { | ||||||
|                                            ProgramCode&& program_code_b) { |  | ||||||
|     const auto code_size{CalculateProgramSize(program_code)}; |  | ||||||
|     const auto code_size_b{CalculateProgramSize(program_code_b)}; |  | ||||||
|     auto result{ |  | ||||||
|         CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; |  | ||||||
|     if (result.first.empty()) { |  | ||||||
|         // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     params.disk_cache.SaveRaw(ShaderDiskCacheRaw( |     params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | ||||||
|         params.unique_identifier, GetProgramType(program_type), |         params.unique_identifier, GetProgramType(program_type), program_code, program_code_b)); | ||||||
|         static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), |  | ||||||
|         std::move(program_code), std::move(program_code_b))); |  | ||||||
|  |  | ||||||
|  |     ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type))); | ||||||
|  |     const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); | ||||||
|  |     // TODO(Rodrigo): Handle VertexA shaders | ||||||
|  |     // std::optional<ShaderIR> ir_b; | ||||||
|  |     // if (!program_code_b.empty()) { | ||||||
|  |     //     ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET); | ||||||
|  |     // } | ||||||
|     return std::shared_ptr<CachedShader>( |     return std::shared_ptr<CachedShader>( | ||||||
|         new CachedShader(params, GetProgramType(program_type), std::move(result))); |         new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir), | ||||||
|  |                          std::move(program_code), std::move(program_code_b))); | ||||||
| } | } | ||||||
|  |  | ||||||
| Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | ||||||
|                                           Maxwell::ShaderProgram program_type, |     params.disk_cache.SaveRaw( | ||||||
|                                           GLShader::ProgramResult result) { |         ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code)); | ||||||
|     return std::shared_ptr<CachedShader>( |  | ||||||
|         new CachedShader(params, GetProgramType(program_type), std::move(result))); |     ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute); | ||||||
|  |     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); | ||||||
|  |     return std::shared_ptr<CachedShader>(new CachedShader( | ||||||
|  |         params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {})); | ||||||
| } | } | ||||||
|  |  | ||||||
| Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | Shader CachedShader::CreateFromCache(const ShaderParameters& params, | ||||||
|     auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; |                                      const UnspecializedShader& unspecialized) { | ||||||
|  |     return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type, | ||||||
|     const auto code_size{CalculateProgramSize(code)}; |                                                           unspecialized.entries, unspecialized.code, | ||||||
|     params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, |                                                           unspecialized.code_b)); | ||||||
|                                                  static_cast<u32>(code_size / sizeof(u64)), 0, |  | ||||||
|                                                  std::move(code), {})); |  | ||||||
|  |  | ||||||
|     return std::shared_ptr<CachedShader>( |  | ||||||
|         new CachedShader(params, ProgramType::Compute, std::move(result))); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, |  | ||||||
|                                            GLShader::ProgramResult result) { |  | ||||||
|     return std::shared_ptr<CachedShader>( |  | ||||||
|         new CachedShader(params, ProgramType::Compute, std::move(result))); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | ||||||
|     const auto [entry, is_cache_miss] = programs.try_emplace(variant); |     UpdateVariant(); | ||||||
|  |  | ||||||
|  |     const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant); | ||||||
|     auto& program = entry->second; |     auto& program = entry->second; | ||||||
|     if (is_cache_miss) { |     if (is_cache_miss) { | ||||||
|         program = TryLoadProgram(variant); |         program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b, | ||||||
|         if (!program) { |                               variant, *curr_variant->locker); | ||||||
|             program = SpecializeShader(code, entries, program_type, variant); |         disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker)); | ||||||
|             disk_cache.SaveUsage(GetUsage(variant)); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); |         LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); | ||||||
|     } |     } | ||||||
| @@ -372,18 +447,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||||||
|     return {program->handle, base_bindings}; |     return {program->handle, base_bindings}; | ||||||
| } | } | ||||||
|  |  | ||||||
| CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const { | void CachedShader::UpdateVariant() { | ||||||
|     const auto found = precompiled_programs.find(GetUsage(variant)); |     if (curr_variant && !curr_variant->locker->IsConsistent()) { | ||||||
|     if (found == precompiled_programs.end()) { |         curr_variant = nullptr; | ||||||
|         return {}; |     } | ||||||
|  |     if (!curr_variant) { | ||||||
|  |         for (auto& variant : locker_variants) { | ||||||
|  |             if (variant->locker->IsConsistent()) { | ||||||
|  |                 curr_variant = variant.get(); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (!curr_variant) { | ||||||
|  |         auto& new_variant = locker_variants.emplace_back(); | ||||||
|  |         new_variant = std::make_unique<LockerVariant>(); | ||||||
|  |         new_variant->locker = MakeLocker(system, program_type); | ||||||
|  |         curr_variant = new_variant.get(); | ||||||
|     } |     } | ||||||
|     return found->second; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const { | ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, | ||||||
|  |                                             const ConstBufferLocker& locker) const { | ||||||
|     ShaderDiskCacheUsage usage; |     ShaderDiskCacheUsage usage; | ||||||
|     usage.unique_identifier = unique_identifier; |     usage.unique_identifier = unique_identifier; | ||||||
|     usage.variant = variant; |     usage.variant = variant; | ||||||
|  |     usage.keys = locker.GetKeys(); | ||||||
|  |     usage.bound_samplers = locker.GetBoundSamplers(); | ||||||
|  |     usage.bindless_samplers = locker.GetBindlessSamplers(); | ||||||
|     return usage; |     return usage; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -399,18 +489,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     const auto [raws, shader_usages] = *transferable; |     const auto [raws, shader_usages] = *transferable; | ||||||
|  |     if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { | ||||||
|     auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); |  | ||||||
|  |  | ||||||
|     const auto supported_formats{GetSupportedFormats()}; |  | ||||||
|     const auto unspecialized_shaders{ |  | ||||||
|         GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; |  | ||||||
|     if (stop_loading) { |  | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Track if precompiled cache was altered during loading to know if we have to serialize the |     const auto dumps = disk_cache.LoadPrecompiled(); | ||||||
|     // virtual precompiled cache file back to the hard drive |     const auto supported_formats = GetSupportedFormats(); | ||||||
|  |  | ||||||
|  |     // Track if precompiled cache was altered during loading to know if we have to | ||||||
|  |     // serialize the virtual precompiled cache file back to the hard drive | ||||||
|     bool precompiled_cache_altered = false; |     bool precompiled_cache_altered = false; | ||||||
|  |  | ||||||
|     // Inform the frontend about shader build initialization |     // Inform the frontend about shader build initialization | ||||||
| @@ -433,9 +520,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|                 return; |                 return; | ||||||
|             } |             } | ||||||
|             const auto& usage{shader_usages[i]}; |             const auto& usage{shader_usages[i]}; | ||||||
|             LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})", |  | ||||||
|                      usage.unique_identifier, i, shader_usages.size()); |  | ||||||
|  |  | ||||||
|             const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; |             const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; | ||||||
|             const auto dump{dumps.find(usage)}; |             const auto dump{dumps.find(usage)}; | ||||||
|  |  | ||||||
| @@ -449,21 +533,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             if (!shader) { |             if (!shader) { | ||||||
|                 shader = SpecializeShader(unspecialized.code, unspecialized.entries, |                 auto locker{MakeLocker(system, unspecialized.program_type)}; | ||||||
|                                           unspecialized.program_type, usage.variant, true); |                 FillLocker(*locker, usage); | ||||||
|  |                 shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type, | ||||||
|  |                                      unspecialized.code, unspecialized.code_b, usage.variant, | ||||||
|  |                                      *locker, true); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             std::scoped_lock lock(mutex); |             std::scoped_lock lock{mutex}; | ||||||
|             if (callback) { |             if (callback) { | ||||||
|                 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, |                 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, | ||||||
|                          shader_usages.size()); |                          shader_usages.size()); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             precompiled_programs.emplace(usage, std::move(shader)); |             precompiled_programs.emplace(usage, std::move(shader)); | ||||||
|  |  | ||||||
|  |             // TODO(Rodrigo): Is there a better way to do this? | ||||||
|  |             precompiled_variants[usage.unique_identifier].push_back( | ||||||
|  |                 precompiled_programs.find(usage)); | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)}; |     const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; | ||||||
|     const std::size_t bucket_size{shader_usages.size() / num_workers}; |     const std::size_t bucket_size{shader_usages.size() / num_workers}; | ||||||
|     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); |     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); | ||||||
|     std::vector<std::thread> threads(num_workers); |     std::vector<std::thread> threads(num_workers); | ||||||
| @@ -483,7 +574,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|     if (compilation_failed) { |     if (compilation_failed) { | ||||||
|         // Invalidate the precompiled cache if a shader dumped shader was rejected |         // Invalidate the precompiled cache if a shader dumped shader was rejected | ||||||
|         disk_cache.InvalidatePrecompiled(); |         disk_cache.InvalidatePrecompiled(); | ||||||
|         dumps.clear(); |  | ||||||
|         precompiled_cache_altered = true; |         precompiled_cache_altered = true; | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| @@ -491,8 +581,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before |     // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw | ||||||
|     // precompiling them |     // before precompiling them | ||||||
|  |  | ||||||
|     for (std::size_t i = 0; i < shader_usages.size(); ++i) { |     for (std::size_t i = 0; i < shader_usages.size(); ++i) { | ||||||
|         const auto& usage{shader_usages[i]}; |         const auto& usage{shader_usages[i]}; | ||||||
| @@ -508,9 +598,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { | ||||||
|     const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) { |     const auto it = precompiled_variants.find(unique_identifier); | ||||||
|  |     return it == precompiled_variants.end() ? nullptr : &it->second; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | ||||||
|  |     const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { | ||||||
|     if (supported_formats.find(dump.binary_format) == supported_formats.end()) { |     if (supported_formats.find(dump.binary_format) == supported_formats.end()) { | ||||||
|         LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); |         LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); | ||||||
|         return {}; |         return {}; | ||||||
| @@ -532,56 +626,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( | |||||||
|     return shader; |     return shader; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders( | bool ShaderCacheOpenGL::GenerateUnspecializedShaders( | ||||||
|     const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, |     const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, | ||||||
|     const std::vector<ShaderDiskCacheRaw>& raws, |     const std::vector<ShaderDiskCacheRaw>& raws) { | ||||||
|     const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) { |  | ||||||
|     std::unordered_map<u64, UnspecializedShader> unspecialized; |  | ||||||
|  |  | ||||||
|     if (callback) { |     if (callback) { | ||||||
|         callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); |         callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     for (std::size_t i = 0; i < raws.size(); ++i) { |     for (std::size_t i = 0; i < raws.size(); ++i) { | ||||||
|         if (stop_loading) { |         if (stop_loading) { | ||||||
|             return {}; |             return false; | ||||||
|         } |         } | ||||||
|         const auto& raw{raws[i]}; |         const auto& raw{raws[i]}; | ||||||
|         const u64 unique_identifier{raw.GetUniqueIdentifier()}; |         const u64 unique_identifier{raw.GetUniqueIdentifier()}; | ||||||
|         const u64 calculated_hash{ |         const u64 calculated_hash{ | ||||||
|             GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; |             GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())}; | ||||||
|         if (unique_identifier != calculated_hash) { |         if (unique_identifier != calculated_hash) { | ||||||
|             LOG_ERROR( |             LOG_ERROR(Render_OpenGL, | ||||||
|                 Render_OpenGL, |                       "Invalid hash in entry={:016x} (obtained hash={:016x}) - " | ||||||
|                 "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache", |                       "removing shader cache", | ||||||
|                 raw.GetUniqueIdentifier(), calculated_hash); |                       raw.GetUniqueIdentifier(), calculated_hash); | ||||||
|             disk_cache.InvalidateTransferable(); |             disk_cache.InvalidateTransferable(); | ||||||
|             return {}; |             return false; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         GLShader::ProgramResult result; |         const u32 main_offset = | ||||||
|         if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) { |             raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; | ||||||
|             // If it's stored in the precompiled file, avoid decompiling it here |         ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType())); | ||||||
|             const auto& stored_decompiled{it->second}; |         const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker); | ||||||
|             result = {stored_decompiled.code, stored_decompiled.entries}; |         // TODO(Rodrigo): Handle VertexA shaders | ||||||
|         } else { |         // std::optional<ShaderIR> ir_b; | ||||||
|             // Otherwise decompile the shader at boot and save the result to the decompiled file |         // if (raw.HasProgramA()) { | ||||||
|             result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(), |         //     ir_b.emplace(raw.GetProgramCodeB(), main_offset); | ||||||
|                                    raw.GetProgramCodeB()); |         // } | ||||||
|             disk_cache.SaveDecompiled(unique_identifier, result.first, result.second); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         precompiled_shaders.insert({unique_identifier, result}); |         UnspecializedShader unspecialized; | ||||||
|  |         unspecialized.entries = GLShader::GetEntries(ir); | ||||||
|         unspecialized.insert( |         unspecialized.program_type = raw.GetProgramType(); | ||||||
|             {raw.GetUniqueIdentifier(), |         unspecialized.code = raw.GetProgramCode(); | ||||||
|              {std::move(result.first), std::move(result.second), raw.GetProgramType()}}); |         unspecialized.code_b = raw.GetProgramCodeB(); | ||||||
|  |         unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); | ||||||
|  |  | ||||||
|         if (callback) { |         if (callback) { | ||||||
|             callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); |             callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     return unspecialized; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||||
| @@ -590,37 +680,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     auto& memory_manager{system.GPU().MemoryManager()}; |     auto& memory_manager{system.GPU().MemoryManager()}; | ||||||
|     const GPUVAddr program_addr{GetShaderAddress(system, program)}; |     const GPUVAddr address{GetShaderAddress(system, program)}; | ||||||
|  |  | ||||||
|     // Look up shader in the cache based on address |     // Look up shader in the cache based on address | ||||||
|     const auto host_ptr{memory_manager.GetPointer(program_addr)}; |     const auto host_ptr{memory_manager.GetPointer(address)}; | ||||||
|     Shader shader{TryGet(host_ptr)}; |     Shader shader{TryGet(host_ptr)}; | ||||||
|     if (shader) { |     if (shader) { | ||||||
|         return last_shaders[static_cast<std::size_t>(program)] = shader; |         return last_shaders[static_cast<std::size_t>(program)] = shader; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // No shader found - create a new one |     // No shader found - create a new one | ||||||
|     ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; |     ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)}; | ||||||
|     ProgramCode program_code_b; |     ProgramCode code_b; | ||||||
|     const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; |     if (program == Maxwell::ShaderProgram::VertexA) { | ||||||
|     if (is_program_a) { |         const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | ||||||
|         const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; |         code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b)); | ||||||
|         program_code_b = GetShaderCode(memory_manager, program_addr_b, |  | ||||||
|                                        memory_manager.GetPointer(program_addr_b)); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const auto unique_identifier = |     const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b); | ||||||
|         GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); |     const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; |     const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; | ||||||
|     const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |     const ShaderParameters params{system,   disk_cache, precompiled_variants, device, | ||||||
|                                   host_ptr,   unique_identifier}; |                                   cpu_addr, host_ptr,   unique_identifier}; | ||||||
|  |  | ||||||
|     const auto found = precompiled_shaders.find(unique_identifier); |     const auto found = unspecialized_shaders.find(unique_identifier); | ||||||
|     if (found == precompiled_shaders.end()) { |     if (found == unspecialized_shaders.end()) { | ||||||
|         shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code), |         shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), | ||||||
|                                                      std::move(program_code_b)); |                                                      std::move(code_b)); | ||||||
|     } else { |     } else { | ||||||
|         shader = CachedShader::CreateStageFromCache(params, program, found->second); |         shader = CachedShader::CreateFromCache(params, found->second); | ||||||
|     } |     } | ||||||
|     Register(shader); |     Register(shader); | ||||||
|  |  | ||||||
| @@ -638,15 +726,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | |||||||
|     // No kernel found - create a new one |     // No kernel found - create a new one | ||||||
|     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; |     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||||||
|     const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; |     const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||||||
|  |     const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); | ||||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; |     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||||||
|     const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, |     const ShaderParameters params{system,   disk_cache, precompiled_variants, device, | ||||||
|                                   host_ptr,   unique_identifier}; |                                   cpu_addr, host_ptr,   unique_identifier}; | ||||||
|  |  | ||||||
|     const auto found = precompiled_shaders.find(unique_identifier); |     const auto found = unspecialized_shaders.find(unique_identifier); | ||||||
|     if (found == precompiled_shaders.end()) { |     if (found == unspecialized_shaders.end()) { | ||||||
|         kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); |         kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||||||
|     } else { |     } else { | ||||||
|         kernel = CachedShader::CreateKernelFromCache(params, found->second); |         kernel = CachedShader::CreateFromCache(params, found->second); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     Register(kernel); |     Register(kernel); | ||||||
|   | |||||||
| @@ -8,9 +8,10 @@ | |||||||
| #include <atomic> | #include <atomic> | ||||||
| #include <bitset> | #include <bitset> | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <set> | #include <string> | ||||||
| #include <tuple> | #include <tuple> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
|  | #include <unordered_set> | ||||||
| #include <vector> | #include <vector> | ||||||
|  |  | ||||||
| #include <glad/glad.h> | #include <glad/glad.h> | ||||||
| @@ -20,6 +21,8 @@ | |||||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_decompiler.h" | #include "video_core/renderer_opengl/gl_shader_decompiler.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | #include "video_core/renderer_opengl/gl_shader_disk_cache.h" | ||||||
|  | #include "video_core/shader/const_buffer_locker.h" | ||||||
|  | #include "video_core/shader/shader_ir.h" | ||||||
|  |  | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| @@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>; | |||||||
| using CachedProgram = std::shared_ptr<OGLProgram>; | using CachedProgram = std::shared_ptr<OGLProgram>; | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
| using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; | ||||||
| using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; | ||||||
|  |  | ||||||
|  | struct UnspecializedShader { | ||||||
|  |     GLShader::ShaderEntries entries; | ||||||
|  |     ProgramType program_type; | ||||||
|  |     ProgramCode code; | ||||||
|  |     ProgramCode code_b; | ||||||
|  | }; | ||||||
|  |  | ||||||
| struct ShaderParameters { | struct ShaderParameters { | ||||||
|  |     Core::System& system; | ||||||
|     ShaderDiskCacheOpenGL& disk_cache; |     ShaderDiskCacheOpenGL& disk_cache; | ||||||
|     const PrecompiledPrograms& precompiled_programs; |     const PrecompiledVariants* precompiled_variants; | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     VAddr cpu_addr; |     VAddr cpu_addr; | ||||||
|     u8* host_ptr; |     u8* host_ptr; | ||||||
| @@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject { | |||||||
| public: | public: | ||||||
|     static Shader CreateStageFromMemory(const ShaderParameters& params, |     static Shader CreateStageFromMemory(const ShaderParameters& params, | ||||||
|                                         Maxwell::ShaderProgram program_type, |                                         Maxwell::ShaderProgram program_type, | ||||||
|                                         ProgramCode&& program_code, ProgramCode&& program_code_b); |                                         ProgramCode program_code, ProgramCode program_code_b); | ||||||
|  |     static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); | ||||||
|  |  | ||||||
|     static Shader CreateStageFromCache(const ShaderParameters& params, |     static Shader CreateFromCache(const ShaderParameters& params, | ||||||
|                                        Maxwell::ShaderProgram program_type, |                                   const UnspecializedShader& unspecialized); | ||||||
|                                        GLShader::ProgramResult result); |  | ||||||
|  |  | ||||||
|     static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); |  | ||||||
|  |  | ||||||
|     static Shader CreateKernelFromCache(const ShaderParameters& params, |  | ||||||
|                                         GLShader::ProgramResult result); |  | ||||||
|  |  | ||||||
|     VAddr GetCpuAddr() const override { |     VAddr GetCpuAddr() const override { | ||||||
|         return cpu_addr; |         return cpu_addr; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|         return shader_length; |         return program_code.size() * sizeof(u64); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Gets the shader entries for the shader |     /// Gets the shader entries for the shader | ||||||
| @@ -83,24 +89,36 @@ public: | |||||||
|     std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); |     std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|  |     struct LockerVariant { | ||||||
|  |         std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; | ||||||
|  |         std::unordered_map<ProgramVariant, CachedProgram> programs; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     explicit CachedShader(const ShaderParameters& params, ProgramType program_type, |     explicit CachedShader(const ShaderParameters& params, ProgramType program_type, | ||||||
|                           GLShader::ProgramResult result); |                           GLShader::ShaderEntries entries, ProgramCode program_code, | ||||||
|  |                           ProgramCode program_code_b); | ||||||
|  |  | ||||||
|     CachedProgram TryLoadProgram(const ProgramVariant& variant) const; |     void UpdateVariant(); | ||||||
|  |  | ||||||
|     ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const; |     ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, | ||||||
|  |                                   const VideoCommon::Shader::ConstBufferLocker& locker) const; | ||||||
|  |  | ||||||
|  |     Core::System& system; | ||||||
|  |     ShaderDiskCacheOpenGL& disk_cache; | ||||||
|  |     const Device& device; | ||||||
|  |  | ||||||
|     VAddr cpu_addr{}; |     VAddr cpu_addr{}; | ||||||
|  |  | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     ProgramType program_type{}; |     ProgramType program_type{}; | ||||||
|     ShaderDiskCacheOpenGL& disk_cache; |  | ||||||
|     const PrecompiledPrograms& precompiled_programs; |  | ||||||
|  |  | ||||||
|     GLShader::ShaderEntries entries; |     GLShader::ShaderEntries entries; | ||||||
|     std::string code; |  | ||||||
|     std::size_t shader_length{}; |  | ||||||
|  |  | ||||||
|     std::unordered_map<ProgramVariant, CachedProgram> programs; |     ProgramCode program_code; | ||||||
|  |     ProgramCode program_code_b; | ||||||
|  |  | ||||||
|  |     LockerVariant* curr_variant = nullptr; | ||||||
|  |     std::vector<std::unique_ptr<LockerVariant>> locker_variants; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | class ShaderCacheOpenGL final : public RasterizerCache<Shader> { | ||||||
| @@ -123,21 +141,26 @@ protected: | |||||||
|     void FlushObjectInner(const Shader& object) override {} |     void FlushObjectInner(const Shader& object) override {} | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders( |     bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, | ||||||
|         const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, |                                       const VideoCore::DiskResourceLoadCallback& callback, | ||||||
|         const std::vector<ShaderDiskCacheRaw>& raws, |                                       const std::vector<ShaderDiskCacheRaw>& raws); | ||||||
|         const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled); |  | ||||||
|  |  | ||||||
|     CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, |     CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, | ||||||
|                                              const std::set<GLenum>& supported_formats); |                                              const std::unordered_set<GLenum>& supported_formats); | ||||||
|  |  | ||||||
|  |     const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; | ||||||
|  |  | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     Core::Frontend::EmuWindow& emu_window; |     Core::Frontend::EmuWindow& emu_window; | ||||||
|     const Device& device; |     const Device& device; | ||||||
|  |  | ||||||
|     ShaderDiskCacheOpenGL disk_cache; |     ShaderDiskCacheOpenGL disk_cache; | ||||||
|  |  | ||||||
|     PrecompiledShaders precompiled_shaders; |  | ||||||
|     PrecompiledPrograms precompiled_programs; |     PrecompiledPrograms precompiled_programs; | ||||||
|  |     std::unordered_map<u64, PrecompiledVariants> precompiled_variants; | ||||||
|  |  | ||||||
|  |     std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; | ||||||
|  |  | ||||||
|     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; |     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -415,27 +415,6 @@ public: | |||||||
|         return code.GetResult(); |         return code.GetResult(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     ShaderEntries GetShaderEntries() const { |  | ||||||
|         ShaderEntries entries; |  | ||||||
|         for (const auto& cbuf : ir.GetConstantBuffers()) { |  | ||||||
|             entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), |  | ||||||
|                                                cbuf.first); |  | ||||||
|         } |  | ||||||
|         for (const auto& sampler : ir.GetSamplers()) { |  | ||||||
|             entries.samplers.emplace_back(sampler); |  | ||||||
|         } |  | ||||||
|         for (const auto& [offset, image] : ir.GetImages()) { |  | ||||||
|             entries.images.emplace_back(image); |  | ||||||
|         } |  | ||||||
|         for (const auto& [base, usage] : ir.GetGlobalMemory()) { |  | ||||||
|             entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, |  | ||||||
|                                                        usage.is_read, usage.is_written); |  | ||||||
|         } |  | ||||||
|         entries.clip_distances = ir.GetClipDistances(); |  | ||||||
|         entries.shader_length = ir.GetLength(); |  | ||||||
|         return entries; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     friend class ASTDecompiler; |     friend class ASTDecompiler; | ||||||
|     friend class ExprDecompiler; |     friend class ExprDecompiler; | ||||||
| @@ -2338,6 +2317,11 @@ public: | |||||||
|         inner += expr.value ? "true" : "false"; |         inner += expr.value ? "true" : "false"; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void operator()(VideoCommon::Shader::ExprGprEqual& expr) { | ||||||
|  |         inner += | ||||||
|  |             "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const std::string& GetResult() const { |     const std::string& GetResult() const { | ||||||
|         return inner; |         return inner; | ||||||
|     } |     } | ||||||
| @@ -2476,25 +2460,46 @@ void GLSLDecompiler::DecompileAST() { | |||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| std::string GetCommonDeclarations() { | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||||||
|     return fmt::format( |     ShaderEntries entries; | ||||||
|         "#define ftoi floatBitsToInt\n" |     for (const auto& cbuf : ir.GetConstantBuffers()) { | ||||||
|         "#define ftou floatBitsToUint\n" |         entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), | ||||||
|         "#define itof intBitsToFloat\n" |                                            cbuf.first); | ||||||
|         "#define utof uintBitsToFloat\n\n" |     } | ||||||
|         "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n" |     for (const auto& sampler : ir.GetSamplers()) { | ||||||
|         "    bvec2 is_nan1 = isnan(pair1);\n" |         entries.samplers.emplace_back(sampler); | ||||||
|         "    bvec2 is_nan2 = isnan(pair2);\n" |     } | ||||||
|         "    return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || " |     for (const auto& [offset, image] : ir.GetImages()) { | ||||||
|         "is_nan2.y);\n" |         entries.images.emplace_back(image); | ||||||
|         "}}\n\n"); |     } | ||||||
|  |     for (const auto& [base, usage] : ir.GetGlobalMemory()) { | ||||||
|  |         entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read, | ||||||
|  |                                                    usage.is_written); | ||||||
|  |     } | ||||||
|  |     entries.clip_distances = ir.GetClipDistances(); | ||||||
|  |     entries.shader_length = ir.GetLength(); | ||||||
|  |     return entries; | ||||||
| } | } | ||||||
|  |  | ||||||
| ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | std::string GetCommonDeclarations() { | ||||||
|                         const std::string& suffix) { |     return R"(#define ftoi floatBitsToInt | ||||||
|  | #define ftou floatBitsToUint | ||||||
|  | #define itof intBitsToFloat | ||||||
|  | #define utof uintBitsToFloat | ||||||
|  |  | ||||||
|  | bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { | ||||||
|  |     bvec2 is_nan1 = isnan(pair1); | ||||||
|  |     bvec2 is_nan2 = isnan(pair2); | ||||||
|  |     return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); | ||||||
|  | } | ||||||
|  | )"; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | ||||||
|  |                       const std::string& suffix) { | ||||||
|     GLSLDecompiler decompiler(device, ir, stage, suffix); |     GLSLDecompiler decompiler(device, ir, stage, suffix); | ||||||
|     decompiler.Decompile(); |     decompiler.Decompile(); | ||||||
|     return {decompiler.GetResult(), decompiler.GetShaderEntries()}; |     return decompiler.GetResult(); | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace OpenGL::GLShader | } // namespace OpenGL::GLShader | ||||||
|   | |||||||
| @@ -34,10 +34,7 @@ enum class ProgramType : u32 { | |||||||
|  |  | ||||||
| namespace OpenGL::GLShader { | namespace OpenGL::GLShader { | ||||||
|  |  | ||||||
| struct ShaderEntries; |  | ||||||
|  |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
| using ProgramResult = std::pair<std::string, ShaderEntries>; |  | ||||||
| using SamplerEntry = VideoCommon::Shader::Sampler; | using SamplerEntry = VideoCommon::Shader::Sampler; | ||||||
| using ImageEntry = VideoCommon::Shader::Image; | using ImageEntry = VideoCommon::Shader::Image; | ||||||
|  |  | ||||||
| @@ -93,9 +90,11 @@ struct ShaderEntries { | |||||||
|     std::size_t shader_length{}; |     std::size_t shader_length{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); | ||||||
|  |  | ||||||
| std::string GetCommonDeclarations(); | std::string GetCommonDeclarations(); | ||||||
|  |  | ||||||
| ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||||||
|                         ProgramType stage, const std::string& suffix); |                       ProgramType stage, const std::string& suffix); | ||||||
|  |  | ||||||
| } // namespace OpenGL::GLShader | } // namespace OpenGL::GLShader | ||||||
|   | |||||||
| @@ -22,6 +22,29 @@ | |||||||
|  |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
|  |  | ||||||
|  | using VideoCommon::Shader::BindlessSamplerMap; | ||||||
|  | using VideoCommon::Shader::BoundSamplerMap; | ||||||
|  | using VideoCommon::Shader::KeyMap; | ||||||
|  |  | ||||||
|  | namespace { | ||||||
|  |  | ||||||
|  | struct ConstBufferKey { | ||||||
|  |     u32 cbuf; | ||||||
|  |     u32 offset; | ||||||
|  |     u32 value; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct BoundSamplerKey { | ||||||
|  |     u32 offset; | ||||||
|  |     Tegra::Engines::SamplerDescriptor sampler; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct BindlessSamplerKey { | ||||||
|  |     u32 cbuf; | ||||||
|  |     u32 offset; | ||||||
|  |     Tegra::Engines::SamplerDescriptor sampler; | ||||||
|  | }; | ||||||
|  |  | ||||||
| using ShaderCacheVersionHash = std::array<u8, 64>; | using ShaderCacheVersionHash = std::array<u8, 64>; | ||||||
|  |  | ||||||
| enum class TransferableEntryKind : u32 { | enum class TransferableEntryKind : u32 { | ||||||
| @@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 { | |||||||
|     Usage, |     Usage, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| enum class PrecompiledEntryKind : u32 { | constexpr u32 NativeVersion = 5; | ||||||
|     Decompiled, |  | ||||||
|     Dump, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| constexpr u32 NativeVersion = 4; |  | ||||||
|  |  | ||||||
| // Making sure sizes doesn't change by accident | // Making sure sizes doesn't change by accident | ||||||
| static_assert(sizeof(BaseBindings) == 16); | static_assert(sizeof(BaseBindings) == 16); | ||||||
| static_assert(sizeof(ShaderDiskCacheUsage) == 40); |  | ||||||
|  |  | ||||||
| namespace { |  | ||||||
|  |  | ||||||
| ShaderCacheVersionHash GetShaderCacheVersionHash() { | ShaderCacheVersionHash GetShaderCacheVersionHash() { | ||||||
|     ShaderCacheVersionHash hash{}; |     ShaderCacheVersionHash hash{}; | ||||||
| @@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||||||
|     return hash; |     return hash; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | ||||||
|                                        u32 program_code_size, u32 program_code_size_b, |  | ||||||
|                                        ProgramCode program_code, ProgramCode program_code_b) |                                        ProgramCode program_code, ProgramCode program_code_b) | ||||||
|     : unique_identifier{unique_identifier}, program_type{program_type}, |     : unique_identifier{unique_identifier}, program_type{program_type}, | ||||||
|       program_code_size{program_code_size}, program_code_size_b{program_code_size_b}, |  | ||||||
|       program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} |       program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {} | ||||||
|  |  | ||||||
| ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; | ||||||
| @@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { | |||||||
| bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { | ||||||
|     if (file.WriteObject(unique_identifier) != 1 || |     if (file.WriteObject(unique_identifier) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(program_type)) != 1 || |         file.WriteObject(static_cast<u32>(program_type)) != 1 || | ||||||
|         file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) { |         file.WriteObject(static_cast<u32>(program_code.size())) != 1 || | ||||||
|  |         file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (file.WriteArray(program_code.data(), program_code_size) != program_code_size) |     if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size()) | ||||||
|         return false; |         return false; | ||||||
|  |  | ||||||
|     if (HasProgramA() && |     if (HasProgramA() && | ||||||
|         file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) { |         file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|     return true; |     return true; | ||||||
| @@ -127,13 +141,13 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||||||
|     u32 version{}; |     u32 version{}; | ||||||
|     if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { |     if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { | ||||||
|         LOG_ERROR(Render_OpenGL, |         LOG_ERROR(Render_OpenGL, | ||||||
|                   "Failed to get transferable cache version for title id={} - skipping", |                   "Failed to get transferable cache version for title id={}, skipping", | ||||||
|                   GetTitleID()); |                   GetTitleID()); | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if (version < NativeVersion) { |     if (version < NativeVersion) { | ||||||
|         LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing"); |         LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing"); | ||||||
|         file.Close(); |         file.Close(); | ||||||
|         InvalidateTransferable(); |         InvalidateTransferable(); | ||||||
|         is_usable = true; |         is_usable = true; | ||||||
| @@ -141,17 +155,18 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||||||
|     } |     } | ||||||
|     if (version > NativeVersion) { |     if (version > NativeVersion) { | ||||||
|         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " |         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " | ||||||
|                                    "of the emulator - skipping"); |                                    "of the emulator, skipping"); | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Version is valid, load the shaders |     // Version is valid, load the shaders | ||||||
|  |     constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; | ||||||
|     std::vector<ShaderDiskCacheRaw> raws; |     std::vector<ShaderDiskCacheRaw> raws; | ||||||
|     std::vector<ShaderDiskCacheUsage> usages; |     std::vector<ShaderDiskCacheUsage> usages; | ||||||
|     while (file.Tell() < file.GetSize()) { |     while (file.Tell() < file.GetSize()) { | ||||||
|         TransferableEntryKind kind{}; |         TransferableEntryKind kind{}; | ||||||
|         if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { |         if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { | ||||||
|             LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping"); |             LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -159,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||||||
|         case TransferableEntryKind::Raw: { |         case TransferableEntryKind::Raw: { | ||||||
|             ShaderDiskCacheRaw entry; |             ShaderDiskCacheRaw entry; | ||||||
|             if (!entry.Load(file)) { |             if (!entry.Load(file)) { | ||||||
|                 LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping"); |                 LOG_ERROR(Render_OpenGL, error_loading); | ||||||
|                 return {}; |                 return {}; | ||||||
|             } |             } | ||||||
|             transferable.insert({entry.GetUniqueIdentifier(), {}}); |             transferable.insert({entry.GetUniqueIdentifier(), {}}); | ||||||
| @@ -167,16 +182,45 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         case TransferableEntryKind::Usage: { |         case TransferableEntryKind::Usage: { | ||||||
|             ShaderDiskCacheUsage usage{}; |             ShaderDiskCacheUsage usage; | ||||||
|             if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) { |  | ||||||
|                 LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping"); |             u32 num_keys{}; | ||||||
|  |             u32 num_bound_samplers{}; | ||||||
|  |             u32 num_bindless_samplers{}; | ||||||
|  |             if (file.ReadArray(&usage.unique_identifier, 1) != 1 || | ||||||
|  |                 file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || | ||||||
|  |                 file.ReadArray(&num_bound_samplers, 1) != 1 || | ||||||
|  |                 file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||||||
|  |                 LOG_ERROR(Render_OpenGL, error_loading); | ||||||
|                 return {}; |                 return {}; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             std::vector<ConstBufferKey> keys(num_keys); | ||||||
|  |             std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||||||
|  |             std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||||||
|  |             if (file.ReadArray(keys.data(), keys.size()) != keys.size() || | ||||||
|  |                 file.ReadArray(bound_samplers.data(), bound_samplers.size()) != | ||||||
|  |                     bound_samplers.size() || | ||||||
|  |                 file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != | ||||||
|  |                     bindless_samplers.size()) { | ||||||
|  |                 LOG_ERROR(Render_OpenGL, error_loading); | ||||||
|  |                 return {}; | ||||||
|  |             } | ||||||
|  |             for (const auto& key : keys) { | ||||||
|  |                 usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||||||
|  |             } | ||||||
|  |             for (const auto& key : bound_samplers) { | ||||||
|  |                 usage.bound_samplers.emplace(key.offset, key.sampler); | ||||||
|  |             } | ||||||
|  |             for (const auto& key : bindless_samplers) { | ||||||
|  |                 usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||||||
|  |             } | ||||||
|  |  | ||||||
|             usages.push_back(std::move(usage)); |             usages.push_back(std::move(usage)); | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         default: |         default: | ||||||
|             LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping", |             LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", | ||||||
|                       static_cast<u32>(kind)); |                       static_cast<u32>(kind)); | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
| @@ -186,13 +230,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { | |||||||
|     return {{std::move(raws), std::move(usages)}}; |     return {{std::move(raws), std::move(usages)}}; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap> | std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> | ||||||
| ShaderDiskCacheOpenGL::LoadPrecompiled() { | ShaderDiskCacheOpenGL::LoadPrecompiled() { | ||||||
|     if (!is_usable) { |     if (!is_usable) { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     FileUtil::IOFile file(GetPrecompiledPath(), "rb"); |     std::string path = GetPrecompiledPath(); | ||||||
|  |     FileUtil::IOFile file(path, "rb"); | ||||||
|     if (!file.IsOpen()) { |     if (!file.IsOpen()) { | ||||||
|         LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", |         LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", | ||||||
|                  GetTitleID()); |                  GetTitleID()); | ||||||
| @@ -202,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||||||
|     const auto result = LoadPrecompiledFile(file); |     const auto result = LoadPrecompiledFile(file); | ||||||
|     if (!result) { |     if (!result) { | ||||||
|         LOG_INFO(Render_OpenGL, |         LOG_INFO(Render_OpenGL, | ||||||
|                  "Failed to load precompiled cache for game with title id={} - removing", |                  "Failed to load precompiled cache for game with title id={}, removing", | ||||||
|                  GetTitleID()); |                  GetTitleID()); | ||||||
|         file.Close(); |         file.Close(); | ||||||
|         InvalidatePrecompiled(); |         InvalidatePrecompiled(); | ||||||
| @@ -211,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() { | |||||||
|     return *result; |     return *result; | ||||||
| } | } | ||||||
|  |  | ||||||
| std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>> | std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | ||||||
| ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | ||||||
|     // Read compressed file from disk and decompress to virtual precompiled cache file |     // Read compressed file from disk and decompress to virtual precompiled cache file | ||||||
|     std::vector<u8> compressed(file.GetSize()); |     std::vector<u8> compressed(file.GetSize()); | ||||||
| @@ -231,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { | |||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; |  | ||||||
|     ShaderDumpsMap dumps; |     ShaderDumpsMap dumps; | ||||||
|     while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { |     while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { | ||||||
|         PrecompiledEntryKind kind{}; |         u32 num_keys{}; | ||||||
|         if (!LoadObjectFromPrecompiled(kind)) { |         u32 num_bound_samplers{}; | ||||||
|  |         u32 num_bindless_samplers{}; | ||||||
|  |         ShaderDiskCacheUsage usage; | ||||||
|  |         if (!LoadObjectFromPrecompiled(usage.unique_identifier) || | ||||||
|  |             !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) || | ||||||
|  |             !LoadObjectFromPrecompiled(num_bound_samplers) || | ||||||
|  |             !LoadObjectFromPrecompiled(num_bindless_samplers)) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         std::vector<ConstBufferKey> keys(num_keys); | ||||||
|  |         std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); | ||||||
|  |         std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); | ||||||
|  |         if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || | ||||||
|  |             !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != | ||||||
|  |                 bound_samplers.size() || | ||||||
|  |             !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != | ||||||
|  |                 bindless_samplers.size()) { | ||||||
|  |             return {}; | ||||||
|  |         } | ||||||
|  |         for (const auto& key : keys) { | ||||||
|  |             usage.keys.insert({{key.cbuf, key.offset}, key.value}); | ||||||
|  |         } | ||||||
|  |         for (const auto& key : bound_samplers) { | ||||||
|  |             usage.bound_samplers.emplace(key.offset, key.sampler); | ||||||
|  |         } | ||||||
|  |         for (const auto& key : bindless_samplers) { | ||||||
|  |             usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         ShaderDiskCacheDump dump; | ||||||
|  |         if (!LoadObjectFromPrecompiled(dump.binary_format)) { | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         switch (kind) { |         u32 binary_length{}; | ||||||
|         case PrecompiledEntryKind::Decompiled: { |         if (!LoadObjectFromPrecompiled(binary_length)) { | ||||||
|             u64 unique_identifier{}; |  | ||||||
|             if (!LoadObjectFromPrecompiled(unique_identifier)) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             auto entry = LoadDecompiledEntry(); |  | ||||||
|             if (!entry) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|             decompiled.insert({unique_identifier, std::move(*entry)}); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         case PrecompiledEntryKind::Dump: { |  | ||||||
|             ShaderDiskCacheUsage usage; |  | ||||||
|             if (!LoadObjectFromPrecompiled(usage)) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             ShaderDiskCacheDump dump; |  | ||||||
|             if (!LoadObjectFromPrecompiled(dump.binary_format)) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             u32 binary_length{}; |  | ||||||
|             if (!LoadObjectFromPrecompiled(binary_length)) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             dump.binary.resize(binary_length); |  | ||||||
|             if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { |  | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             dumps.insert({usage, dump}); |  | ||||||
|             break; |  | ||||||
|         } |  | ||||||
|         default: |  | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
|     } |  | ||||||
|     return {{decompiled, dumps}}; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() { |         dump.binary.resize(binary_length); | ||||||
|     u32 code_size{}; |         if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { | ||||||
|     if (!LoadObjectFromPrecompiled(code_size)) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     std::string code(code_size, '\0'); |  | ||||||
|     if (!LoadArrayFromPrecompiled(code.data(), code.size())) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     ShaderDiskCacheDecompiled entry; |  | ||||||
|     entry.code = std::move(code); |  | ||||||
|  |  | ||||||
|     u32 const_buffers_count{}; |  | ||||||
|     if (!LoadObjectFromPrecompiled(const_buffers_count)) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (u32 i = 0; i < const_buffers_count; ++i) { |  | ||||||
|         u32 max_offset{}; |  | ||||||
|         u32 index{}; |  | ||||||
|         bool is_indirect{}; |  | ||||||
|         if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) || |  | ||||||
|             !LoadObjectFromPrecompiled(is_indirect)) { |  | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
|         entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     u32 samplers_count{}; |         dumps.emplace(std::move(usage), dump); | ||||||
|     if (!LoadObjectFromPrecompiled(samplers_count)) { |  | ||||||
|         return {}; |  | ||||||
|     } |     } | ||||||
|  |     return dumps; | ||||||
|     for (u32 i = 0; i < samplers_count; ++i) { |  | ||||||
|         u64 offset{}; |  | ||||||
|         u64 index{}; |  | ||||||
|         u32 type{}; |  | ||||||
|         bool is_array{}; |  | ||||||
|         bool is_shadow{}; |  | ||||||
|         bool is_bindless{}; |  | ||||||
|         if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || |  | ||||||
|             !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) || |  | ||||||
|             !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         entry.entries.samplers.emplace_back( |  | ||||||
|             static_cast<std::size_t>(offset), static_cast<std::size_t>(index), |  | ||||||
|             static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     u32 images_count{}; |  | ||||||
|     if (!LoadObjectFromPrecompiled(images_count)) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     for (u32 i = 0; i < images_count; ++i) { |  | ||||||
|         u64 offset{}; |  | ||||||
|         u64 index{}; |  | ||||||
|         u32 type{}; |  | ||||||
|         u8 is_bindless{}; |  | ||||||
|         u8 is_written{}; |  | ||||||
|         u8 is_read{}; |  | ||||||
|         u8 is_atomic{}; |  | ||||||
|         if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) || |  | ||||||
|             !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) || |  | ||||||
|             !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) || |  | ||||||
|             !LoadObjectFromPrecompiled(is_atomic)) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         entry.entries.images.emplace_back( |  | ||||||
|             static_cast<std::size_t>(offset), static_cast<std::size_t>(index), |  | ||||||
|             static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0, |  | ||||||
|             is_read != 0, is_atomic != 0); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     u32 global_memory_count{}; |  | ||||||
|     if (!LoadObjectFromPrecompiled(global_memory_count)) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     for (u32 i = 0; i < global_memory_count; ++i) { |  | ||||||
|         u32 cbuf_index{}; |  | ||||||
|         u32 cbuf_offset{}; |  | ||||||
|         bool is_read{}; |  | ||||||
|         bool is_written{}; |  | ||||||
|         if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) || |  | ||||||
|             !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|         entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read, |  | ||||||
|                                                          is_written); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (auto& clip_distance : entry.entries.clip_distances) { |  | ||||||
|         if (!LoadObjectFromPrecompiled(clip_distance)) { |  | ||||||
|             return {}; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     u64 shader_length{}; |  | ||||||
|     if (!LoadObjectFromPrecompiled(shader_length)) { |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
|     entry.entries.shader_length = static_cast<std::size_t>(shader_length); |  | ||||||
|  |  | ||||||
|     return entry; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code, |  | ||||||
|                                                const GLShader::ShaderEntries& entries) { |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) || |  | ||||||
|         !SaveObjectToPrecompiled(unique_identifier) || |  | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(code.size())) || |  | ||||||
|         !SaveArrayToPrecompiled(code.data(), code.size())) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     for (const auto& cbuf : entries.const_buffers) { |  | ||||||
|         if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) || |  | ||||||
|             !SaveObjectToPrecompiled(cbuf.IsIndirect())) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     for (const auto& sampler : entries.samplers) { |  | ||||||
|         if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) || |  | ||||||
|             !SaveObjectToPrecompiled(sampler.IsArray()) || |  | ||||||
|             !SaveObjectToPrecompiled(sampler.IsShadow()) || |  | ||||||
|             !SaveObjectToPrecompiled(sampler.IsBindless())) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     for (const auto& image : entries.images) { |  | ||||||
|         if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|     for (const auto& gmem : entries.global_memory_entries) { |  | ||||||
|         if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) || |  | ||||||
|             !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) || |  | ||||||
|             !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (const bool clip_distance : entries.clip_distances) { |  | ||||||
|         if (!SaveObjectToPrecompiled(clip_distance)) { |  | ||||||
|             return false; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) { |  | ||||||
|         return false; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     return true; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void ShaderDiskCacheOpenGL::InvalidateTransferable() { | void ShaderDiskCacheOpenGL::InvalidateTransferable() { | ||||||
| @@ -494,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     FileUtil::IOFile file = AppendTransferableFile(); |     FileUtil::IOFile file = AppendTransferableFile(); | ||||||
|     if (!file.IsOpen()) |     if (!file.IsOpen()) { | ||||||
|         return; |         return; | ||||||
|  |     } | ||||||
|     if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { |     if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing"); |         LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); | ||||||
|         file.Close(); |         file.Close(); | ||||||
|         InvalidateTransferable(); |         InvalidateTransferable(); | ||||||
|         return; |         return; | ||||||
| @@ -523,29 +387,39 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { | |||||||
|     FileUtil::IOFile file = AppendTransferableFile(); |     FileUtil::IOFile file = AppendTransferableFile(); | ||||||
|     if (!file.IsOpen()) |     if (!file.IsOpen()) | ||||||
|         return; |         return; | ||||||
|  |     const auto Close = [&] { | ||||||
|     if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) { |         LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing"); |  | ||||||
|         file.Close(); |         file.Close(); | ||||||
|         InvalidateTransferable(); |         InvalidateTransferable(); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     if (file.WriteObject(TransferableEntryKind::Usage) != 1 || | ||||||
|  |         file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || | ||||||
|  |         file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || | ||||||
|  |         file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || | ||||||
|  |         file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { | ||||||
|  |         Close(); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| } |     for (const auto& [pair, value] : usage.keys) { | ||||||
|  |         const auto [cbuf, offset] = pair; | ||||||
| void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code, |         if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { | ||||||
|                                            const GLShader::ShaderEntries& entries) { |             Close(); | ||||||
|     if (!is_usable) { |             return; | ||||||
|         return; |         } | ||||||
|     } |     } | ||||||
|  |     for (const auto& [offset, sampler] : usage.bound_samplers) { | ||||||
|     if (precompiled_cache_virtual_file.GetSize() == 0) { |         if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { | ||||||
|         SavePrecompiledHeaderToVirtualPrecompiledCache(); |             Close(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |     for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||||||
|     if (!SaveDecompiledFile(unique_identifier, code, entries)) { |         const auto [cbuf, offset] = pair; | ||||||
|         LOG_ERROR(Render_OpenGL, |         if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||||||
|                   "Failed to save decompiled entry to the precompiled file - removing"); |             Close(); | ||||||
|         InvalidatePrecompiled(); |             return; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -554,6 +428,13 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header | ||||||
|  |     // when writing the dump. This should be done the moment I get access to write to the virtual | ||||||
|  |     // file. | ||||||
|  |     if (precompiled_cache_virtual_file.GetSize() == 0) { | ||||||
|  |         SavePrecompiledHeaderToVirtualPrecompiledCache(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     GLint binary_length{}; |     GLint binary_length{}; | ||||||
|     glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); |     glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); | ||||||
|  |  | ||||||
| @@ -561,21 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p | |||||||
|     std::vector<u8> binary(binary_length); |     std::vector<u8> binary(binary_length); | ||||||
|     glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); |     glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); | ||||||
|  |  | ||||||
|     if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) || |     const auto Close = [&] { | ||||||
|         !SaveObjectToPrecompiled(usage) || |         LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || |  | ||||||
|         !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || |  | ||||||
|         !SaveArrayToPrecompiled(binary.data(), binary.size())) { |  | ||||||
|         LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing", |  | ||||||
|                   usage.unique_identifier); |                   usage.unique_identifier); | ||||||
|         InvalidatePrecompiled(); |         InvalidatePrecompiled(); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     if (!SaveObjectToPrecompiled(usage.unique_identifier) || | ||||||
|  |         !SaveObjectToPrecompiled(usage.variant) || | ||||||
|  |         !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || | ||||||
|  |         !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || | ||||||
|  |         !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { | ||||||
|  |         Close(); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |     for (const auto& [pair, value] : usage.keys) { | ||||||
|  |         const auto [cbuf, offset] = pair; | ||||||
|  |         if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { | ||||||
|  |             Close(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     for (const auto& [offset, sampler] : usage.bound_samplers) { | ||||||
|  |         if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { | ||||||
|  |             Close(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     for (const auto& [pair, sampler] : usage.bindless_samplers) { | ||||||
|  |         const auto [cbuf, offset] = pair; | ||||||
|  |         if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { | ||||||
|  |             Close(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || | ||||||
|  |         !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || | ||||||
|  |         !SaveArrayToPrecompiled(binary.data(), binary.size())) { | ||||||
|  |         Close(); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { | ||||||
|     if (!EnsureDirectories()) |     if (!EnsureDirectories()) { | ||||||
|         return {}; |         return {}; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const auto transferable_path{GetTransferablePath()}; |     const auto transferable_path{GetTransferablePath()}; | ||||||
|     const bool existed = FileUtil::Exists(transferable_path); |     const bool existed = FileUtil::Exists(transferable_path); | ||||||
| @@ -607,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() { | |||||||
|  |  | ||||||
| void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { | ||||||
|     precompiled_cache_virtual_file_offset = 0; |     precompiled_cache_virtual_file_offset = 0; | ||||||
|     const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); |     const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes(); | ||||||
|     const std::vector<u8>& compressed = |     const std::vector<u8> compressed = | ||||||
|         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); |         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); | ||||||
|  |  | ||||||
|     const auto precompiled_path{GetPrecompiledPath()}; |     const auto precompiled_path{GetPrecompiledPath()}; | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ | |||||||
| #include <optional> | #include <optional> | ||||||
| #include <string> | #include <string> | ||||||
| #include <tuple> | #include <tuple> | ||||||
|  | #include <type_traits> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
| #include <unordered_set> | #include <unordered_set> | ||||||
| #include <utility> | #include <utility> | ||||||
| @@ -19,6 +20,7 @@ | |||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/file_sys/vfs_vector.h" | #include "core/file_sys/vfs_vector.h" | ||||||
| #include "video_core/renderer_opengl/gl_shader_gen.h" | #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||||
|  | #include "video_core/shader/const_buffer_locker.h" | ||||||
|  |  | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| @@ -53,6 +55,7 @@ struct BaseBindings { | |||||||
|         return !operator==(rhs); |         return !operator==(rhs); | ||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  | static_assert(std::is_trivially_copyable_v<BaseBindings>); | ||||||
|  |  | ||||||
| /// Describes the different variants a single program can be compiled. | /// Describes the different variants a single program can be compiled. | ||||||
| struct ProgramVariant { | struct ProgramVariant { | ||||||
| @@ -70,13 +73,20 @@ struct ProgramVariant { | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | static_assert(std::is_trivially_copyable_v<ProgramVariant>); | ||||||
|  |  | ||||||
| /// Describes how a shader is used. | /// Describes how a shader is used. | ||||||
| struct ShaderDiskCacheUsage { | struct ShaderDiskCacheUsage { | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     ProgramVariant variant; |     ProgramVariant variant; | ||||||
|  |     VideoCommon::Shader::KeyMap keys; | ||||||
|  |     VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||||||
|  |     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||||||
|  |  | ||||||
|     bool operator==(const ShaderDiskCacheUsage& rhs) const { |     bool operator==(const ShaderDiskCacheUsage& rhs) const { | ||||||
|         return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant); |         return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == | ||||||
|  |                std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, | ||||||
|  |                         rhs.bindless_samplers); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     bool operator!=(const ShaderDiskCacheUsage& rhs) const { |     bool operator!=(const ShaderDiskCacheUsage& rhs) const { | ||||||
| @@ -123,8 +133,7 @@ namespace OpenGL { | |||||||
| class ShaderDiskCacheRaw { | class ShaderDiskCacheRaw { | ||||||
| public: | public: | ||||||
|     explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, |     explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | ||||||
|                                 u32 program_code_size, u32 program_code_size_b, |                                 ProgramCode program_code, ProgramCode program_code_b = {}); | ||||||
|                                 ProgramCode program_code, ProgramCode program_code_b); |  | ||||||
|     ShaderDiskCacheRaw(); |     ShaderDiskCacheRaw(); | ||||||
|     ~ShaderDiskCacheRaw(); |     ~ShaderDiskCacheRaw(); | ||||||
|  |  | ||||||
| @@ -155,22 +164,14 @@ public: | |||||||
| private: | private: | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     ProgramType program_type{}; |     ProgramType program_type{}; | ||||||
|     u32 program_code_size{}; |  | ||||||
|     u32 program_code_size_b{}; |  | ||||||
|  |  | ||||||
|     ProgramCode program_code; |     ProgramCode program_code; | ||||||
|     ProgramCode program_code_b; |     ProgramCode program_code_b; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Contains decompiled data from a shader |  | ||||||
| struct ShaderDiskCacheDecompiled { |  | ||||||
|     std::string code; |  | ||||||
|     GLShader::ShaderEntries entries; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /// Contains an OpenGL dumped binary program | /// Contains an OpenGL dumped binary program | ||||||
| struct ShaderDiskCacheDump { | struct ShaderDiskCacheDump { | ||||||
|     GLenum binary_format; |     GLenum binary_format{}; | ||||||
|     std::vector<u8> binary; |     std::vector<u8> binary; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -184,9 +185,7 @@ public: | |||||||
|     LoadTransferable(); |     LoadTransferable(); | ||||||
|  |  | ||||||
|     /// Loads current game's precompiled cache. Invalidates on failure. |     /// Loads current game's precompiled cache. Invalidates on failure. | ||||||
|     std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, |     std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); | ||||||
|               std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> |  | ||||||
|     LoadPrecompiled(); |  | ||||||
|  |  | ||||||
|     /// Removes the transferable (and precompiled) cache file. |     /// Removes the transferable (and precompiled) cache file. | ||||||
|     void InvalidateTransferable(); |     void InvalidateTransferable(); | ||||||
| @@ -200,10 +199,6 @@ public: | |||||||
|     /// Saves shader usage to the transferable file. Does not check for collisions. |     /// Saves shader usage to the transferable file. Does not check for collisions. | ||||||
|     void SaveUsage(const ShaderDiskCacheUsage& usage); |     void SaveUsage(const ShaderDiskCacheUsage& usage); | ||||||
|  |  | ||||||
|     /// Saves a decompiled entry to the precompiled file. Does not check for collisions. |  | ||||||
|     void SaveDecompiled(u64 unique_identifier, const std::string& code, |  | ||||||
|                         const GLShader::ShaderEntries& entries); |  | ||||||
|  |  | ||||||
|     /// Saves a dump entry to the precompiled file. Does not check for collisions. |     /// Saves a dump entry to the precompiled file. Does not check for collisions. | ||||||
|     void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); |     void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); | ||||||
|  |  | ||||||
| @@ -212,18 +207,9 @@ public: | |||||||
|  |  | ||||||
| private: | private: | ||||||
|     /// Loads the transferable cache. Returns empty on failure. |     /// Loads the transferable cache. Returns empty on failure. | ||||||
|     std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, |     std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> | ||||||
|                             std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>> |  | ||||||
|     LoadPrecompiledFile(FileUtil::IOFile& file); |     LoadPrecompiledFile(FileUtil::IOFile& file); | ||||||
|  |  | ||||||
|     /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on |  | ||||||
|     /// failure. |  | ||||||
|     std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry(); |  | ||||||
|  |  | ||||||
|     /// Saves a decompiled entry to the passed file. Returns true on success. |  | ||||||
|     bool SaveDecompiledFile(u64 unique_identifier, const std::string& code, |  | ||||||
|                             const GLShader::ShaderEntries& entries); |  | ||||||
|  |  | ||||||
|     /// Opens current game's transferable file and write it's header if it doesn't exist |     /// Opens current game's transferable file and write it's header if it doesn't exist | ||||||
|     FileUtil::IOFile AppendTransferableFile() const; |     FileUtil::IOFile AppendTransferableFile() const; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -16,17 +16,8 @@ using VideoCommon::Shader::CompilerSettings; | |||||||
| using VideoCommon::Shader::ProgramCode; | using VideoCommon::Shader::ProgramCode; | ||||||
| using VideoCommon::Shader::ShaderIR; | using VideoCommon::Shader::ShaderIR; | ||||||
|  |  | ||||||
| static constexpr u32 PROGRAM_OFFSET = 10; | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { | ||||||
| static constexpr u32 COMPUTE_OFFSET = 0; |     std::string out = GetCommonDeclarations(); | ||||||
|  |  | ||||||
| static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true}; |  | ||||||
|  |  | ||||||
| ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { |  | ||||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |  | ||||||
|  |  | ||||||
|     std::string out = "// Shader Unique Id: VS" + id + "\n\n"; |  | ||||||
|     out += GetCommonDeclarations(); |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | ||||||
|     vec4 viewport_flip; |     vec4 viewport_flip; | ||||||
| @@ -34,17 +25,10 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| )"; | )"; | ||||||
|  |     const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB; | ||||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); |     out += Decompile(device, ir, stage, "vertex"); | ||||||
|     const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; |     if (ir_b) { | ||||||
|     ProgramResult program = Decompile(device, program_ir, stage, "vertex"); |         out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b"); | ||||||
|     out += program.first; |  | ||||||
|  |  | ||||||
|     if (setup.IsDualProgram()) { |  | ||||||
|         const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b, |  | ||||||
|                                     settings); |  | ||||||
|         ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); |  | ||||||
|         out += program_b.first; |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| @@ -52,7 +36,7 @@ void main() { | |||||||
|     execute_vertex(); |     execute_vertex(); | ||||||
| )"; | )"; | ||||||
|  |  | ||||||
|     if (setup.IsDualProgram()) { |     if (ir_b) { | ||||||
|         out += "    execute_vertex_b();"; |         out += "    execute_vertex_b();"; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -66,17 +50,13 @@ void main() { | |||||||
|         // Viewport can be flipped, which is unsupported by glViewport |         // Viewport can be flipped, which is unsupported by glViewport | ||||||
|         gl_Position.xy *= viewport_flip.xy; |         gl_Position.xy *= viewport_flip.xy; | ||||||
|     } |     } | ||||||
| })"; | } | ||||||
|  | )"; | ||||||
|     return {std::move(out), std::move(program.second)}; |     return out; | ||||||
| } | } | ||||||
|  |  | ||||||
| ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) { | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { | ||||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |     std::string out = GetCommonDeclarations(); | ||||||
|  |  | ||||||
|     std::string out = "// Shader Unique Id: GS" + id + "\n\n"; |  | ||||||
|     out += GetCommonDeclarations(); |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | ||||||
|     vec4 viewport_flip; |     vec4 viewport_flip; | ||||||
| @@ -84,25 +64,18 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| )"; | )"; | ||||||
|  |     out += Decompile(device, ir, ProgramType::Geometry, "geometry"); | ||||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); |  | ||||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); |  | ||||||
|     out += program.first; |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| void main() { | void main() { | ||||||
|     execute_geometry(); |     execute_geometry(); | ||||||
| };)"; | } | ||||||
|  | )"; | ||||||
|     return {std::move(out), std::move(program.second)}; |     return out; | ||||||
| } | } | ||||||
|  |  | ||||||
| ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) { | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { | ||||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |     std::string out = GetCommonDeclarations(); | ||||||
|  |  | ||||||
|     std::string out = "// Shader Unique Id: FS" + id + "\n\n"; |  | ||||||
|     out += GetCommonDeclarations(); |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| layout (location = 0) out vec4 FragColor0; | layout (location = 0) out vec4 FragColor0; | ||||||
| layout (location = 1) out vec4 FragColor1; | layout (location = 1) out vec4 FragColor1; | ||||||
| @@ -119,36 +92,25 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| )"; | )"; | ||||||
|  |     out += Decompile(device, ir, ProgramType::Fragment, "fragment"); | ||||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings); |  | ||||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); |  | ||||||
|     out += program.first; |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| void main() { | void main() { | ||||||
|     execute_fragment(); |     execute_fragment(); | ||||||
| } | } | ||||||
|  |  | ||||||
| )"; | )"; | ||||||
|     return {std::move(out), std::move(program.second)}; |     return out; | ||||||
| } | } | ||||||
|  |  | ||||||
| ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { | ||||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); |     std::string out = GetCommonDeclarations(); | ||||||
|  |     out += Decompile(device, ir, ProgramType::Compute, "compute"); | ||||||
|     std::string out = "// Shader Unique Id: CS" + id + "\n\n"; |  | ||||||
|     out += GetCommonDeclarations(); |  | ||||||
|  |  | ||||||
|     const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings); |  | ||||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); |  | ||||||
|     out += program.first; |  | ||||||
|  |  | ||||||
|     out += R"( |     out += R"( | ||||||
| void main() { | void main() { | ||||||
|     execute_compute(); |     execute_compute(); | ||||||
| } | } | ||||||
| )"; | )"; | ||||||
|     return {std::move(out), std::move(program.second)}; |     return out; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace OpenGL::GLShader | } // namespace OpenGL::GLShader | ||||||
|   | |||||||
| @@ -17,44 +17,18 @@ class Device; | |||||||
| namespace OpenGL::GLShader { | namespace OpenGL::GLShader { | ||||||
|  |  | ||||||
| using VideoCommon::Shader::ProgramCode; | using VideoCommon::Shader::ProgramCode; | ||||||
|  | using VideoCommon::Shader::ShaderIR; | ||||||
| struct ShaderSetup { |  | ||||||
|     explicit ShaderSetup(ProgramCode program_code) { |  | ||||||
|         program.code = std::move(program_code); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     struct { |  | ||||||
|         ProgramCode code; |  | ||||||
|         ProgramCode code_b; // Used for dual vertex shaders |  | ||||||
|         u64 unique_identifier; |  | ||||||
|         std::size_t size_a; |  | ||||||
|         std::size_t size_b; |  | ||||||
|     } program; |  | ||||||
|  |  | ||||||
|     /// Used in scenarios where we have a dual vertex shaders |  | ||||||
|     void SetProgramB(ProgramCode program_b) { |  | ||||||
|         program.code_b = std::move(program_b); |  | ||||||
|         has_program_b = true; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     bool IsDualProgram() const { |  | ||||||
|         return has_program_b; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
| private: |  | ||||||
|     bool has_program_b{}; |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /// Generates the GLSL vertex shader program source code for the given VS program | /// Generates the GLSL vertex shader program source code for the given VS program | ||||||
| ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup); | std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); | ||||||
|  |  | ||||||
| /// Generates the GLSL geometry shader program source code for the given GS program | /// Generates the GLSL geometry shader program source code for the given GS program | ||||||
| ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup); | std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); | ||||||
|  |  | ||||||
| /// Generates the GLSL fragment shader program source code for the given FS program | /// Generates the GLSL fragment shader program source code for the given FS program | ||||||
| ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); | ||||||
|  |  | ||||||
| /// Generates the GLSL compute shader program source code for the given CS program | /// Generates the GLSL compute shader program source code for the given CS program | ||||||
| ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); | ||||||
|  |  | ||||||
| } // namespace OpenGL::GLShader | } // namespace OpenGL::GLShader | ||||||
|   | |||||||
| @@ -1704,6 +1704,13 @@ public: | |||||||
|         return expr.value ? decomp.v_true : decomp.v_false; |         return expr.value ? decomp.v_true : decomp.v_false; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Emits the condition "register `expr.gpr` equals the constant `expr.value`". | ||||||
|  |     /// The register is loaded as a float and bitcast to uint so that the comparison is done on | ||||||
|  |     /// the raw 32-bit pattern against an unsigned constant. | ||||||
|  |     Id operator()(const ExprGprEqual& expr) { | ||||||
|  |         const Id target = decomp.Constant(decomp.t_uint, expr.value); | ||||||
|  |         const Id gpr = decomp.BitcastTo<Type::Uint>( | ||||||
|  |             decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr)))); | ||||||
|  |         // OpLogicalEqual only accepts boolean operands and a boolean result type. Comparing two | ||||||
|  |         // unsigned integers requires OpIEqual, whose result type is bool. | ||||||
|  |         return decomp.Emit(decomp.OpIEqual(decomp.t_bool, gpr, target)); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     Id Visit(const Expr& node) { |     Id Visit(const Expr& node) { | ||||||
|         return std::visit(*this, *node); |         return std::visit(*this, *node); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -228,6 +228,10 @@ public: | |||||||
|         inner += expr.value ? "true" : "false"; |         inner += expr.value ? "true" : "false"; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Appends the textual form of a "register equals constant" condition to the output. | ||||||
|  |     void operator()(const ExprGprEqual& expr) { | ||||||
|  |         inner += "( gpr_"; | ||||||
|  |         inner += std::to_string(expr.gpr); | ||||||
|  |         inner += " == "; | ||||||
|  |         inner += std::to_string(expr.value); | ||||||
|  |         inner += ')'; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     const std::string& GetResult() const { |     const std::string& GetResult() const { | ||||||
|         return inner; |         return inner; | ||||||
|     } |     } | ||||||
|   | |||||||
							
								
								
									
										110
									
								
								src/video_core/shader/const_buffer_locker.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/video_core/shader/const_buffer_locker.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | |||||||
|  | // Copyright 2019 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <algorithm> | ||||||
|  | #include <memory> | ||||||
|  | #include "common/assert.h" | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/shader/const_buffer_locker.h" | ||||||
|  |  | ||||||
|  | namespace VideoCommon::Shader { | ||||||
|  |  | ||||||
|  | using Tegra::Engines::SamplerDescriptor; | ||||||
|  |  | ||||||
|  | /// Creates a locker with no engine attached: only previously inserted values can be obtained. | ||||||
|  | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage) | ||||||
|  |     : stage(shader_stage) {} | ||||||
|  |  | ||||||
|  | /// Creates a locker that reads values missing from its maps from the given engine. | ||||||
|  | ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||||||
|  |                                      Tegra::Engines::ConstBufferEngineInterface& engine) | ||||||
|  |     : stage(shader_stage), engine(&engine) {} | ||||||
|  |  | ||||||
|  | ConstBufferLocker::~ConstBufferLocker() = default; | ||||||
|  |  | ||||||
|  | /// Returns the 32-bit value registered for (buffer, offset). When nothing is registered the | ||||||
|  | /// value is read from the engine and registered; without an engine std::nullopt is returned. | ||||||
|  | std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) { | ||||||
|  |     const std::pair<u32, u32> key{buffer, offset}; | ||||||
|  |     if (const auto it = keys.find(key); it != keys.end()) { | ||||||
|  |         return it->second; | ||||||
|  |     } | ||||||
|  |     if (engine == nullptr) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // Register the freshly read value so later queries hit the map instead of the engine. | ||||||
|  |     const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset); | ||||||
|  |     keys.emplace(key, fetched); | ||||||
|  |     return fetched; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Returns the sampler descriptor registered for the given offset. When nothing is registered | ||||||
|  | /// it is read from the engine and registered; without an engine std::nullopt is returned. | ||||||
|  | std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) { | ||||||
|  |     if (const auto it = bound_samplers.find(offset); it != bound_samplers.end()) { | ||||||
|  |         return it->second; | ||||||
|  |     } | ||||||
|  |     if (engine == nullptr) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // Register the freshly read descriptor so later queries hit the map instead of the engine. | ||||||
|  |     const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset); | ||||||
|  |     bound_samplers.emplace(offset, fetched); | ||||||
|  |     return fetched; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Returns the sampler descriptor registered for (buffer, offset). When nothing is registered | ||||||
|  | /// it is read from the engine and registered; without an engine std::nullopt is returned. | ||||||
|  | std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler( | ||||||
|  |     u32 buffer, u32 offset) { | ||||||
|  |     const std::pair<u32, u32> key{buffer, offset}; | ||||||
|  |     if (const auto it = bindless_samplers.find(key); it != bindless_samplers.end()) { | ||||||
|  |         return it->second; | ||||||
|  |     } | ||||||
|  |     if (engine == nullptr) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     // Register the freshly read descriptor so later queries hit the map instead of the engine. | ||||||
|  |     const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset); | ||||||
|  |     bindless_samplers.emplace(key, fetched); | ||||||
|  |     return fetched; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Registers value for (buffer, offset), replacing any value already registered there. | ||||||
|  | void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) { | ||||||
|  |     const std::pair<u32, u32> key{buffer, offset}; | ||||||
|  |     keys.insert_or_assign(key, value); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Registers sampler for the given offset, replacing any descriptor already registered there. | ||||||
|  | void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) { | ||||||
|  |     const u32 key = offset; | ||||||
|  |     bound_samplers.insert_or_assign(key, sampler); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Registers sampler for (buffer, offset), replacing any descriptor already registered there. | ||||||
|  | void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) { | ||||||
|  |     const std::pair<u32, u32> key{buffer, offset}; | ||||||
|  |     bindless_samplers.insert_or_assign(key, sampler); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Returns true when every registered key and sampler still equals the value the engine | ||||||
|  | /// currently reports for it. Returns false when no engine is attached. | ||||||
|  | bool ConstBufferLocker::IsConsistent() const { | ||||||
|  |     if (engine == nullptr) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     const auto key_matches = [this](const auto& entry) { | ||||||
|  |         const auto [cbuf, offset] = entry.first; | ||||||
|  |         return entry.second == engine->AccessConstBuffer32(stage, cbuf, offset); | ||||||
|  |     }; | ||||||
|  |     const auto bound_matches = [this](const auto& entry) { | ||||||
|  |         return entry.second == engine->AccessBoundSampler(stage, entry.first); | ||||||
|  |     }; | ||||||
|  |     const auto bindless_matches = [this](const auto& entry) { | ||||||
|  |         const auto [cbuf, offset] = entry.first; | ||||||
|  |         return entry.second == engine->AccessBindlessSampler(stage, cbuf, offset); | ||||||
|  |     }; | ||||||
|  |     // Checked in the same order as before: keys, bound samplers, then bindless samplers. | ||||||
|  |     return std::all_of(keys.begin(), keys.end(), key_matches) && | ||||||
|  |            std::all_of(bound_samplers.begin(), bound_samplers.end(), bound_matches) && | ||||||
|  |            std::all_of(bindless_samplers.begin(), bindless_samplers.end(), bindless_matches); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Returns true when this locker and rhs hold equal keys, bound samplers and bindless samplers. | ||||||
|  | bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const { | ||||||
|  |     if (!(keys == rhs.keys)) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (!(bound_samplers == rhs.bound_samplers)) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     return bindless_samplers == rhs.bindless_samplers; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace VideoCommon::Shader | ||||||
							
								
								
									
										80
									
								
								src/video_core/shader/const_buffer_locker.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								src/video_core/shader/const_buffer_locker.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,80 @@ | |||||||
|  | // Copyright 2019 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <optional> | ||||||
|  | #include <unordered_map> | ||||||
|  | #include <utility> | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "common/hash.h" | ||||||
|  | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
|  |  | ||||||
|  | namespace VideoCommon::Shader { | ||||||
|  |  | ||||||
|  | using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; | ||||||
|  | using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; | ||||||
|  | using BindlessSamplerMap = | ||||||
|  |     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader | ||||||
|  |  * compiler. With it, the shader can obtain required data from GPU state and store it for disk | ||||||
|  |  * shader compilation. | ||||||
|  |  **/ | ||||||
|  | class ConstBufferLocker { | ||||||
|  | public: | ||||||
|  |     explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage); | ||||||
|  |  | ||||||
|  |     explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage, | ||||||
|  |                                Tegra::Engines::ConstBufferEngineInterface& engine); | ||||||
|  |  | ||||||
|  |     ~ConstBufferLocker(); | ||||||
|  |  | ||||||
|  |     /// Retrieves a key from the locker. If it's registered, the registered value is returned; if | ||||||
|  |     /// not, it is read from the engine (when one is attached) and registered. | ||||||
|  |     std::optional<u32> ObtainKey(u32 buffer, u32 offset); | ||||||
|  |  | ||||||
|  |     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); | ||||||
|  |  | ||||||
|  |     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||||||
|  |  | ||||||
|  |     /// Inserts a key. | ||||||
|  |     void InsertKey(u32 buffer, u32 offset, u32 value); | ||||||
|  |  | ||||||
|  |     /// Inserts a bound sampler key. | ||||||
|  |     void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||||||
|  |  | ||||||
|  |     /// Inserts a bindless sampler key. | ||||||
|  |     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||||||
|  |  | ||||||
|  |     /// Checks keys and samplers against the engine's current values. Returns true if they all | ||||||
|  |     /// match, false otherwise (including when no engine is attached). | ||||||
|  |     bool IsConsistent() const; | ||||||
|  |  | ||||||
|  |     /// Returns true if the keys and samplers are equal to the ones stored in rhs. | ||||||
|  |     bool HasEqualKeys(const ConstBufferLocker& rhs) const; | ||||||
|  |  | ||||||
|  |     /// Gets the const buffer keys database. | ||||||
|  |     const KeyMap& GetKeys() const { | ||||||
|  |         return keys; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gets samplers database. | ||||||
|  |     const BoundSamplerMap& GetBoundSamplers() const { | ||||||
|  |         return bound_samplers; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Gets bindless samplers database. | ||||||
|  |     const BindlessSamplerMap& GetBindlessSamplers() const { | ||||||
|  |         return bindless_samplers; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     const Tegra::Engines::ShaderType stage; | ||||||
|  |     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; | ||||||
|  |     KeyMap keys; | ||||||
|  |     BoundSamplerMap bound_samplers; | ||||||
|  |     BindlessSamplerMap bindless_samplers; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace VideoCommon::Shader | ||||||
| @@ -35,14 +35,20 @@ struct BlockStack { | |||||||
|     std::stack<u32> pbk_stack{}; |     std::stack<u32> pbk_stack{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| struct BlockBranchInfo { | template <typename T, typename... Args> | ||||||
|     Condition condition{}; | BlockBranchInfo MakeBranchInfo(Args&&... args) { | ||||||
|     s32 address{exit_branch}; |     static_assert(std::is_convertible_v<T, BranchData>); | ||||||
|     bool kill{}; |     return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); | ||||||
|     bool is_sync{}; | } | ||||||
|     bool is_brk{}; |  | ||||||
|     bool ignore{}; | bool BlockBranchIsIgnored(BlockBranchInfo first) { | ||||||
| }; |     bool ignore = false; | ||||||
|  |     if (std::holds_alternative<SingleBranch>(*first)) { | ||||||
|  |         const auto branch = std::get_if<SingleBranch>(first.get()); | ||||||
|  |         ignore = branch->ignore; | ||||||
|  |     } | ||||||
|  |     return ignore; | ||||||
|  | } | ||||||
|  |  | ||||||
| struct BlockInfo { | struct BlockInfo { | ||||||
|     u32 start{}; |     u32 start{}; | ||||||
| @@ -56,10 +62,11 @@ struct BlockInfo { | |||||||
| }; | }; | ||||||
|  |  | ||||||
| struct CFGRebuildState { | struct CFGRebuildState { | ||||||
|     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size, |     explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) | ||||||
|                              const u32 start) |         : program_code{program_code}, start{start}, locker{locker} {} | ||||||
|         : start{start}, program_code{program_code}, program_size{program_size} {} |  | ||||||
|  |  | ||||||
|  |     const ProgramCode& program_code; | ||||||
|  |     ConstBufferLocker& locker; | ||||||
|     u32 start{}; |     u32 start{}; | ||||||
|     std::vector<BlockInfo> block_info{}; |     std::vector<BlockInfo> block_info{}; | ||||||
|     std::list<u32> inspect_queries{}; |     std::list<u32> inspect_queries{}; | ||||||
| @@ -69,8 +76,6 @@ struct CFGRebuildState { | |||||||
|     std::map<u32, u32> ssy_labels{}; |     std::map<u32, u32> ssy_labels{}; | ||||||
|     std::map<u32, u32> pbk_labels{}; |     std::map<u32, u32> pbk_labels{}; | ||||||
|     std::unordered_map<u32, BlockStack> stacks{}; |     std::unordered_map<u32, BlockStack> stacks{}; | ||||||
|     const ProgramCode& program_code; |  | ||||||
|     const std::size_t program_size; |  | ||||||
|     ASTManager* manager; |     ASTManager* manager; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -124,10 +129,116 @@ enum class ParseResult : u32 { | |||||||
|     AbnormalFlow, |     AbnormalFlow, | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | struct BranchIndirectInfo { | ||||||
|  |     u32 buffer{}; | ||||||
|  |     u32 offset{}; | ||||||
|  |     u32 entries{}; | ||||||
|  |     s32 relative_position{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, | ||||||
|  |                                                           u32 start_address, u32 current_position) { | ||||||
|  |     const u32 shader_start = state.start; | ||||||
|  |     u32 pos = current_position; | ||||||
|  |     BranchIndirectInfo result{}; | ||||||
|  |     u64 track_register = 0; | ||||||
|  |  | ||||||
|  |     // Step 0: Get the BRX info (tracked register and relative position) | ||||||
|  |     const Instruction instr = {state.program_code[pos]}; | ||||||
|  |     const auto opcode = OpCode::Decode(instr); | ||||||
|  |     if (opcode->get().GetId() != OpCode::Id::BRX) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     if (instr.brx.constant_buffer != 0) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     track_register = instr.gpr8.Value(); | ||||||
|  |     result.relative_position = instr.brx.GetBranchExtend(); | ||||||
|  |     pos--; | ||||||
|  |     bool found_track = false; | ||||||
|  |  | ||||||
|  |     // Step 1: Track the single-word LD_C whose gpr0 matches the tracked register | ||||||
|  |     while (pos >= shader_start) { | ||||||
|  |         if (IsSchedInstruction(pos, shader_start)) { | ||||||
|  |             pos--; | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         const Instruction instr = {state.program_code[pos]}; | ||||||
|  |         const auto opcode = OpCode::Decode(instr); | ||||||
|  |         if (opcode->get().GetId() == OpCode::Id::LD_C) { | ||||||
|  |             if (instr.gpr0.Value() == track_register && | ||||||
|  |                 instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) { | ||||||
|  |                 result.buffer = instr.cbuf36.index.Value(); | ||||||
|  |                 result.offset = static_cast<u32>(instr.cbuf36.GetOffset()); | ||||||
|  |                 track_register = instr.gpr8.Value(); | ||||||
|  |                 pos--; | ||||||
|  |                 found_track = true; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         pos--; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!found_track) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     found_track = false; | ||||||
|  |  | ||||||
|  |     // Step 2: Track the SHL_IMM whose gpr0 matches the tracked register | ||||||
|  |     while (pos >= shader_start) { | ||||||
|  |         if (IsSchedInstruction(pos, shader_start)) { | ||||||
|  |             pos--; | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         const Instruction instr = state.program_code[pos]; | ||||||
|  |         const auto opcode = OpCode::Decode(instr); | ||||||
|  |         if (opcode->get().GetId() == OpCode::Id::SHL_IMM) { | ||||||
|  |             if (instr.gpr0.Value() == track_register) { | ||||||
|  |                 track_register = instr.gpr8.Value(); | ||||||
|  |                 pos--; | ||||||
|  |                 found_track = true; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         pos--; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!found_track) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     found_track = false; | ||||||
|  |  | ||||||
|  |     // Step 3: Track the IMNMX_IMM whose gpr0 matches the tracked register | ||||||
|  |     while (pos >= shader_start) { | ||||||
|  |         if (IsSchedInstruction(pos, shader_start)) { | ||||||
|  |             pos--; | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         const Instruction instr = state.program_code[pos]; | ||||||
|  |         const auto opcode = OpCode::Decode(instr); | ||||||
|  |         if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) { | ||||||
|  |             if (instr.gpr0.Value() == track_register) { | ||||||
|  |                 track_register = instr.gpr8.Value(); | ||||||
|  |                 result.entries = instr.alu.GetSignedImm20_20() + 1; | ||||||
|  |                 pos--; | ||||||
|  |                 found_track = true; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         pos--; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!found_track) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
| std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) { | ||||||
|     u32 offset = static_cast<u32>(address); |     u32 offset = static_cast<u32>(address); | ||||||
|     const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction)); |     const u32 end_address = static_cast<u32>(state.program_code.size()); | ||||||
|     ParseInfo parse_info{}; |     ParseInfo parse_info{}; | ||||||
|  |     SingleBranch single_branch{}; | ||||||
|  |  | ||||||
|     const auto insert_label = [](CFGRebuildState& state, u32 address) { |     const auto insert_label = [](CFGRebuildState& state, u32 address) { | ||||||
|         const auto pair = state.labels.emplace(address); |         const auto pair = state.labels.emplace(address); | ||||||
| @@ -140,13 +251,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||||||
|         if (offset >= end_address) { |         if (offset >= end_address) { | ||||||
|             // ASSERT_OR_EXECUTE can't be used, as it ignores the break |             // ASSERT_OR_EXECUTE can't be used, as it ignores the break | ||||||
|             ASSERT_MSG(false, "Shader passed the current limit!"); |             ASSERT_MSG(false, "Shader passed the current limit!"); | ||||||
|             parse_info.branch_info.address = exit_branch; |  | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.address = exit_branch; | ||||||
|  |             single_branch.ignore = false; | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         if (state.registered.count(offset) != 0) { |         if (state.registered.count(offset) != 0) { | ||||||
|             parse_info.branch_info.address = offset; |             single_branch.address = offset; | ||||||
|             parse_info.branch_info.ignore = true; |             single_branch.ignore = true; | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         if (IsSchedInstruction(offset, state.start)) { |         if (IsSchedInstruction(offset, state.start)) { | ||||||
| @@ -163,24 +275,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||||||
|         switch (opcode->get().GetId()) { |         switch (opcode->get().GetId()) { | ||||||
|         case OpCode::Id::EXIT: { |         case OpCode::Id::EXIT: { | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||||
|             parse_info.branch_info.condition.predicate = |             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); |             if (single_branch.condition.predicate == Pred::NeverExecute) { | ||||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |             const ConditionCode cc = instr.flow_condition_code; | ||||||
|             parse_info.branch_info.condition.cc = cc; |             single_branch.condition.cc = cc; | ||||||
|             if (cc == ConditionCode::F) { |             if (cc == ConditionCode::F) { | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             parse_info.branch_info.address = exit_branch; |             single_branch.address = exit_branch; | ||||||
|             parse_info.branch_info.kill = false; |             single_branch.kill = false; | ||||||
|             parse_info.branch_info.is_sync = false; |             single_branch.is_sync = false; | ||||||
|             parse_info.branch_info.is_brk = false; |             single_branch.is_brk = false; | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.ignore = false; | ||||||
|             parse_info.end_address = offset; |             parse_info.end_address = offset; | ||||||
|  |             parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |                 single_branch.condition, single_branch.address, single_branch.kill, | ||||||
|  |                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||||||
|  |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |             return {ParseResult::ControlCaught, parse_info}; | ||||||
|         } |         } | ||||||
| @@ -189,99 +303,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||||||
|                 return {ParseResult::AbnormalFlow, parse_info}; |                 return {ParseResult::AbnormalFlow, parse_info}; | ||||||
|             } |             } | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||||
|             parse_info.branch_info.condition.predicate = |             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); |             if (single_branch.condition.predicate == Pred::NeverExecute) { | ||||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |             const ConditionCode cc = instr.flow_condition_code; | ||||||
|             parse_info.branch_info.condition.cc = cc; |             single_branch.condition.cc = cc; | ||||||
|             if (cc == ConditionCode::F) { |             if (cc == ConditionCode::F) { | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const u32 branch_offset = offset + instr.bra.GetBranchTarget(); |             const u32 branch_offset = offset + instr.bra.GetBranchTarget(); | ||||||
|             if (branch_offset == 0) { |             if (branch_offset == 0) { | ||||||
|                 parse_info.branch_info.address = exit_branch; |                 single_branch.address = exit_branch; | ||||||
|             } else { |             } else { | ||||||
|                 parse_info.branch_info.address = branch_offset; |                 single_branch.address = branch_offset; | ||||||
|             } |             } | ||||||
|             insert_label(state, branch_offset); |             insert_label(state, branch_offset); | ||||||
|             parse_info.branch_info.kill = false; |             single_branch.kill = false; | ||||||
|             parse_info.branch_info.is_sync = false; |             single_branch.is_sync = false; | ||||||
|             parse_info.branch_info.is_brk = false; |             single_branch.is_brk = false; | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.ignore = false; | ||||||
|             parse_info.end_address = offset; |             parse_info.end_address = offset; | ||||||
|  |             parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |                 single_branch.condition, single_branch.address, single_branch.kill, | ||||||
|  |                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||||||
|  |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |             return {ParseResult::ControlCaught, parse_info}; | ||||||
|         } |         } | ||||||
|         case OpCode::Id::SYNC: { |         case OpCode::Id::SYNC: { | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||||
|             parse_info.branch_info.condition.predicate = |             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); |             if (single_branch.condition.predicate == Pred::NeverExecute) { | ||||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |             const ConditionCode cc = instr.flow_condition_code; | ||||||
|             parse_info.branch_info.condition.cc = cc; |             single_branch.condition.cc = cc; | ||||||
|             if (cc == ConditionCode::F) { |             if (cc == ConditionCode::F) { | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             parse_info.branch_info.address = unassigned_branch; |             single_branch.address = unassigned_branch; | ||||||
|             parse_info.branch_info.kill = false; |             single_branch.kill = false; | ||||||
|             parse_info.branch_info.is_sync = true; |             single_branch.is_sync = true; | ||||||
|             parse_info.branch_info.is_brk = false; |             single_branch.is_brk = false; | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.ignore = false; | ||||||
|             parse_info.end_address = offset; |             parse_info.end_address = offset; | ||||||
|  |             parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |                 single_branch.condition, single_branch.address, single_branch.kill, | ||||||
|  |                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||||||
|  |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |             return {ParseResult::ControlCaught, parse_info}; | ||||||
|         } |         } | ||||||
|         case OpCode::Id::BRK: { |         case OpCode::Id::BRK: { | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||||
|             parse_info.branch_info.condition.predicate = |             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); |             if (single_branch.condition.predicate == Pred::NeverExecute) { | ||||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |             const ConditionCode cc = instr.flow_condition_code; | ||||||
|             parse_info.branch_info.condition.cc = cc; |             single_branch.condition.cc = cc; | ||||||
|             if (cc == ConditionCode::F) { |             if (cc == ConditionCode::F) { | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             parse_info.branch_info.address = unassigned_branch; |             single_branch.address = unassigned_branch; | ||||||
|             parse_info.branch_info.kill = false; |             single_branch.kill = false; | ||||||
|             parse_info.branch_info.is_sync = false; |             single_branch.is_sync = false; | ||||||
|             parse_info.branch_info.is_brk = true; |             single_branch.is_brk = true; | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.ignore = false; | ||||||
|             parse_info.end_address = offset; |             parse_info.end_address = offset; | ||||||
|  |             parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |                 single_branch.condition, single_branch.address, single_branch.kill, | ||||||
|  |                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||||||
|  |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |             return {ParseResult::ControlCaught, parse_info}; | ||||||
|         } |         } | ||||||
|         case OpCode::Id::KIL: { |         case OpCode::Id::KIL: { | ||||||
|             const auto pred_index = static_cast<u32>(instr.pred.pred_index); |             const auto pred_index = static_cast<u32>(instr.pred.pred_index); | ||||||
|             parse_info.branch_info.condition.predicate = |             single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0); | ||||||
|                 GetPredicate(pred_index, instr.negate_pred != 0); |             if (single_branch.condition.predicate == Pred::NeverExecute) { | ||||||
|             if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { |  | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             const ConditionCode cc = instr.flow_condition_code; |             const ConditionCode cc = instr.flow_condition_code; | ||||||
|             parse_info.branch_info.condition.cc = cc; |             single_branch.condition.cc = cc; | ||||||
|             if (cc == ConditionCode::F) { |             if (cc == ConditionCode::F) { | ||||||
|                 offset++; |                 offset++; | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             parse_info.branch_info.address = exit_branch; |             single_branch.address = exit_branch; | ||||||
|             parse_info.branch_info.kill = true; |             single_branch.kill = true; | ||||||
|             parse_info.branch_info.is_sync = false; |             single_branch.is_sync = false; | ||||||
|             parse_info.branch_info.is_brk = false; |             single_branch.is_brk = false; | ||||||
|             parse_info.branch_info.ignore = false; |             single_branch.ignore = false; | ||||||
|             parse_info.end_address = offset; |             parse_info.end_address = offset; | ||||||
|  |             parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |                 single_branch.condition, single_branch.address, single_branch.kill, | ||||||
|  |                 single_branch.is_sync, single_branch.is_brk, single_branch.ignore); | ||||||
|  |  | ||||||
|             return {ParseResult::ControlCaught, parse_info}; |             return {ParseResult::ControlCaught, parse_info}; | ||||||
|         } |         } | ||||||
| @@ -298,6 +420,29 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|         case OpCode::Id::BRX: { |         case OpCode::Id::BRX: { | ||||||
|  |             auto tmp = TrackBranchIndirectInfo(state, address, offset); | ||||||
|  |             if (tmp) { | ||||||
|  |                 auto result = *tmp; | ||||||
|  |                 std::vector<CaseBranch> branches{}; | ||||||
|  |                 s32 pc_target = offset + result.relative_position; | ||||||
|  |                 for (u32 i = 0; i < result.entries; i++) { | ||||||
|  |                     auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4); | ||||||
|  |                     if (!k) { | ||||||
|  |                         return {ParseResult::AbnormalFlow, parse_info}; | ||||||
|  |                     } | ||||||
|  |                     u32 value = *k; | ||||||
|  |                     u32 target = static_cast<u32>((value >> 3) + pc_target); | ||||||
|  |                     insert_label(state, target); | ||||||
|  |                     branches.emplace_back(value, target); | ||||||
|  |                 } | ||||||
|  |                 parse_info.end_address = offset; | ||||||
|  |                 parse_info.branch_info = MakeBranchInfo<MultiBranch>( | ||||||
|  |                     static_cast<u32>(instr.gpr8.Value()), std::move(branches)); | ||||||
|  |  | ||||||
|  |                 return {ParseResult::ControlCaught, parse_info}; | ||||||
|  |             } else { | ||||||
|  |                 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful"); | ||||||
|  |             } | ||||||
|             return {ParseResult::AbnormalFlow, parse_info}; |             return {ParseResult::AbnormalFlow, parse_info}; | ||||||
|         } |         } | ||||||
|         default: |         default: | ||||||
| @@ -306,10 +451,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) | |||||||
|  |  | ||||||
|         offset++; |         offset++; | ||||||
|     } |     } | ||||||
|     parse_info.branch_info.kill = false; |     single_branch.kill = false; | ||||||
|     parse_info.branch_info.is_sync = false; |     single_branch.is_sync = false; | ||||||
|     parse_info.branch_info.is_brk = false; |     single_branch.is_brk = false; | ||||||
|     parse_info.end_address = offset - 1; |     parse_info.end_address = offset - 1; | ||||||
|  |     parse_info.branch_info = MakeBranchInfo<SingleBranch>( | ||||||
|  |         single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync, | ||||||
|  |         single_branch.is_brk, single_branch.ignore); | ||||||
|     return {ParseResult::BlockEnd, parse_info}; |     return {ParseResult::BlockEnd, parse_info}; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -333,9 +481,10 @@ bool TryInspectAddress(CFGRebuildState& state) { | |||||||
|         BlockInfo& current_block = state.block_info[block_index]; |         BlockInfo& current_block = state.block_info[block_index]; | ||||||
|         current_block.end = address - 1; |         current_block.end = address - 1; | ||||||
|         new_block.branch = current_block.branch; |         new_block.branch = current_block.branch; | ||||||
|         BlockBranchInfo forward_branch{}; |         BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); | ||||||
|         forward_branch.address = address; |         const auto branch = std::get_if<SingleBranch>(forward_branch.get()); | ||||||
|         forward_branch.ignore = true; |         branch->address = address; | ||||||
|  |         branch->ignore = true; | ||||||
|         current_block.branch = forward_branch; |         current_block.branch = forward_branch; | ||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
| @@ -350,12 +499,15 @@ bool TryInspectAddress(CFGRebuildState& state) { | |||||||
|  |  | ||||||
|     BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); |     BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address); | ||||||
|     block_info.branch = parse_info.branch_info; |     block_info.branch = parse_info.branch_info; | ||||||
|     if (parse_info.branch_info.condition.IsUnconditional()) { |     if (std::holds_alternative<SingleBranch>(*block_info.branch)) { | ||||||
|  |         const auto branch = std::get_if<SingleBranch>(block_info.branch.get()); | ||||||
|  |         if (branch->condition.IsUnconditional()) { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         const u32 fallthrough_address = parse_info.end_address + 1; | ||||||
|  |         state.inspect_queries.push_front(fallthrough_address); | ||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const u32 fallthrough_address = parse_info.end_address + 1; |  | ||||||
|     state.inspect_queries.push_front(fallthrough_address); |  | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -393,31 +545,42 @@ bool TryQuery(CFGRebuildState& state) { | |||||||
|     state.queries.pop_front(); |     state.queries.pop_front(); | ||||||
|     gather_labels(q2.ssy_stack, state.ssy_labels, block); |     gather_labels(q2.ssy_stack, state.ssy_labels, block); | ||||||
|     gather_labels(q2.pbk_stack, state.pbk_labels, block); |     gather_labels(q2.pbk_stack, state.pbk_labels, block); | ||||||
|     if (!block.branch.condition.IsUnconditional()) { |     if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||||||
|         q2.address = block.end + 1; |         const auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||||||
|         state.queries.push_back(q2); |         if (!branch->condition.IsUnconditional()) { | ||||||
|     } |             q2.address = block.end + 1; | ||||||
|  |             state.queries.push_back(q2); | ||||||
|  |         } | ||||||
|  |  | ||||||
|     Query conditional_query{q2}; |         Query conditional_query{q2}; | ||||||
|     if (block.branch.is_sync) { |         if (branch->is_sync) { | ||||||
|         if (block.branch.address == unassigned_branch) { |             if (branch->address == unassigned_branch) { | ||||||
|             block.branch.address = conditional_query.ssy_stack.top(); |                 branch->address = conditional_query.ssy_stack.top(); | ||||||
|  |             } | ||||||
|  |             conditional_query.ssy_stack.pop(); | ||||||
|         } |         } | ||||||
|         conditional_query.ssy_stack.pop(); |         if (branch->is_brk) { | ||||||
|     } |             if (branch->address == unassigned_branch) { | ||||||
|     if (block.branch.is_brk) { |                 branch->address = conditional_query.pbk_stack.top(); | ||||||
|         if (block.branch.address == unassigned_branch) { |             } | ||||||
|             block.branch.address = conditional_query.pbk_stack.top(); |             conditional_query.pbk_stack.pop(); | ||||||
|         } |         } | ||||||
|         conditional_query.pbk_stack.pop(); |         conditional_query.address = branch->address; | ||||||
|  |         state.queries.push_back(std::move(conditional_query)); | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||||||
|  |     for (const auto& branch_case : multi_branch->branches) { | ||||||
|  |         Query conditional_query{q2}; | ||||||
|  |         conditional_query.address = branch_case.address; | ||||||
|  |         state.queries.push_back(std::move(conditional_query)); | ||||||
|     } |     } | ||||||
|     conditional_query.address = block.branch.address; |  | ||||||
|     state.queries.push_back(std::move(conditional_query)); |  | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { | void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { | ||||||
|     const auto get_expr = ([&](const Condition& cond) -> Expr { |     const auto get_expr = ([&](const Condition& cond) -> Expr { | ||||||
|         Expr result{}; |         Expr result{}; | ||||||
|         if (cond.cc != ConditionCode::T) { |         if (cond.cc != ConditionCode::T) { | ||||||
| @@ -444,15 +607,24 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch) { | |||||||
|         } |         } | ||||||
|         return MakeExpr<ExprBoolean>(true); |         return MakeExpr<ExprBoolean>(true); | ||||||
|     }); |     }); | ||||||
|     if (branch.address < 0) { |     if (std::holds_alternative<SingleBranch>(*branch_info)) { | ||||||
|         if (branch.kill) { |         const auto branch = std::get_if<SingleBranch>(branch_info.get()); | ||||||
|             mm.InsertReturn(get_expr(branch.condition), true); |         if (branch->address < 0) { | ||||||
|  |             if (branch->kill) { | ||||||
|  |                 mm.InsertReturn(get_expr(branch->condition), true); | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             mm.InsertReturn(get_expr(branch->condition), false); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         mm.InsertReturn(get_expr(branch.condition), false); |         mm.InsertGoto(get_expr(branch->condition), branch->address); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     mm.InsertGoto(get_expr(branch.condition), branch.address); |     const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); | ||||||
|  |     for (const auto& branch_case : multi_branch->branches) { | ||||||
|  |         mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), | ||||||
|  |                       branch_case.address); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void DecompileShader(CFGRebuildState& state) { | void DecompileShader(CFGRebuildState& state) { | ||||||
| @@ -464,25 +636,26 @@ void DecompileShader(CFGRebuildState& state) { | |||||||
|         if (state.labels.count(block.start) != 0) { |         if (state.labels.count(block.start) != 0) { | ||||||
|             state.manager->InsertLabel(block.start); |             state.manager->InsertLabel(block.start); | ||||||
|         } |         } | ||||||
|         u32 end = block.branch.ignore ? block.end + 1 : block.end; |         const bool ignore = BlockBranchIsIgnored(block.branch); | ||||||
|  |         u32 end = ignore ? block.end + 1 : block.end; | ||||||
|         state.manager->InsertBlock(block.start, end); |         state.manager->InsertBlock(block.start, end); | ||||||
|         if (!block.branch.ignore) { |         if (!ignore) { | ||||||
|             InsertBranch(*state.manager, block.branch); |             InsertBranch(*state.manager, block.branch); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     state.manager->Decompile(); |     state.manager->Decompile(); | ||||||
| } | } | ||||||
|  |  | ||||||
| std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||||||
|                                                 std::size_t program_size, u32 start_address, |                                                 const CompilerSettings& settings, | ||||||
|                                                 const CompilerSettings& settings) { |                                                 ConstBufferLocker& locker) { | ||||||
|     auto result_out = std::make_unique<ShaderCharacteristics>(); |     auto result_out = std::make_unique<ShaderCharacteristics>(); | ||||||
|     if (settings.depth == CompileDepth::BruteForce) { |     if (settings.depth == CompileDepth::BruteForce) { | ||||||
|         result_out->settings.depth = CompileDepth::BruteForce; |         result_out->settings.depth = CompileDepth::BruteForce; | ||||||
|         return result_out; |         return result_out; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     CFGRebuildState state{program_code, program_size, start_address}; |     CFGRebuildState state{program_code, start_address, locker}; | ||||||
|     // Inspect Code and generate blocks |     // Inspect Code and generate blocks | ||||||
|     state.labels.clear(); |     state.labels.clear(); | ||||||
|     state.labels.emplace(start_address); |     state.labels.emplace(start_address); | ||||||
| @@ -547,11 +720,9 @@ std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | |||||||
|         ShaderBlock new_block{}; |         ShaderBlock new_block{}; | ||||||
|         new_block.start = block.start; |         new_block.start = block.start; | ||||||
|         new_block.end = block.end; |         new_block.end = block.end; | ||||||
|         new_block.ignore_branch = block.branch.ignore; |         new_block.ignore_branch = BlockBranchIsIgnored(block.branch); | ||||||
|         if (!new_block.ignore_branch) { |         if (!new_block.ignore_branch) { | ||||||
|             new_block.branch.cond = block.branch.condition; |             new_block.branch = block.branch; | ||||||
|             new_block.branch.kills = block.branch.kill; |  | ||||||
|             new_block.branch.address = block.branch.address; |  | ||||||
|         } |         } | ||||||
|         result_out->end = std::max(result_out->end, block.end); |         result_out->end = std::max(result_out->end, block.end); | ||||||
|         result_out->blocks.push_back(new_block); |         result_out->blocks.push_back(new_block); | ||||||
|   | |||||||
| @@ -7,6 +7,7 @@ | |||||||
| #include <list> | #include <list> | ||||||
| #include <optional> | #include <optional> | ||||||
| #include <set> | #include <set> | ||||||
|  | #include <variant> | ||||||
|  |  | ||||||
| #include "video_core/engines/shader_bytecode.h" | #include "video_core/engines/shader_bytecode.h" | ||||||
| #include "video_core/shader/ast.h" | #include "video_core/shader/ast.h" | ||||||
| @@ -37,29 +38,61 @@ struct Condition { | |||||||
|     } |     } | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class SingleBranch { | ||||||
|  | public: | ||||||
|  |     SingleBranch() = default; | ||||||
|  |     SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk, | ||||||
|  |                  bool ignore) | ||||||
|  |         : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk}, | ||||||
|  |           ignore{ignore} {} | ||||||
|  |  | ||||||
|  |     bool operator==(const SingleBranch& b) const { | ||||||
|  |         return std::tie(condition, address, kill, is_sync, is_brk, ignore) == | ||||||
|  |                std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool operator!=(const SingleBranch& b) const { | ||||||
|  |         return !operator==(b); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Condition condition{}; | ||||||
|  |     s32 address{exit_branch}; | ||||||
|  |     bool kill{}; | ||||||
|  |     bool is_sync{}; | ||||||
|  |     bool is_brk{}; | ||||||
|  |     bool ignore{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | struct CaseBranch { | ||||||
|  |     CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {} | ||||||
|  |     u32 cmp_value; | ||||||
|  |     u32 address; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class MultiBranch { | ||||||
|  | public: | ||||||
|  |     MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches) | ||||||
|  |         : gpr{gpr}, branches{std::move(branches)} {} | ||||||
|  |  | ||||||
|  |     u32 gpr{}; | ||||||
|  |     std::vector<CaseBranch> branches{}; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | using BranchData = std::variant<SingleBranch, MultiBranch>; | ||||||
|  | using BlockBranchInfo = std::shared_ptr<BranchData>; | ||||||
|  |  | ||||||
|  | bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second); | ||||||
|  |  | ||||||
| struct ShaderBlock { | struct ShaderBlock { | ||||||
|     struct Branch { |  | ||||||
|         Condition cond{}; |  | ||||||
|         bool kills{}; |  | ||||||
|         s32 address{}; |  | ||||||
|  |  | ||||||
|         bool operator==(const Branch& b) const { |  | ||||||
|             return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         bool operator!=(const Branch& b) const { |  | ||||||
|             return !operator==(b); |  | ||||||
|         } |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     u32 start{}; |     u32 start{}; | ||||||
|     u32 end{}; |     u32 end{}; | ||||||
|     bool ignore_branch{}; |     bool ignore_branch{}; | ||||||
|     Branch branch{}; |     BlockBranchInfo branch{}; | ||||||
|  |  | ||||||
|     bool operator==(const ShaderBlock& sb) const { |     bool operator==(const ShaderBlock& sb) const { | ||||||
|         return std::tie(start, end, ignore_branch, branch) == |         return std::tie(start, end, ignore_branch) == | ||||||
|                std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch); |                    std::tie(sb.start, sb.end, sb.ignore_branch) && | ||||||
|  |                BlockBranchInfoAreEqual(branch, sb.branch); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     bool operator!=(const ShaderBlock& sb) const { |     bool operator!=(const ShaderBlock& sb) const { | ||||||
| @@ -76,8 +109,8 @@ struct ShaderCharacteristics { | |||||||
|     CompilerSettings settings{}; |     CompilerSettings settings{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, | std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address, | ||||||
|                                                 std::size_t program_size, u32 start_address, |                                                 const CompilerSettings& settings, | ||||||
|                                                 const CompilerSettings& settings); |                                                 ConstBufferLocker& locker); | ||||||
|  |  | ||||||
| } // namespace VideoCommon::Shader | } // namespace VideoCommon::Shader | ||||||
|   | |||||||
| @@ -33,7 +33,7 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { | |||||||
|     return (absolute_offset % SchedPeriod) == 0; |     return (absolute_offset % SchedPeriod) == 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| class ASTDecoder { | class ASTDecoder { | ||||||
| public: | public: | ||||||
| @@ -102,7 +102,7 @@ void ShaderIR::Decode() { | |||||||
|     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); |     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); | ||||||
|  |  | ||||||
|     decompiled = false; |     decompiled = false; | ||||||
|     auto info = ScanFlow(program_code, program_size, main_offset, settings); |     auto info = ScanFlow(program_code, main_offset, settings, locker); | ||||||
|     auto& shader_info = *info; |     auto& shader_info = *info; | ||||||
|     coverage_begin = shader_info.start; |     coverage_begin = shader_info.start; | ||||||
|     coverage_end = shader_info.end; |     coverage_end = shader_info.end; | ||||||
| @@ -155,7 +155,7 @@ void ShaderIR::Decode() { | |||||||
|         [[fallthrough]]; |         [[fallthrough]]; | ||||||
|     case CompileDepth::BruteForce: { |     case CompileDepth::BruteForce: { | ||||||
|         coverage_begin = main_offset; |         coverage_begin = main_offset; | ||||||
|         const u32 shader_end = static_cast<u32>(program_size / sizeof(u64)); |         const std::size_t shader_end = program_code.size(); | ||||||
|         coverage_end = shader_end; |         coverage_end = shader_end; | ||||||
|         for (u32 label = main_offset; label < shader_end; label++) { |         for (u32 label = main_offset; label < shader_end; label++) { | ||||||
|             basic_blocks.insert({label, DecodeRange(label, label + 1)}); |             basic_blocks.insert({label, DecodeRange(label, label + 1)}); | ||||||
| @@ -198,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) { | |||||||
|         } |         } | ||||||
|         return result; |         return result; | ||||||
|     }; |     }; | ||||||
|     if (block.branch.address < 0) { |     if (std::holds_alternative<SingleBranch>(*block.branch)) { | ||||||
|         if (block.branch.kills) { |         auto branch = std::get_if<SingleBranch>(block.branch.get()); | ||||||
|             Node n = Operation(OperationCode::Discard); |         if (branch->address < 0) { | ||||||
|             n = apply_conditions(block.branch.cond, n); |             if (branch->kill) { | ||||||
|  |                 Node n = Operation(OperationCode::Discard); | ||||||
|  |                 n = apply_conditions(branch->condition, n); | ||||||
|  |                 bb.push_back(n); | ||||||
|  |                 global_code.push_back(n); | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             Node n = Operation(OperationCode::Exit); | ||||||
|  |             n = apply_conditions(branch->condition, n); | ||||||
|             bb.push_back(n); |             bb.push_back(n); | ||||||
|             global_code.push_back(n); |             global_code.push_back(n); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         Node n = Operation(OperationCode::Exit); |         Node n = Operation(OperationCode::Branch, Immediate(branch->address)); | ||||||
|         n = apply_conditions(block.branch.cond, n); |         n = apply_conditions(branch->condition, n); | ||||||
|         bb.push_back(n); |         bb.push_back(n); | ||||||
|         global_code.push_back(n); |         global_code.push_back(n); | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     Node n = Operation(OperationCode::Branch, Immediate(block.branch.address)); |     auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); | ||||||
|     n = apply_conditions(block.branch.cond, n); |     Node op_a = GetRegister(multi_branch->gpr); | ||||||
|     bb.push_back(n); |     for (auto& branch_case : multi_branch->branches) { | ||||||
|     global_code.push_back(n); |         Node n = Operation(OperationCode::Branch, Immediate(branch_case.address)); | ||||||
|  |         Node op_b = Immediate(branch_case.cmp_value); | ||||||
|  |         Node condition = | ||||||
|  |             GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b); | ||||||
|  |         auto result = Conditional(condition, {n}); | ||||||
|  |         bb.push_back(result); | ||||||
|  |         global_code.push_back(result); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) { | ||||||
|   | |||||||
| @@ -141,7 +141,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); |         const Node component = Immediate(static_cast<u32>(instr.tld4s.component)); | ||||||
|  |  | ||||||
|         const auto& sampler = |         const auto& sampler = | ||||||
|             GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare); |             GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}}); | ||||||
|  |  | ||||||
|         Node4 values; |         Node4 values; | ||||||
|         for (u32 element = 0; element < values.size(); ++element) { |         for (u32 element = 0; element < values.size(); ++element) { | ||||||
| @@ -165,10 +165,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|         // Sadly, not all texture instructions specify the type of texture their sampler |         // Sadly, not all texture instructions specify the type of texture their sampler | ||||||
|         // uses. This must be fixed at a later instance. |         // uses. This must be fixed at a later instance. | ||||||
|         const auto& sampler = |         const auto& sampler = | ||||||
|             is_bindless |             is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {}); | ||||||
|                 ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false, |  | ||||||
|                                      false) |  | ||||||
|                 : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false); |  | ||||||
|  |  | ||||||
|         u32 indexer = 0; |         u32 indexer = 0; | ||||||
|         switch (instr.txq.query_type) { |         switch (instr.txq.query_type) { | ||||||
| @@ -207,9 +204,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|  |  | ||||||
|         auto texture_type = instr.tmml.texture_type.Value(); |         auto texture_type = instr.tmml.texture_type.Value(); | ||||||
|         const bool is_array = instr.tmml.array != 0; |         const bool is_array = instr.tmml.array != 0; | ||||||
|         const auto& sampler = is_bindless |         const auto& sampler = | ||||||
|                                   ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false) |             is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}}) | ||||||
|                                   : GetSampler(instr.sampler, texture_type, is_array, false); |                         : GetSampler(instr.sampler, {{texture_type, is_array, false}}); | ||||||
|  |  | ||||||
|         std::vector<Node> coords; |         std::vector<Node> coords; | ||||||
|  |  | ||||||
| @@ -285,9 +282,26 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { | |||||||
|     return pc; |     return pc; | ||||||
| } | } | ||||||
|  |  | ||||||
| const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type, | const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, | ||||||
|                                     bool is_array, bool is_shadow) { |                                     std::optional<SamplerInfo> sampler_info) { | ||||||
|     const auto offset = static_cast<std::size_t>(sampler.index.Value()); |     const auto offset = static_cast<u32>(sampler.index.Value()); | ||||||
|  |  | ||||||
|  |     Tegra::Shader::TextureType type; | ||||||
|  |     bool is_array; | ||||||
|  |     bool is_shadow; | ||||||
|  |     if (sampler_info) { | ||||||
|  |         type = sampler_info->type; | ||||||
|  |         is_array = sampler_info->is_array; | ||||||
|  |         is_shadow = sampler_info->is_shadow; | ||||||
|  |     } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) { | ||||||
|  |         type = sampler->texture_type.Value(); | ||||||
|  |         is_array = sampler->is_array.Value() != 0; | ||||||
|  |         is_shadow = sampler->is_shadow.Value() != 0; | ||||||
|  |     } else { | ||||||
|  |         type = Tegra::Shader::TextureType::Texture2D; | ||||||
|  |         is_array = false; | ||||||
|  |         is_shadow = false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // If this sampler has already been used, return the existing mapping. |     // If this sampler has already been used, return the existing mapping. | ||||||
|     const auto itr = |     const auto itr = | ||||||
| @@ -303,15 +317,31 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu | |||||||
|     const std::size_t next_index = used_samplers.size(); |     const std::size_t next_index = used_samplers.size(); | ||||||
|     const Sampler entry{offset, next_index, type, is_array, is_shadow}; |     const Sampler entry{offset, next_index, type, is_array, is_shadow}; | ||||||
|     return *used_samplers.emplace(entry).first; |     return *used_samplers.emplace(entry).first; | ||||||
| } | } // namespace VideoCommon::Shader | ||||||
|  |  | ||||||
| const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type, | const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, | ||||||
|                                             bool is_array, bool is_shadow) { |                                             std::optional<SamplerInfo> sampler_info) { | ||||||
|     const Node sampler_register = GetRegister(reg); |     const Node sampler_register = GetRegister(reg); | ||||||
|     const auto [base_sampler, cbuf_index, cbuf_offset] = |     const auto [base_sampler, cbuf_index, cbuf_offset] = | ||||||
|         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); |         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size())); | ||||||
|     ASSERT(base_sampler != nullptr); |     ASSERT(base_sampler != nullptr); | ||||||
|     const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); |     const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset); | ||||||
|  |     Tegra::Shader::TextureType type; | ||||||
|  |     bool is_array; | ||||||
|  |     bool is_shadow; | ||||||
|  |     if (sampler_info) { | ||||||
|  |         type = sampler_info->type; | ||||||
|  |         is_array = sampler_info->is_array; | ||||||
|  |         is_shadow = sampler_info->is_shadow; | ||||||
|  |     } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) { | ||||||
|  |         type = sampler->texture_type.Value(); | ||||||
|  |         is_array = sampler->is_array.Value() != 0; | ||||||
|  |         is_shadow = sampler->is_shadow.Value() != 0; | ||||||
|  |     } else { | ||||||
|  |         type = Tegra::Shader::TextureType::Texture2D; | ||||||
|  |         is_array = false; | ||||||
|  |         is_shadow = false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // If this sampler has already been used, return the existing mapping. |     // If this sampler has already been used, return the existing mapping. | ||||||
|     const auto itr = |     const auto itr = | ||||||
| @@ -411,9 +441,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, | |||||||
|                              (texture_type == TextureType::TextureCube && is_array && is_shadow), |                              (texture_type == TextureType::TextureCube && is_array && is_shadow), | ||||||
|                          "This method is not supported."); |                          "This method is not supported."); | ||||||
|  |  | ||||||
|     const auto& sampler = is_bindless |     const auto& sampler = | ||||||
|                               ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow) |         is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}}) | ||||||
|                               : GetSampler(instr.sampler, texture_type, is_array, is_shadow); |                     : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}}); | ||||||
|  |  | ||||||
|     const bool lod_needed = process_mode == TextureProcessMode::LZ || |     const bool lod_needed = process_mode == TextureProcessMode::LZ || | ||||||
|                             process_mode == TextureProcessMode::LL || |                             process_mode == TextureProcessMode::LL || | ||||||
| @@ -577,7 +607,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de | |||||||
|         dc = GetRegister(parameter_register++); |         dc = GetRegister(parameter_register++); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare); |     const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}}); | ||||||
|  |  | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
| @@ -610,7 +640,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { | |||||||
|     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; |     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr}; | ||||||
|     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; |     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr}; | ||||||
|  |  | ||||||
|     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |     const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); | ||||||
|  |  | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
| @@ -646,7 +676,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is | |||||||
|     // When lod is used always is in gpr20 |     // When lod is used always is in gpr20 | ||||||
|     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); |     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0); | ||||||
|  |  | ||||||
|     const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false); |     const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}}); | ||||||
|  |  | ||||||
|     Node4 values; |     Node4 values; | ||||||
|     for (u32 element = 0; element < values.size(); ++element) { |     for (u32 element = 0; element < values.size(); ++element) { | ||||||
|   | |||||||
| @@ -17,13 +17,14 @@ using Tegra::Shader::Pred; | |||||||
| class ExprAnd; | class ExprAnd; | ||||||
| class ExprBoolean; | class ExprBoolean; | ||||||
| class ExprCondCode; | class ExprCondCode; | ||||||
|  | class ExprGprEqual; | ||||||
| class ExprNot; | class ExprNot; | ||||||
| class ExprOr; | class ExprOr; | ||||||
| class ExprPredicate; | class ExprPredicate; | ||||||
| class ExprVar; | class ExprVar; | ||||||
|  |  | ||||||
| using ExprData = | using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, | ||||||
|     std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd, ExprBoolean>; |                               ExprBoolean, ExprGprEqual>; | ||||||
| using Expr = std::shared_ptr<ExprData>; | using Expr = std::shared_ptr<ExprData>; | ||||||
|  |  | ||||||
| class ExprAnd final { | class ExprAnd final { | ||||||
| @@ -118,6 +119,22 @@ public: | |||||||
|     bool value; |     bool value; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | class ExprGprEqual final { | ||||||
|  | public: | ||||||
|  |     ExprGprEqual(u32 gpr, u32 value) : gpr{gpr}, value{value} {} | ||||||
|  |  | ||||||
|  |     bool operator==(const ExprGprEqual& b) const { | ||||||
|  |         return gpr == b.gpr && value == b.value; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool operator!=(const ExprGprEqual& b) const { | ||||||
|  |         return !operator==(b); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32 gpr; | ||||||
|  |     u32 value; | ||||||
|  | }; | ||||||
|  |  | ||||||
| template <typename T, typename... Args> | template <typename T, typename... Args> | ||||||
| Expr MakeExpr(Args&&... args) { | Expr MakeExpr(Args&&... args) { | ||||||
|     static_assert(std::is_convertible_v<T, ExprData>); |     static_assert(std::is_convertible_v<T, ExprData>); | ||||||
|   | |||||||
| @@ -23,10 +23,9 @@ using Tegra::Shader::PredCondition; | |||||||
| using Tegra::Shader::PredOperation; | using Tegra::Shader::PredOperation; | ||||||
| using Tegra::Shader::Register; | using Tegra::Shader::Register; | ||||||
|  |  | ||||||
| ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size, | ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | ||||||
|                    CompilerSettings settings) |                    ConstBufferLocker& locker) | ||||||
|     : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{}, |     : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} { | ||||||
|       program_manager{true, true}, settings{settings} { |  | ||||||
|     Decode(); |     Decode(); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ | |||||||
| #include "video_core/engines/shader_header.h" | #include "video_core/engines/shader_header.h" | ||||||
| #include "video_core/shader/ast.h" | #include "video_core/shader/ast.h" | ||||||
| #include "video_core/shader/compiler_settings.h" | #include "video_core/shader/compiler_settings.h" | ||||||
|  | #include "video_core/shader/const_buffer_locker.h" | ||||||
| #include "video_core/shader/node.h" | #include "video_core/shader/node.h" | ||||||
|  |  | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
| @@ -66,8 +67,8 @@ struct GlobalMemoryUsage { | |||||||
|  |  | ||||||
| class ShaderIR final { | class ShaderIR final { | ||||||
| public: | public: | ||||||
|     explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size, |     explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings, | ||||||
|                       CompilerSettings settings); |                       ConstBufferLocker& locker); | ||||||
|     ~ShaderIR(); |     ~ShaderIR(); | ||||||
|  |  | ||||||
|     const std::map<u32, NodeBlock>& GetBasicBlocks() const { |     const std::map<u32, NodeBlock>& GetBasicBlocks() const { | ||||||
| @@ -172,6 +173,13 @@ public: | |||||||
|  |  | ||||||
| private: | private: | ||||||
|     friend class ASTDecoder; |     friend class ASTDecoder; | ||||||
|  |  | ||||||
|  |     struct SamplerInfo { | ||||||
|  |         Tegra::Shader::TextureType type; | ||||||
|  |         bool is_array; | ||||||
|  |         bool is_shadow; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     void Decode(); |     void Decode(); | ||||||
|  |  | ||||||
|     NodeBlock DecodeRange(u32 begin, u32 end); |     NodeBlock DecodeRange(u32 begin, u32 end); | ||||||
| @@ -296,12 +304,11 @@ private: | |||||||
|  |  | ||||||
|     /// Accesses a texture sampler |     /// Accesses a texture sampler | ||||||
|     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, |     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler, | ||||||
|                               Tegra::Shader::TextureType type, bool is_array, bool is_shadow); |                               std::optional<SamplerInfo> sampler_info); | ||||||
|  |  | ||||||
|     // Accesses a texture sampler for a bindless texture. |     // Accesses a texture sampler for a bindless texture. | ||||||
|     const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, |     const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg, | ||||||
|                                       Tegra::Shader::TextureType type, bool is_array, |                                       std::optional<SamplerInfo> sampler_info); | ||||||
|                                       bool is_shadow); |  | ||||||
|  |  | ||||||
|     /// Accesses an image. |     /// Accesses an image. | ||||||
|     Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); |     Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type); | ||||||
| @@ -377,7 +384,9 @@ private: | |||||||
|  |  | ||||||
|     const ProgramCode& program_code; |     const ProgramCode& program_code; | ||||||
|     const u32 main_offset; |     const u32 main_offset; | ||||||
|     const std::size_t program_size; |     const CompilerSettings settings; | ||||||
|  |     ConstBufferLocker& locker; | ||||||
|  |  | ||||||
|     bool decompiled{}; |     bool decompiled{}; | ||||||
|     bool disable_flow_stack{}; |     bool disable_flow_stack{}; | ||||||
|  |  | ||||||
| @@ -386,8 +395,7 @@ private: | |||||||
|  |  | ||||||
|     std::map<u32, NodeBlock> basic_blocks; |     std::map<u32, NodeBlock> basic_blocks; | ||||||
|     NodeBlock global_code; |     NodeBlock global_code; | ||||||
|     ASTManager program_manager; |     ASTManager program_manager{true, true}; | ||||||
|     CompilerSettings settings{}; |  | ||||||
|  |  | ||||||
|     std::set<u32> used_registers; |     std::set<u32> used_registers; | ||||||
|     std::set<Tegra::Shader::Pred> used_predicates; |     std::set<Tegra::Shader::Pred> used_predicates; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user