Merge pull request #2705 from FernandoS27/tex-cache-fixes
GPU: Fixes to Texture Cache and Include Microprofiles for GL State/BufferCopy/Macro Interpreter
This commit is contained in:
		| @@ -4,14 +4,18 @@ | |||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/macro_interpreter.h" | #include "video_core/macro_interpreter.h" | ||||||
|  |  | ||||||
|  | MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192)); | ||||||
|  |  | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
|  |  | ||||||
| MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {} | ||||||
|  |  | ||||||
| void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) { | ||||||
|  |     MICROPROFILE_SCOPE(MacroInterp); | ||||||
|     Reset(); |     Reset(); | ||||||
|     registers[1] = parameters[0]; |     registers[1] = parameters[0]; | ||||||
|     this->parameters = std::move(parameters); |     this->parameters = std::move(parameters); | ||||||
|   | |||||||
| @@ -6,8 +6,11 @@ | |||||||
| #include <glad/glad.h> | #include <glad/glad.h> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/logging/log.h" | #include "common/logging/log.h" | ||||||
|  | #include "common/microprofile.h" | ||||||
| #include "video_core/renderer_opengl/gl_state.h" | #include "video_core/renderer_opengl/gl_state.h" | ||||||
|  |  | ||||||
|  | MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); | ||||||
|  |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
|  |  | ||||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||||
| @@ -524,6 +527,7 @@ void OpenGLState::ApplySamplers() const { | |||||||
| } | } | ||||||
|  |  | ||||||
| void OpenGLState::Apply() const { | void OpenGLState::Apply() const { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_State); | ||||||
|     ApplyFramebufferState(); |     ApplyFramebufferState(); | ||||||
|     ApplyVertexArrayState(); |     ApplyVertexArrayState(); | ||||||
|     ApplyShaderProgram(); |     ApplyShaderProgram(); | ||||||
|   | |||||||
| @@ -31,6 +31,8 @@ using VideoCore::Surface::SurfaceType; | |||||||
|  |  | ||||||
| MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); | ||||||
| MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); | ||||||
|  | MICROPROFILE_DEFINE(OpenGL_Texture_Buffer_Copy, "OpenGL", "Texture Buffer Copy", | ||||||
|  |                     MP_RGB(128, 192, 128)); | ||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
|  |  | ||||||
| @@ -535,6 +537,7 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, | |||||||
| } | } | ||||||
|  |  | ||||||
| void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_Texture_Buffer_Copy); | ||||||
|     const auto& src_params = src_surface->GetSurfaceParams(); |     const auto& src_params = src_surface->GetSurfaceParams(); | ||||||
|     const auto& dst_params = dst_surface->GetSurfaceParams(); |     const auto& dst_params = dst_surface->GetSurfaceParams(); | ||||||
|     UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); |     UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1); | ||||||
|   | |||||||
| @@ -75,9 +75,12 @@ MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) | |||||||
|  |  | ||||||
|     // Linear Surface check |     // Linear Surface check | ||||||
|     if (!params.is_tiled) { |     if (!params.is_tiled) { | ||||||
|         if (std::tie(params.width, params.height, params.pitch) == |         if (std::tie(params.height, params.pitch) == std::tie(rhs.height, rhs.pitch)) { | ||||||
|             std::tie(rhs.width, rhs.height, rhs.pitch)) { |             if (params.width == rhs.width) { | ||||||
|             return MatchStructureResult::FullMatch; |                 return MatchStructureResult::FullMatch; | ||||||
|  |             } else { | ||||||
|  |                 return MatchStructureResult::SemiMatch; | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|         return MatchStructureResult::None; |         return MatchStructureResult::None; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -200,8 +200,9 @@ public: | |||||||
|         modification_tick = tick; |         modification_tick = tick; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void MarkAsRenderTarget(const bool is_target) { |     void MarkAsRenderTarget(const bool is_target, const u32 index) { | ||||||
|         this->is_target = is_target; |         this->is_target = is_target; | ||||||
|  |         this->index = index; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void MarkAsPicked(const bool is_picked) { |     void MarkAsPicked(const bool is_picked) { | ||||||
| @@ -221,6 +222,10 @@ public: | |||||||
|         return is_target; |         return is_target; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     u32 GetRenderTarget() const { | ||||||
|  |         return index; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     bool IsRegistered() const { |     bool IsRegistered() const { | ||||||
|         return is_registered; |         return is_registered; | ||||||
|     } |     } | ||||||
| @@ -307,10 +312,13 @@ private: | |||||||
|         return view; |         return view; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     static constexpr u32 NO_RT = 0xFFFFFFFF; | ||||||
|  |  | ||||||
|     bool is_modified{}; |     bool is_modified{}; | ||||||
|     bool is_target{}; |     bool is_target{}; | ||||||
|     bool is_registered{}; |     bool is_registered{}; | ||||||
|     bool is_picked{}; |     bool is_picked{}; | ||||||
|  |     u32 index{NO_RT}; | ||||||
|     u64 modification_tick{}; |     u64 modification_tick{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -290,12 +290,19 @@ std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) co | |||||||
|  |  | ||||||
| std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, | ||||||
|                                                     bool uncompressed) const { |                                                     bool uncompressed) const { | ||||||
|     const bool tiled{as_host_size ? false : is_tiled}; |  | ||||||
|     const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; |     const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())}; | ||||||
|     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; |     const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())}; | ||||||
|     const u32 depth{is_layered ? 1U : GetMipDepth(level)}; |     const u32 depth{is_layered ? 1U : GetMipDepth(level)}; | ||||||
|     return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth, |     if (is_tiled) { | ||||||
|                                          GetMipBlockHeight(level), GetMipBlockDepth(level)); |         return Tegra::Texture::CalculateSize(!as_host_size, GetBytesPerPixel(), width, height, | ||||||
|  |                                              depth, GetMipBlockHeight(level), | ||||||
|  |                                              GetMipBlockDepth(level)); | ||||||
|  |     } else if (as_host_size || IsBuffer()) { | ||||||
|  |         return GetBytesPerPixel() * width * height * depth; | ||||||
|  |     } else { | ||||||
|  |         // Linear Texture Case | ||||||
|  |         return pitch * height * depth; | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | bool SurfaceParams::operator==(const SurfaceParams& rhs) const { | ||||||
|   | |||||||
| @@ -133,11 +133,11 @@ public: | |||||||
|             regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |             regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; | ||||||
|         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |         auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); | ||||||
|         if (depth_buffer.target) |         if (depth_buffer.target) | ||||||
|             depth_buffer.target->MarkAsRenderTarget(false); |             depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||||||
|         depth_buffer.target = surface_view.first; |         depth_buffer.target = surface_view.first; | ||||||
|         depth_buffer.view = surface_view.second; |         depth_buffer.view = surface_view.second; | ||||||
|         if (depth_buffer.target) |         if (depth_buffer.target) | ||||||
|             depth_buffer.target->MarkAsRenderTarget(true); |             depth_buffer.target->MarkAsRenderTarget(true, DEPTH_RT); | ||||||
|         return surface_view.second; |         return surface_view.second; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -167,11 +167,11 @@ public: | |||||||
|         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |         auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), | ||||||
|                                        preserve_contents, true); |                                        preserve_contents, true); | ||||||
|         if (render_targets[index].target) |         if (render_targets[index].target) | ||||||
|             render_targets[index].target->MarkAsRenderTarget(false); |             render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||||||
|         render_targets[index].target = surface_view.first; |         render_targets[index].target = surface_view.first; | ||||||
|         render_targets[index].view = surface_view.second; |         render_targets[index].view = surface_view.second; | ||||||
|         if (render_targets[index].target) |         if (render_targets[index].target) | ||||||
|             render_targets[index].target->MarkAsRenderTarget(true); |             render_targets[index].target->MarkAsRenderTarget(true, static_cast<u32>(index)); | ||||||
|         return surface_view.second; |         return surface_view.second; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -191,7 +191,7 @@ public: | |||||||
|         if (depth_buffer.target == nullptr) { |         if (depth_buffer.target == nullptr) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         depth_buffer.target->MarkAsRenderTarget(false); |         depth_buffer.target->MarkAsRenderTarget(false, NO_RT); | ||||||
|         depth_buffer.target = nullptr; |         depth_buffer.target = nullptr; | ||||||
|         depth_buffer.view = nullptr; |         depth_buffer.view = nullptr; | ||||||
|     } |     } | ||||||
| @@ -200,7 +200,7 @@ public: | |||||||
|         if (render_targets[index].target == nullptr) { |         if (render_targets[index].target == nullptr) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         render_targets[index].target->MarkAsRenderTarget(false); |         render_targets[index].target->MarkAsRenderTarget(false, NO_RT); | ||||||
|         render_targets[index].target = nullptr; |         render_targets[index].target = nullptr; | ||||||
|         render_targets[index].view = nullptr; |         render_targets[index].view = nullptr; | ||||||
|     } |     } | ||||||
| @@ -270,6 +270,16 @@ protected: | |||||||
|     // and reading it from a separate buffer. |     // and reading it from a separate buffer. | ||||||
|     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; |     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; | ||||||
|  |  | ||||||
|  |     void ManageRenderTargetUnregister(TSurface& surface) { | ||||||
|  |         auto& maxwell3d = system.GPU().Maxwell3D(); | ||||||
|  |         const u32 index = surface->GetRenderTarget(); | ||||||
|  |         if (index == DEPTH_RT) { | ||||||
|  |             maxwell3d.dirty_flags.zeta_buffer = true; | ||||||
|  |         } else { | ||||||
|  |             maxwell3d.dirty_flags.color_buffer.set(index, true); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     void Register(TSurface surface) { |     void Register(TSurface surface) { | ||||||
|         const GPUVAddr gpu_addr = surface->GetGpuAddr(); |         const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||||||
|         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); |         const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr)); | ||||||
| @@ -294,6 +304,9 @@ protected: | |||||||
|         if (guard_render_targets && surface->IsProtected()) { |         if (guard_render_targets && surface->IsProtected()) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  |         if (!guard_render_targets && surface->IsRenderTarget()) { | ||||||
|  |             ManageRenderTargetUnregister(surface); | ||||||
|  |         } | ||||||
|         const GPUVAddr gpu_addr = surface->GetGpuAddr(); |         const GPUVAddr gpu_addr = surface->GetGpuAddr(); | ||||||
|         const CacheAddr cache_ptr = surface->GetCacheAddr(); |         const CacheAddr cache_ptr = surface->GetCacheAddr(); | ||||||
|         const std::size_t size = surface->GetSizeInBytes(); |         const std::size_t size = surface->GetSizeInBytes(); | ||||||
| @@ -649,15 +662,6 @@ private: | |||||||
|                 } |                 } | ||||||
|                 return {current_surface, *view}; |                 return {current_surface, *view}; | ||||||
|             } |             } | ||||||
|             // The next case is unsafe, so if we are in accurate GPU emulation, just skip it |  | ||||||
|             if (Settings::values.use_accurate_gpu_emulation) { |  | ||||||
|                 return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, |  | ||||||
|                                       MatchTopologyResult::FullMatch); |  | ||||||
|             } |  | ||||||
|             // This is the case the texture is a part of the parent. |  | ||||||
|             if (current_surface->MatchesSubTexture(params, gpu_addr)) { |  | ||||||
|                 return RebuildSurface(current_surface, params, is_render); |  | ||||||
|             } |  | ||||||
|         } else { |         } else { | ||||||
|             // If there are many overlaps, odds are they are subtextures of the candidate |             // If there are many overlaps, odds are they are subtextures of the candidate | ||||||
|             // surface. We try to construct a new surface based on the candidate parameters, |             // surface. We try to construct a new surface based on the candidate parameters, | ||||||
| @@ -793,6 +797,9 @@ private: | |||||||
|     static constexpr u64 registry_page_size{1 << registry_page_bits}; |     static constexpr u64 registry_page_size{1 << registry_page_bits}; | ||||||
|     std::unordered_map<CacheAddr, std::vector<TSurface>> registry; |     std::unordered_map<CacheAddr, std::vector<TSurface>> registry; | ||||||
|  |  | ||||||
|  |     static constexpr u32 DEPTH_RT = 8; | ||||||
|  |     static constexpr u32 NO_RT = 0xFFFFFFFF; | ||||||
|  |  | ||||||
|     // The L1 Cache is used for fast texture lookup before checking the overlaps |     // The L1 Cache is used for fast texture lookup before checking the overlaps | ||||||
|     // This avoids calculating size and other stuff. |     // This avoids calculating size and other stuff. | ||||||
|     std::unordered_map<CacheAddr, TSurface> l1_cache; |     std::unordered_map<CacheAddr, TSurface> l1_cache; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user