From 612647f94f7b522f127840abeb96c852ffe20fd9 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 10 Feb 2023 16:07:55 +0200 Subject: [PATCH] video_core: Only allocate needed levels * Especially with high-res scaling, allocating so many levels increases memory usage. Also clamp the level size to 8x8, since on tiled textures it doesn't make sense to have any level smaller than that. Fixes portal3DS and log spam on ZLBW. --- .../rasterizer_cache/rasterizer_cache.h | 46 +++++++++---------- .../rasterizer_cache/rasterizer_cache_base.h | 2 +- .../rasterizer_cache/surface_base.h | 1 - src/video_core/rasterizer_cache/utils.cpp | 11 +++++ src/video_core/rasterizer_cache/utils.h | 5 +- .../renderer_opengl/gl_rasterizer.cpp | 22 +++++---- .../renderer_opengl/gl_resource_manager.cpp | 45 ------------------ .../renderer_opengl/gl_resource_manager.h | 6 --- .../renderer_opengl/gl_texture_runtime.cpp | 20 ++++---- .../renderer_opengl/gl_texture_runtime.h | 7 ++- .../renderer_vulkan/vk_rasterizer.cpp | 8 ++-- .../renderer_vulkan/vk_texture_runtime.cpp | 20 ++++---- .../renderer_vulkan/vk_texture_runtime.h | 8 ++-- 13 files changed, 90 insertions(+), 111 deletions(-) diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index c3a361505..5fd0db503 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -19,6 +19,14 @@ RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, TextureRuntim : memory{memory_}, runtime{runtime_}, resolution_scale_factor{ VideoCore::GetResolutionScaleFactor()} {} +template +RasterizerCache::~RasterizerCache() { +#ifndef ANDROID + // This is for switching renderers, which is unsupported on Android, and costly on shutdown + ClearAll(false); +#endif +} + template template auto RasterizerCache::FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, @@ -326,7 +334,8 @@ template auto 
RasterizerCache::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) -> Surface { const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - return GetTextureSurface(info, config.config.lod.max_level); + const u32 max_level = MipLevels(info.width, info.height, config.config.lod.max_level) - 1; + return GetTextureSurface(info, max_level); } template @@ -340,9 +349,9 @@ auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& inf params.addr = info.physical_address; params.width = info.width; params.height = info.height; + params.levels = max_level + 1; params.is_tiled = true; params.pixel_format = PixelFormatFromTextureFormat(info.format); - params.res_scale = /*texture_filterer->IsNull() ?*/ 1 /*: resolution_scale_factor*/; params.UpdateParams(); const u32 min_width = info.width >> max_level; @@ -368,20 +377,10 @@ auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& inf if (max_level >= 8) { // Since PICA only supports texture size between 8 and 1024, there are at most eight // possible mipmap levels including the base. 
- LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + LOG_CRITICAL(HW_GPU, "Unsupported mipmap level {}", max_level); return nullptr; } - // Allocate more mipmap levels if necessary - if (surface->max_level < max_level) { - /*if (!texture_filterer->IsNull()) { - // TODO: proper mipmap support for custom textures - runtime.GenerateMipmaps(surface->texture, max_level); - }*/ - - surface->max_level = max_level; - } - // Blit mipmaps that have been invalidated SurfaceParams surface_params = *surface; for (u32 level = 1; level <= max_level; level++) { @@ -391,6 +390,7 @@ auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& inf surface_params.width /= 2; surface_params.height /= 2; surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.levels = 1; surface_params.UpdateParams(); auto& watcher = surface->level_watchers[level - 1]; @@ -409,17 +409,15 @@ auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& inf ValidateSurface(level_surface, level_surface->addr, level_surface->size); } - if (/*texture_filterer->IsNull()*/ true) { - const TextureBlit texture_blit = {.src_level = 0, - .dst_level = level, - .src_layer = 0, - .dst_layer = 0, - .src_rect = level_surface->GetScaledRect(), - .dst_rect = surface_params.GetScaledRect()}; - - runtime.BlitTextures(*level_surface, *surface, texture_blit); - } - + const TextureBlit texture_blit = { + .src_level = 0, + .dst_level = level, + .src_layer = 0, + .dst_layer = 0, + .src_rect = level_surface->GetScaledRect(), + .dst_rect = surface_params.GetScaledRect(), + }; + runtime.BlitTextures(*level_surface, *surface, texture_blit); watcher->Validate(); } } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index 8fb9f9d12..15881796e 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h 
@@ -72,7 +72,7 @@ private: public: RasterizerCache(Memory::MemorySystem& memory, TextureRuntime& runtime); - ~RasterizerCache() = default; + ~RasterizerCache(); /// Get the best surface match (and its match type) for the given flags template diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 502e0e3c8..020e8e4a8 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -86,7 +86,6 @@ public: bool registered = false; SurfaceRegions invalid_regions; std::array, 7> level_watchers; - u32 max_level = 0; std::array fill_data; u32 fill_size = 0; diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index 79f2f46d2..2a486310a 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -74,6 +74,17 @@ void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr en UNREACHABLE(); } +u32 MipLevels(u32 width, u32 height, u32 max_level) { + u32 levels = 1; + while (width > 8 && height > 8) { + levels++; + width >>= 1; + height >>= 1; + } + + return std::min(levels, max_level + 1); +} + void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr, std::span source, std::span dest, bool convert) { const u32 func_index = static_cast(surface_info.pixel_format); diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index ff87084fb..13a9c8a8c 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -76,9 +76,10 @@ struct BufferCopy { struct HostTextureTag { PixelFormat format{}; + TextureType type{}; u32 width = 0; u32 height = 0; - u32 layers = 1; + u32 levels = 1; auto operator<=>(const HostTextureTag&) const noexcept = default; @@ -106,6 +107,8 @@ struct TextureCubeConfig { [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat 
format, const u8* fill_data); +u32 MipLevels(u32 width, u32 height, u32 max_level); + /** * Encodes a linear texture to the expected linear or tiled format. * diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e43e8d9d9..fedb1984b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -564,25 +564,29 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { // The game is trying to use a surface as a texture and framebuffer at the same time // which causes unpredictable behavior on the host. // Making a copy to sample from eliminates this issue and seems to be fairly cheap. - OGLTexture temp_tex; if (need_duplicate_texture) { - temp_tex = - runtime.Allocate(color_surface->GetScaledWidth(), color_surface->GetScaledHeight(), - color_surface->pixel_format, color_surface->texture_type); - - temp_tex.CopyFrom(color_surface->texture, GL_TEXTURE_2D, color_surface->max_level + 1, - color_surface->GetScaledWidth(), color_surface->GetScaledHeight()); + Surface temp{*color_surface, runtime}; + const VideoCore::TextureCopy copy = { + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_offset = {0, 0}, + .dst_offset = {0, 0}, + .extent = {temp.GetScaledWidth(), temp.GetScaledHeight()}, + }; + runtime.CopyTextures(*color_surface, temp, copy); for (auto& unit : state.texture_units) { if (unit.texture_2d == color_surface->texture.handle) { - unit.texture_2d = temp_tex.handle; + unit.texture_2d = temp.Handle(); } } for (auto shadow_unit : {&state.image_shadow_texture_nx, &state.image_shadow_texture_ny, &state.image_shadow_texture_nz, &state.image_shadow_texture_px, &state.image_shadow_texture_py, &state.image_shadow_texture_pz}) { if (*shadow_unit == color_surface->texture.handle) { - *shadow_unit = temp_tex.handle; + *shadow_unit = temp.Handle(); } } } diff --git 
a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 90f014439..576431546 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -52,51 +52,6 @@ void OGLTexture::Release() { handle = 0; } -void OGLTexture::Allocate(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, - GLsizei height, GLsizei depth) { - GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; - glActiveTexture(GL_TEXTURE0); - glBindTexture(target, handle); - - switch (target) { - case GL_TEXTURE_1D: - case GL_TEXTURE: - glTexStorage1D(target, levels, internalformat, width); - break; - case GL_TEXTURE_2D: - case GL_TEXTURE_1D_ARRAY: - case GL_TEXTURE_RECTANGLE: - case GL_TEXTURE_CUBE_MAP: - glTexStorage2D(target, levels, internalformat, width, height); - break; - case GL_TEXTURE_3D: - case GL_TEXTURE_2D_ARRAY: - case GL_TEXTURE_CUBE_MAP_ARRAY: - glTexStorage3D(target, levels, internalformat, width, height, depth); - break; - } - - glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - - glBindTexture(target, old_tex); -} - -void OGLTexture::CopyFrom(const OGLTexture& other, GLenum target, GLsizei levels, GLsizei width, - GLsizei height) { - GLuint old_tex = OpenGLState::GetCurState().texture_units[0].texture_2d; - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, handle); - - for (GLsizei level = 0; level < levels; level++) { - glCopyImageSubData(other.handle, target, level, 0, 0, 0, handle, target, level, 0, 0, 0, - width >> level, height >> level, 1); - } - - glBindTexture(GL_TEXTURE_2D, old_tex); -} - void OGLSampler::Create() { if (handle != 0) { return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 
ab46b7af4..b65724289 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -59,12 +59,6 @@ public: /// Deletes the internal OpenGL resource void Release(); - void Allocate(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, - GLsizei height = 1, GLsizei depth = 1); - - void CopyFrom(const OGLTexture& other, GLenum target, GLsizei levels, GLsizei width, - GLsizei height); - GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 91322fd99..424aeba1a 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -115,16 +115,19 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f return DEFAULT_TUPLE; } -OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, - VideoCore::TextureType type) { - const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; - const u32 levels = std::log2(std::max(width, height)) + 1; +OGLTexture TextureRuntime::Allocate(u32 width, u32 height, u32 levels, + VideoCore::PixelFormat format, VideoCore::TextureType type) { const GLenum target = type == VideoCore::TextureType::CubeMap ? 
GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; // Attempt to recycle an unused texture const VideoCore::HostTextureTag key = { - .format = format, .width = width, .height = height, .layers = layers}; + .format = format, + .type = type, + .width = width, + .height = height, + .levels = levels, + }; if (auto it = texture_recycler.find(key); it != texture_recycler.end()) { OGLTexture texture = std::move(it->second); @@ -311,7 +314,7 @@ void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarge Surface::Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) : VideoCore::SurfaceBase{params}, runtime{runtime}, driver{runtime.GetDriver()} { if (pixel_format != VideoCore::PixelFormat::Invalid) { - texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, + texture = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, params.pixel_format, texture_type); } } @@ -320,10 +323,11 @@ Surface::~Surface() { if (pixel_format != VideoCore::PixelFormat::Invalid) { const VideoCore::HostTextureTag tag = { .format = pixel_format, + .type = texture_type, .width = GetScaledWidth(), .height = GetScaledHeight(), - .layers = texture_type == VideoCore::TextureType::CubeMap ? 
6u : 1u}; - + .levels = levels, + }; runtime.texture_recycler.emplace(tag, std::move(texture)); } } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index b5cc0b573..d18bd7566 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -51,7 +51,7 @@ public: void Finish() const {} /// Allocates an OpenGL texture with the specified dimentions and format - OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format, + OGLTexture Allocate(u32 width, u32 height, u32 levels, VideoCore::PixelFormat format, VideoCore::TextureType type); /// Fills the rectangle of the texture with the clear value provided @@ -104,6 +104,11 @@ public: Surface(VideoCore::SurfaceParams& params, TextureRuntime& runtime); ~Surface() override; + /// Returns the surface image handle + GLuint Handle() const noexcept { + return texture.handle; + } + /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 6048045b1..8c78aff2f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -695,10 +695,10 @@ void RasterizerVulkan::SyncTextureUnits(Surface* const color_surface) { .dst_level = 0, .src_layer = 0, .dst_layer = 0, - .src_offset = VideoCore::Offset{0, 0}, - .dst_offset = VideoCore::Offset{0, 0}, - .extent = VideoCore::Extent{temp.GetScaledWidth(), temp.GetScaledHeight()}}; - + .src_offset = {0, 0}, + .dst_offset = {0, 0}, + .extent = {temp.GetScaledWidth(), temp.GetScaledHeight()}, + }; runtime.CopyTextures(*color_surface, temp, copy); pipeline_cache.BindTexture(texture_index, temp.ImageView()); } else { diff --git 
a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index b04090071..f59f314ed 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -19,6 +19,8 @@ MICROPROFILE_DEFINE(Vulkan_Download, "Vulkan", "Texture Download", MP_RGB(128, 1 namespace Vulkan { +using VideoCore::GetFormatType; +using VideoCore::MipLevels; using VideoCore::PixelFormatAsString; struct RecordParams { @@ -161,13 +163,15 @@ void TextureRuntime::Finish() { scheduler.Finish(); } -ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, - VideoCore::TextureType type) { +ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 levels, + VideoCore::PixelFormat format, VideoCore::TextureType type) { const FormatTraits traits = instance.GetTraits(format); - return Allocate(width, height, format, type, traits.native, traits.usage, traits.aspect); + return Allocate(width, height, levels, format, type, traits.native, traits.usage, + traits.aspect); } -ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat pixel_format, +ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, u32 levels, + VideoCore::PixelFormat pixel_format, VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect) { MICROPROFILE_SCOPE(Vulkan_ImageAlloc); @@ -185,6 +189,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma .type = type, .width = width, .height = height, + .levels = levels, }; // Attempt to recycle an unused allocation @@ -195,7 +200,6 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma } const bool create_storage_view = pixel_format == VideoCore::PixelFormat::RGBA8; - const u32 levels = std::log2(std::max(width, height)) + 1; const u32 layers = type == VideoCore::TextureType::CubeMap ? 
6 : 1; vk::ImageCreateFlags flags; @@ -750,7 +754,7 @@ Surface::Surface(const VideoCore::SurfaceParams& params, TextureRuntime& runtime scheduler{runtime.GetScheduler()}, traits{instance.GetTraits(pixel_format)} { if (pixel_format != VideoCore::PixelFormat::Invalid) { - alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), params.pixel_format, + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, params.pixel_format, texture_type); } } @@ -760,8 +764,8 @@ Surface::Surface(const VideoCore::SurfaceParams& params, vk::Format format, : VideoCore::SurfaceBase{params}, runtime{runtime}, instance{runtime.GetInstance()}, scheduler{runtime.GetScheduler()} { if (format != vk::Format::eUndefined) { - alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), pixel_format, texture_type, - format, usage, aspect); + alloc = runtime.Allocate(GetScaledWidth(), GetScaledHeight(), levels, pixel_format, + texture_type, format, usage, aspect); } } diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index ac7bfaf3d..cc5ecb9c0 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -53,6 +53,7 @@ struct HostTextureTag { VideoCore::TextureType type = VideoCore::TextureType::Texture2D; u32 width = 1; u32 height = 1; + u32 levels = 1; auto operator<=>(const HostTextureTag&) const noexcept = default; @@ -101,11 +102,12 @@ public: [[nodiscard]] StagingData FindStaging(u32 size, bool upload); /// Allocates a vulkan image possibly resusing an existing one - [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, - VideoCore::TextureType type); + [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, u32 levels, + VideoCore::PixelFormat format, VideoCore::TextureType type); /// Allocates a vulkan image - [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat pixel_format, + 
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, u32 levels, + VideoCore::PixelFormat pixel_format, VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect);