From eeb1ff7965232bc320262c4e50d7ed923ba25bff Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 8 Mar 2023 10:11:47 +0200 Subject: [PATCH] rasterizer_cache: Avoid redundant texture copies when using texcubes --- .../rasterizer_cache/rasterizer_cache.h | 50 +++++++++------- .../rasterizer_cache/rasterizer_cache_base.h | 11 ++-- .../rasterizer_cache/surface_base.cpp | 2 - .../rasterizer_cache/surface_base.h | 60 ++++++++++++------- .../renderer_vulkan/vk_rasterizer.cpp | 1 - 5 files changed, 72 insertions(+), 52 deletions(-) diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 7940aa2af..ba907b346 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include "common/alignment.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -627,6 +628,8 @@ auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> Surf } auto [it, new_surface] = texture_cube_cache.try_emplace(config); + CubeParams& params = it->second; + if (new_surface) { const SurfaceParams cube_params = { .addr = config.px, @@ -638,11 +641,12 @@ auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> Surf .pixel_format = PixelFormatFromTextureFormat(config.format), .type = SurfaceType::Texture, }; - it->second = CreateSurface(cube_params); + params.cube_id = CreateSurface(cube_params); } - const SurfaceId cube_id = it->second; - const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; + const std::array addresses = { + config.px, config.nx, config.py, config.ny, config.pz, config.nz, + }; for (std::size_t i = 0; i < addresses.size(); i++) { Pica::Texture::TextureInfo info = { @@ -654,25 +658,27 @@ auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> Surf info.SetDefaultStride(); Surface& face_surface = GetTextureSurface(info, config.levels - 1); - Surface& cube = slot_surfaces[cube_id]; + Surface& cube = slot_surfaces[params.cube_id]; - const u32 face = static_cast(i); - const u32 scaled_size = cube.GetScaledWidth(); - for (u32 level = 0; level < face_surface.levels; level++) { - const TextureCopy texture_copy = { - .src_level = level, - .dst_level = level, - .src_layer = 0, - .dst_layer = face, - .src_offset = {0, 0}, - .dst_offset = {0, 0}, - .extent = {scaled_size >> level, scaled_size >> level}, - }; - runtime.CopyTextures(face_surface, cube, texture_copy); + if (face_surface.ModificationTick() != params.ticks[i]) { + const u32 scaled_size = cube.GetScaledWidth(); + for (u32 level = 0; level < face_surface.levels; level++) { + const TextureCopy texture_copy = { + .src_level = level, + .dst_level = level, + .src_layer = 0, + .dst_layer = static_cast(i), + .src_offset = {0, 0}, + .dst_offset = {0, 0}, + .extent = {scaled_size >> level, scaled_size >> level}, + }; + runtime.CopyTextures(face_surface, cube, texture_copy); + } + params.ticks[i] = face_surface.ModificationTick(); } } - return slot_surfaces[cube_id]; + return slot_surfaces[params.cube_id]; } template @@ -886,7 +892,7 @@ void RasterizerCache::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s const auto NotifyValidated = [&](SurfaceInterval interval) { level_regions.erase(interval); - surface.invalid_regions.erase(interval); + surface.MarkValid(interval); }; while (!level_regions.empty()) { @@ -1245,7 +1251,7 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region ASSERT(addr >= region_owner.addr && addr + size <= region_owner.end); // Surfaces can't have a gap ASSERT(region_owner.width == region_owner.stride); - region_owner.invalid_regions.erase(invalid_interval); + region_owner.MarkValid(invalid_interval); } ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) { @@ -1262,7 +1268,7 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, SurfaceId region } const SurfaceInterval interval = surface.GetInterval() & invalid_interval; - surface.invalid_regions.insert(interval); + surface.MarkInvalid(interval); // If the surface has no salvageable data it should be removed from the cache to avoid // clogging the data structure @@ -1287,7 +1293,7 @@ template SurfaceId RasterizerCache::CreateSurface(const SurfaceParams& params) { SurfaceId surface_id = slot_surfaces.insert(runtime, params); Surface& surface = slot_surfaces[surface_id]; - surface.invalid_regions.insert(surface.GetInterval()); + surface.MarkInvalid(surface.GetInterval()); return surface_id; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index c3baa2a10..671c901af 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -9,8 +9,6 @@ #include #include #include -#include -#include "common/thread_worker.h" #include "video_core/rasterizer_cache/sampler_params.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/utils.h" @@ -41,7 +39,7 @@ DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); class CustomTexManager; template -class RasterizerCache : NonCopyable { +class RasterizerCache { /// Address shift for caching surfaces into a hash table static constexpr u64 CITRA_PAGEBITS = 18; @@ -63,6 +61,11 @@ class RasterizerCache : NonCopyable { SurfaceId depth_surface_id; }; + struct CubeParams { + SurfaceId cube_id; + std::array ticks{}; + }; + public: RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, Runtime& runtime); @@ -203,13 +206,13 @@ private: SurfaceMap dirty_regions; std::vector remove_surfaces; u16 resolution_scale_factor; - std::unordered_map texture_cube_cache; // The internal surface cache is based on buckets of 256KB. // This fits better for the purpose of this cache as textures are normaly // large in size. std::unordered_map, Common::IdentityHash> page_table; std::unordered_map samplers; + std::unordered_map texture_cube_cache; SlotVector slot_surfaces; SlotVector slot_samplers; diff --git a/src/video_core/rasterizer_cache/surface_base.cpp b/src/video_core/rasterizer_cache/surface_base.cpp index b61f0e2a5..eb7084d20 100644 --- a/src/video_core/rasterizer_cache/surface_base.cpp +++ b/src/video_core/rasterizer_cache/surface_base.cpp @@ -9,8 +9,6 @@ namespace VideoCore { -SurfaceBase::SurfaceBase() = default; - SurfaceBase::SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {} bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 85d2da6c5..6ef2b83fc 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -14,9 +14,44 @@ using SurfaceRegions = boost::icl::interval_set(overlap_size); + return addr < overlap_end && overlap_addr < end; + } + + u64 ModificationTick() const noexcept { + return modification_tick; + } + + CustomPixelFormat CustomFormat() const noexcept { + return custom_format; + } + + bool IsCustom() const noexcept { + return is_custom; + } + + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + void MarkValid(SurfaceInterval interval) { + invalid_regions.erase(interval); + modification_tick++; + } + + void MarkInvalid(SurfaceInterval interval) { + invalid_regions.insert(interval); + modification_tick++; + } + + bool IsFullyInvalid() const { + auto interval = GetInterval(); + return *invalid_regions.equal_range(interval).first == interval; + } + /// Returns true when this surface can be used to fill the fill_interval of dest_surface bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; @@ -29,28 +64,6 @@ public: /// Returns the clear value used to validate another surface from this fill surface ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format); - bool IsCustom() const noexcept { - return is_custom; - } - - CustomPixelFormat CustomFormat() const noexcept { - return custom_format; - } - - bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept { - const PAddr overlap_end = overlap_addr + static_cast(overlap_size); - return addr < overlap_end && overlap_addr < end; - } - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } - - bool IsFullyInvalid() const { - auto interval = GetInterval(); - return *invalid_regions.equal_range(interval).first == interval; - } - private: /// Returns the fill buffer value starting from copy_addr std::array MakeFillBuffer(PAddr copy_addr); @@ -63,6 +76,7 @@ public: SurfaceRegions invalid_regions; std::array fill_data; u32 fill_size = 0; + u64 modification_tick = 1; }; } // namespace VideoCore diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aa7e45bb7..b039a2456 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -617,7 +617,6 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf } void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { - LOG_WARNING(Render_Vulkan, "Using texture cube, might be slow"); using CubeFace = Pica::TexturingRegs::CubeFace; const VideoCore::TextureCubeConfig config = { .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),