From 3ef2957b951a8ef8dfbddc193eb57be74bda27c4 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Tue, 25 Apr 2023 23:41:30 +0300 Subject: [PATCH] rasterizer_cache: Switch to page table * Surface storage isn't particularly interval sensitive so we can use a page table to make it faster --- src/video_core/CMakeLists.txt | 4 +- .../rasterizer_cache/rasterizer_cache.h | 295 +++++++++++------- .../rasterizer_cache/rasterizer_cache_base.h | 45 ++- .../rasterizer_cache/surface_base.h | 6 + 4 files changed, 216 insertions(+), 134 deletions(-) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 80094a7e0..3ba7215d9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -127,8 +127,8 @@ target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) create_target_directory_groups(video_core) -target_link_libraries(video_core PUBLIC citra_common citra_core) -target_link_libraries(video_core PRIVATE glad json-headers dds-ktx nihstro-headers Boost::serialization) +target_link_libraries(video_core PUBLIC citra_common core) +target_link_libraries(video_core PRIVATE glad tsl::robin_map json-headers dds-ktx nihstro-headers Boost::serialization) set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO}) if ("x86_64" IN_LIST ARCHITECTURE) diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 748eea9fc..641f7436b 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -2,7 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include +#include #include #include "common/alignment.h" #include "common/logging/log.h" @@ -63,12 +63,7 @@ void RasterizerCache::TickFrame() { if (use_custom_textures) { custom_tex_manager.FindCustomTextures(); } - FlushAll(); - while (!surface_cache.empty()) { - UnregisterSurface(*surface_cache.begin()->second.begin()); - } - texture_cube_cache.clear(); - runtime.Reset(); + UnregisterAll(); } } @@ -641,89 +636,129 @@ RasterizerCache::SurfaceRect_Tuple RasterizerCache::GetTexCopySurface( return std::make_tuple(match_surface, rect); } +template +template +void RasterizerCache::ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result::type; + static constexpr bool BOOL_BREAK = std::is_same_v; + boost::container::small_vector surfaces; + ForEachPage(addr, size, [this, &surfaces, addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const SurfaceRef& surface : it->second) { + if (surface->picked) { + continue; + } + if (!surface->Overlaps(addr, size)) { + continue; + } + + surface->picked = true; + surfaces.push_back(surface); + if constexpr (BOOL_BREAK) { + if (func(surface)) { + return true; + } + } else { + func(surface); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const SurfaceRef& surface : surfaces) { + surface->picked = false; + } +} + template template RasterizerCache::SurfaceRef RasterizerCache::FindMatch( const SurfaceParams& params, ScaleMatch match_scale_type, std::optional validate_interval) { - RasterizerCache::SurfaceRef match_surface = nullptr; + SurfaceRef match_surface = nullptr; bool match_valid = false; u32 match_scale = 0; SurfaceInterval match_interval{}; - for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (const auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // Validity will be checked in GetCopyableInterval - const bool is_valid = - True(find_flags & MatchFlags::Copy) - ? true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + ForEachSurfaceInRegion(params.addr, params.size, [&](SurfaceRef surface) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + // Validity will be checked in GetCopyableInterval + const bool is_valid = + True(find_flags & MatchFlags::Copy) + ? true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (False(find_flags & check_type)) - return; + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (False(find_flags & check_type)) + return; - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - ASSERT(validate_interval); - auto copy_interval = - surface->GetCopyableInterval(params.FromInterval(*validate_interval)); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - } - } + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + auto copy_interval = + surface->GetCopyableInterval(params.FromInterval(*validate_interval)); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + }); return match_surface; } @@ -1014,15 +1049,16 @@ bool RasterizerCache::NoUnimplementedReinterpretations(const SurfaceRef& surf template bool RasterizerCache::IntervalHasInvalidPixelFormat(const SurfaceParams& params, const SurfaceInterval& interval) { - for (const auto& set : RangeFromInterval(surface_cache, interval)) { - for (const auto& surface : set.second) { - if (surface->pixel_format == PixelFormat::Invalid) { - LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface->addr); - return true; - } + bool invalid_format_found = false; + ForEachSurfaceInRegion(params.addr, params.end, [&](SurfaceRef surface) { + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface->addr); + invalid_format_found = true; + return true; } - } - return false; + return false; + }); + return invalid_format_found; } template @@ -1069,7 +1105,7 @@ void RasterizerCache::ClearAll(bool flush) { // Remove the whole cache without really looking at it. cached_pages -= flush_interval; dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF); - surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF); + page_table.clear(); remove_surfaces.clear(); } @@ -1082,7 +1118,7 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, SurfaceRef flush_surf SurfaceRegions flushed_intervals; for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region + // Small sizes imply that this most likely comes from the cpu, flush the entire region // the point is to avoid thousands of small writes every frame if the cpu decides to // access that region, anything higher than 8 you're guaranteed it comes from a service const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; @@ -1127,29 +1163,28 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const SurfaceRef region_owner->MarkValid(invalid_interval); } - for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (const auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (!region_owner && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - remove_surfaces.push_back(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->MarkInvalid(interval); - - // If the surface has no salvageable data it should be removed from the cache to avoid - // clogging the data structure - if (cached_surface->IsFullyInvalid()) { - remove_surfaces.push_back(cached_surface); - } + ForEachSurfaceInRegion(addr, size, [&](SurfaceRef surface) { + if (surface == region_owner) { + return; } - } + + // If the CPU is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (!region_owner && size <= 8) { + FlushRegion(surface->addr, surface->size, surface); + remove_surfaces.push_back(surface); + return; + } + + const auto interval = surface->GetInterval() & invalid_interval; + surface->MarkInvalid(interval); + + // If the surface has no salvageable data it should be removed from the cache to avoid + // clogging the data structure + if (surface->IsFullyInvalid()) { + remove_surfaces.push_back(surface); + } + }); if (region_owner) { dirty_regions.set({invalid_interval, region_owner}); @@ -1157,7 +1192,7 @@ void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const SurfaceRef dirty_regions.erase(invalid_interval); } - for (const SurfaceRef& remove_surface : remove_surfaces) { + for (const SurfaceRef remove_surface : remove_surfaces) { UnregisterSurface(remove_surface); } remove_surfaces.clear(); @@ -1172,22 +1207,48 @@ RasterizerCache::SurfaceRef RasterizerCache::CreateSurface(const SurfacePa template void RasterizerCache::RegisterSurface(const SurfaceRef& surface) { - if (surface->registered) { - return; - } + ASSERT_MSG(!surface->registered, "Trying to register an already registered surface"); + surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); UpdatePagesCachedCount(surface->addr, surface->size, 1); + ForEachPage(surface->addr, surface->size, + [this, surface](u64 page) { page_table[page].push_back(surface); }); } template void RasterizerCache::UnregisterSurface(const SurfaceRef& surface) { - if (!surface->registered) { - return; - } + ASSERT_MSG(surface->registered, "Trying to unregister an already unregistered surface"); + surface->registered = false; UpdatePagesCachedCount(surface->addr, surface->size, -1); - surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); + ForEachPage(surface->addr, surface->size, [this, surface](u64 page) { + auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << CITRA_PAGEBITS); + return; + } + std::vector& surfaces = page_it.value(); + const auto vector_it = std::find(surfaces.begin(), surfaces.end(), surface); + if (vector_it == surfaces.end()) { + ASSERT_MSG(false, "Unregistering unregistered surface in page=0x{:x}", + page << CITRA_PAGEBITS); + return; + } + surfaces.erase(vector_it); + }); +} + +template +void RasterizerCache::UnregisterAll() { + FlushAll(); + for (auto& [page, surfaces] : page_table) { + while (!surfaces.empty()) { + UnregisterSurface(surfaces.back()); + } + } + texture_cube_cache.clear(); + remove_surfaces.clear(); + runtime.Reset(); } template diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index e28dfc7bd..29643a653 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -9,6 +9,7 @@ #include #include #include +#include #include "video_core/rasterizer_cache/surface_base.h" namespace Memory { @@ -46,25 +47,16 @@ class RendererBase; template class RasterizerCache { -public: + /// Address shift for caching surfaces into a hash table + static constexpr u64 CITRA_PAGEBITS = 18; + using TextureRuntime = typename T::TextureRuntime; using SurfaceRef = std::shared_ptr; using Framebuffer = typename T::Framebuffer; - // Declare rasterizer interval types - using SurfaceSet = std::set; using SurfaceMap = boost::icl::interval_map; - using SurfaceCache = boost::icl::interval_map; - - static_assert( - std::is_same() && - std::is_same(), - "Incorrect interval types"); using SurfaceRect_Tuple = std::tuple>; using PageMap = boost::icl::interval_map; @@ -139,6 +131,26 @@ public: void ClearAll(bool flush); private: + /// Iterate over all page indices in a range + template + void ForEachPage(PAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_end = (addr + size - 1) >> CITRA_PAGEBITS; + for (u64 page = addr >> CITRA_PAGEBITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Iterates over all the surfaces in a region calling func + template + void ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func); + /// Get the best surface match (and its match type) for the given flags template SurfaceRef FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type, @@ -187,6 +199,9 @@ private: /// Remove surface from the cache void UnregisterSurface(const SurfaceRef& surface); + /// Unregisters all surfaces from the cache + void UnregisterAll(); + /// Increase/decrease the number of surface in pages touching the specified region void UpdatePagesCachedCount(PAddr addr, u32 size, int delta); @@ -196,13 +211,13 @@ private: TextureRuntime& runtime; Pica::Regs& regs; RendererBase& renderer; - SurfaceCache surface_cache; - PageMap cached_pages; + tsl::robin_pg_map, Common::IdentityHash> page_table; + std::unordered_map texture_cube_cache; SurfaceMap dirty_regions; + PageMap cached_pages; std::vector remove_surfaces; u32 resolution_scale_factor; RenderTargets render_targets; - std::unordered_map texture_cube_cache; bool use_filter; bool dump_textures; bool use_custom_textures; diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 3deb989b5..15e92c531 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -33,6 +33,11 @@ public: /// Returns true if the surface contains a custom material with a normal map. bool HasNormalMap() const noexcept; + bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept { + const PAddr overlap_end = overlap_addr + static_cast(overlap_size); + return addr < overlap_end && overlap_addr < end; + } + u64 ModificationTick() const noexcept { return modification_tick; } @@ -66,6 +71,7 @@ private: public: bool registered = false; + bool picked = false; bool is_custom = false; const Material* material = nullptr; SurfaceRegions invalid_regions;