rasterizer_cache: Switch to page table

* Surface storage isn't particularly interval-sensitive, so we can use a page table to make it faster
GPUCode committed 2023-04-25 23:41:30 +03:00
parent 5dd4a81476
commit 3ef2957b95
4 changed files with 216 additions and 134 deletions
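The change replaces the interval-map keyed surface_cache with buckets of surfaces keyed by 256 KiB page index (CITRA_PAGEBITS = 18), so a lookup only touches the handful of pages an address range spans. The following standalone sketch illustrates just that bucketing idea; the names (PageTable, PAGE_BITS, Register) and the use of std::unordered_map are illustrative stand-ins for the commit's tsl::robin_pg_map-based page_table, not its actual code.

#include <cstdint>
#include <unordered_map>
#include <vector>

// Illustrative constants and types; the commit itself uses CITRA_PAGEBITS = 18
// and tsl::robin_pg_map with an identity hash.
constexpr std::uint64_t PAGE_BITS = 18; // 256 KiB pages
using PAddr = std::uint32_t;
struct Surface; // stand-in for the cache's surface type
using PageTable = std::unordered_map<std::uint64_t, std::vector<Surface*>>;

// Visit every page index covered by [addr, addr + size).
template <typename Func>
void ForEachPage(PAddr addr, std::size_t size, Func&& func) {
    const std::uint64_t page_end = (addr + size - 1) >> PAGE_BITS;
    for (std::uint64_t page = addr >> PAGE_BITS; page <= page_end; ++page) {
        func(page);
    }
}

// Registering a surface appends it to every page bucket it touches;
// unregistering removes it from the same buckets.
void Register(PageTable& table, PAddr addr, std::size_t size, Surface* surface) {
    ForEachPage(addr, size, [&](std::uint64_t page) { table[page].push_back(surface); });
}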


@@ -127,8 +127,8 @@ target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC citra_common citra_core)
target_link_libraries(video_core PRIVATE glad json-headers dds-ktx nihstro-headers Boost::serialization)
target_link_libraries(video_core PUBLIC citra_common core)
target_link_libraries(video_core PRIVATE glad tsl::robin_map json-headers dds-ktx nihstro-headers Boost::serialization)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if ("x86_64" IN_LIST ARCHITECTURE)


@@ -2,7 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include <boost/container/small_vector.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/logging/log.h"
@@ -63,12 +63,7 @@ void RasterizerCache<T>::TickFrame() {
if (use_custom_textures) {
custom_tex_manager.FindCustomTextures();
}
FlushAll();
while (!surface_cache.empty()) {
UnregisterSurface(*surface_cache.begin()->second.begin());
}
texture_cube_cache.clear();
runtime.Reset();
UnregisterAll();
}
}
@@ -641,89 +636,129 @@ RasterizerCache<T>::SurfaceRect_Tuple RasterizerCache<T>::GetTexCopySurface(
return std::make_tuple(match_surface, rect);
}
template <class T>
template <typename Func>
void RasterizerCache<T>::ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, SurfaceRef>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<SurfaceRef, 32> surfaces;
ForEachPage(addr, size, [this, &surfaces, addr, size, func](u64 page) {
const auto it = page_table.find(page);
if (it == page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
return;
}
}
for (const SurfaceRef& surface : it->second) {
if (surface->picked) {
continue;
}
if (!surface->Overlaps(addr, size)) {
continue;
}
surface->picked = true;
surfaces.push_back(surface);
if constexpr (BOOL_BREAK) {
if (func(surface)) {
return true;
}
} else {
func(surface);
}
}
if constexpr (BOOL_BREAK) {
return false;
}
});
for (const SurfaceRef& surface : surfaces) {
surface->picked = false;
}
}
template <class T>
template <MatchFlags find_flags>
RasterizerCache<T>::SurfaceRef RasterizerCache<T>::FindMatch(
const SurfaceParams& params, ScaleMatch match_scale_type,
std::optional<SurfaceInterval> validate_interval) {
RasterizerCache<T>::SurfaceRef match_surface = nullptr;
SurfaceRef match_surface = nullptr;
bool match_valid = false;
u32 match_scale = 0;
SurfaceInterval match_interval{};
for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) {
for (const auto& surface : pair.second) {
const bool res_scale_matched = match_scale_type == ScaleMatch::Exact
? (params.res_scale == surface->res_scale)
: (params.res_scale <= surface->res_scale);
// Validity will be checked in GetCopyableInterval
const bool is_valid =
True(find_flags & MatchFlags::Copy)
? true
: surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
ForEachSurfaceInRegion(params.addr, params.size, [&](SurfaceRef surface) {
const bool res_scale_matched = match_scale_type == ScaleMatch::Exact
? (params.res_scale == surface->res_scale)
: (params.res_scale <= surface->res_scale);
// Validity will be checked in GetCopyableInterval
const bool is_valid =
True(find_flags & MatchFlags::Copy)
? true
: surface->IsRegionValid(validate_interval.value_or(params.GetInterval()));
auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
if (False(find_flags & check_type))
return;
auto IsMatch_Helper = [&](auto check_type, auto match_fn) {
if (False(find_flags & check_type))
return;
bool matched;
SurfaceInterval surface_interval;
std::tie(matched, surface_interval) = match_fn();
if (!matched)
return;
bool matched;
SurfaceInterval surface_interval;
std::tie(matched, surface_interval) = match_fn();
if (!matched)
return;
if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
surface->type != SurfaceType::Fill)
return;
if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore &&
surface->type != SurfaceType::Fill)
return;
// Found a match, update only if this is better than the previous one
auto UpdateMatch = [&] {
match_surface = surface;
match_valid = is_valid;
match_scale = surface->res_scale;
match_interval = surface_interval;
};
if (surface->res_scale > match_scale) {
UpdateMatch();
return;
} else if (surface->res_scale < match_scale) {
return;
}
if (is_valid && !match_valid) {
UpdateMatch();
return;
} else if (is_valid != match_valid) {
return;
}
if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
UpdateMatch();
}
// Found a match, update only if this is better than the previous one
auto UpdateMatch = [&] {
match_surface = surface;
match_valid = is_valid;
match_scale = surface->res_scale;
match_interval = surface_interval;
};
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
ASSERT(validate_interval);
auto copy_interval =
surface->GetCopyableInterval(params.FromInterval(*validate_interval));
bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
surface->CanCopy(params, copy_interval);
return std::make_pair(matched, copy_interval);
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
return std::make_pair(surface->CanExpand(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
});
}
}
if (surface->res_scale > match_scale) {
UpdateMatch();
return;
} else if (surface->res_scale < match_scale) {
return;
}
if (is_valid && !match_valid) {
UpdateMatch();
return;
} else if (is_valid != match_valid) {
return;
}
if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) {
UpdateMatch();
}
};
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Exact>{}, [&] {
return std::make_pair(surface->ExactMatch(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::SubRect>{}, [&] {
return std::make_pair(surface->CanSubRect(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
ASSERT(validate_interval);
auto copy_interval =
surface->GetCopyableInterval(params.FromInterval(*validate_interval));
bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
surface->CanCopy(params, copy_interval);
return std::make_pair(matched, copy_interval);
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
return std::make_pair(surface->CanExpand(params), surface->GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::TexCopy>{}, [&] {
return std::make_pair(surface->CanTexCopy(params), surface->GetInterval());
});
});
return match_surface;
}
@@ -1014,15 +1049,16 @@ bool RasterizerCache<T>::NoUnimplementedReinterpretations(const SurfaceRef& surf
template <class T>
bool RasterizerCache<T>::IntervalHasInvalidPixelFormat(const SurfaceParams& params,
const SurfaceInterval& interval) {
for (const auto& set : RangeFromInterval(surface_cache, interval)) {
for (const auto& surface : set.second) {
if (surface->pixel_format == PixelFormat::Invalid) {
LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface->addr);
return true;
}
bool invalid_format_found = false;
ForEachSurfaceInRegion(params.addr, params.end, [&](SurfaceRef surface) {
if (surface->pixel_format == PixelFormat::Invalid) {
LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface->addr);
invalid_format_found = true;
return true;
}
}
return false;
return false;
});
return invalid_format_found;
}
template <class T>
@@ -1069,7 +1105,7 @@ void RasterizerCache<T>::ClearAll(bool flush) {
// Remove the whole cache without really looking at it.
cached_pages -= flush_interval;
dirty_regions -= SurfaceInterval(0x0, 0xFFFFFFFF);
surface_cache -= SurfaceInterval(0x0, 0xFFFFFFFF);
page_table.clear();
remove_surfaces.clear();
}
@@ -1082,7 +1118,7 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, SurfaceRef flush_surf
SurfaceRegions flushed_intervals;
for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) {
// small sizes imply that this most likely comes from the cpu, flush the entire region
// Small sizes imply that this most likely comes from the cpu, flush the entire region
// the point is to avoid thousands of small writes every frame if the cpu decides to
// access that region, anything higher than 8 you're guaranteed it comes from a service
const auto interval = size <= 8 ? pair.first : pair.first & flush_interval;
@@ -1127,29 +1163,28 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, const SurfaceRef
region_owner->MarkValid(invalid_interval);
}
for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) {
for (const auto& cached_surface : pair.second) {
if (cached_surface == region_owner)
continue;
// If cpu is invalidating this region we want to remove it
// to (likely) mark the memory pages as uncached
if (!region_owner && size <= 8) {
FlushRegion(cached_surface->addr, cached_surface->size, cached_surface);
remove_surfaces.push_back(cached_surface);
continue;
}
const auto interval = cached_surface->GetInterval() & invalid_interval;
cached_surface->MarkInvalid(interval);
// If the surface has no salvageable data it should be removed from the cache to avoid
// clogging the data structure
if (cached_surface->IsFullyInvalid()) {
remove_surfaces.push_back(cached_surface);
}
ForEachSurfaceInRegion(addr, size, [&](SurfaceRef surface) {
if (surface == region_owner) {
return;
}
}
// If the CPU is invalidating this region we want to remove it
// to (likely) mark the memory pages as uncached
if (!region_owner && size <= 8) {
FlushRegion(surface->addr, surface->size, surface);
remove_surfaces.push_back(surface);
return;
}
const auto interval = surface->GetInterval() & invalid_interval;
surface->MarkInvalid(interval);
// If the surface has no salvageable data it should be removed from the cache to avoid
// clogging the data structure
if (surface->IsFullyInvalid()) {
remove_surfaces.push_back(surface);
}
});
if (region_owner) {
dirty_regions.set({invalid_interval, region_owner});
@@ -1157,7 +1192,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, const SurfaceRef
dirty_regions.erase(invalid_interval);
}
for (const SurfaceRef& remove_surface : remove_surfaces) {
for (const SurfaceRef remove_surface : remove_surfaces) {
UnregisterSurface(remove_surface);
}
remove_surfaces.clear();
@@ -1172,22 +1207,48 @@ RasterizerCache<T>::SurfaceRef RasterizerCache<T>::CreateSurface(const SurfacePa
template <class T>
void RasterizerCache<T>::RegisterSurface(const SurfaceRef& surface) {
if (surface->registered) {
return;
}
ASSERT_MSG(!surface->registered, "Trying to register an already registered surface");
surface->registered = true;
surface_cache.add({surface->GetInterval(), SurfaceSet{surface}});
UpdatePagesCachedCount(surface->addr, surface->size, 1);
ForEachPage(surface->addr, surface->size,
[this, surface](u64 page) { page_table[page].push_back(surface); });
}
template <class T>
void RasterizerCache<T>::UnregisterSurface(const SurfaceRef& surface) {
if (!surface->registered) {
return;
}
ASSERT_MSG(surface->registered, "Trying to unregister an already unregistered surface");
surface->registered = false;
UpdatePagesCachedCount(surface->addr, surface->size, -1);
surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}});
ForEachPage(surface->addr, surface->size, [this, surface](u64 page) {
auto page_it = page_table.find(page);
if (page_it == page_table.end()) {
ASSERT_MSG(false, "Unregistering unregistered page=0x{:x}", page << CITRA_PAGEBITS);
return;
}
std::vector<SurfaceRef>& surfaces = page_it.value();
const auto vector_it = std::find(surfaces.begin(), surfaces.end(), surface);
if (vector_it == surfaces.end()) {
ASSERT_MSG(false, "Unregistering unregistered surface in page=0x{:x}",
page << CITRA_PAGEBITS);
return;
}
surfaces.erase(vector_it);
});
}
template <class T>
void RasterizerCache<T>::UnregisterAll() {
FlushAll();
for (auto& [page, surfaces] : page_table) {
while (!surfaces.empty()) {
UnregisterSurface(surfaces.back());
}
}
texture_cube_cache.clear();
remove_surfaces.clear();
runtime.Reset();
}
template <class T>

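A surface that spans several 256 KiB pages is stored in more than one bucket, so ForEachSurfaceInRegion above marks each visited surface with its picked flag and clears the flag once the walk finishes, ensuring the callback fires at most once per surface. Below is a reduced, self-contained sketch of that de-duplication pattern; SimpleSurface, ForEachOverlap and the flat bucket container are illustrative stand-ins, not the cache's real types.

#include <cstdint>
#include <vector>

struct SimpleSurface {
    std::uint32_t addr;
    std::uint32_t size;
    bool picked = false;
};

// Visit each overlapping surface exactly once, even if it appears in several buckets.
template <typename Func>
void ForEachOverlap(std::vector<std::vector<SimpleSurface*>>& buckets, std::uint32_t addr,
                    std::uint32_t size, Func&& func) {
    std::vector<SimpleSurface*> visited;
    for (auto& bucket : buckets) {
        for (SimpleSurface* surface : bucket) {
            const std::uint32_t surface_end = surface->addr + surface->size;
            if (surface->picked || addr + size <= surface->addr || surface_end <= addr) {
                continue; // already handled, or no overlap with [addr, addr + size)
            }
            surface->picked = true;
            visited.push_back(surface);
            func(*surface);
        }
    }
    // Reset the flag so later queries start from a clean state.
    for (SimpleSurface* surface : visited) {
        surface->picked = false;
    }
}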

@@ -9,6 +9,7 @@
#include <unordered_map>
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/surface_base.h"
namespace Memory {
@@ -46,25 +47,16 @@ class RendererBase;
template <class T>
class RasterizerCache {
public:
/// Address shift for caching surfaces into a hash table
static constexpr u64 CITRA_PAGEBITS = 18;
using TextureRuntime = typename T::TextureRuntime;
using SurfaceRef = std::shared_ptr<typename T::Surface>;
using Framebuffer = typename T::Framebuffer;
// Declare rasterizer interval types
using SurfaceSet = std::set<SurfaceRef>;
using SurfaceMap = boost::icl::interval_map<PAddr, SurfaceRef, boost::icl::partial_absorber,
std::less, boost::icl::inplace_plus,
boost::icl::inter_section, SurfaceInterval>;
using SurfaceCache = boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber,
std::less, boost::icl::inplace_plus,
boost::icl::inter_section, SurfaceInterval>;
static_assert(
std::is_same<SurfaceRegions::interval_type, typename SurfaceCache::interval_type>() &&
std::is_same<typename SurfaceMap::interval_type,
typename SurfaceCache::interval_type>(),
"Incorrect interval types");
using SurfaceRect_Tuple = std::tuple<SurfaceRef, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
@@ -139,6 +131,26 @@ public:
void ClearAll(bool flush);
private:
/// Iterate over all page indices in a range
template <typename Func>
void ForEachPage(PAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> CITRA_PAGEBITS;
for (u64 page = addr >> CITRA_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
/// Iterates over all the surfaces in a region calling func
template <typename Func>
void ForEachSurfaceInRegion(PAddr addr, size_t size, Func&& func);
/// Get the best surface match (and its match type) for the given flags
template <MatchFlags find_flags>
SurfaceRef FindMatch(const SurfaceParams& params, ScaleMatch match_scale_type,
@@ -187,6 +199,9 @@ private:
/// Remove surface from the cache
void UnregisterSurface(const SurfaceRef& surface);
/// Unregisters all surfaces from the cache
void UnregisterAll();
/// Increase/decrease the number of surfaces in pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
@@ -196,13 +211,13 @@ private:
TextureRuntime& runtime;
Pica::Regs& regs;
RendererBase& renderer;
SurfaceCache surface_cache;
PageMap cached_pages;
tsl::robin_pg_map<u64, std::vector<SurfaceRef>, Common::IdentityHash<u64>> page_table;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceRef> remove_surfaces;
u32 resolution_scale_factor;
RenderTargets render_targets;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
bool use_filter;
bool dump_textures;
bool use_custom_textures;


@@ -33,6 +33,11 @@ public:
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + static_cast<PAddr>(overlap_size);
return addr < overlap_end && overlap_addr < end;
}
u64 ModificationTick() const noexcept {
return modification_tick;
}
@@ -66,6 +71,7 @@ private:
public:
bool registered = false;
bool picked = false;
bool is_custom = false;
const Material* material = nullptr;
SurfaceRegions invalid_regions;
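The Overlaps helper added above is the usual half-open interval test: [addr, end) and [overlap_addr, overlap_addr + overlap_size) intersect exactly when each range starts before the other ends. A small illustrative check of the boundary cases follows; the addresses are made up for the example.

#include <cassert>
#include <cstdint>

// Half-open overlap test matching Surface::Overlaps above.
static bool Overlaps(std::uint32_t addr, std::uint32_t end, std::uint32_t other_addr,
                     std::uint32_t other_size) {
    const std::uint32_t other_end = other_addr + other_size;
    return addr < other_end && other_addr < end;
}

int main() {
    assert(Overlaps(0x1000, 0x2000, 0x1800, 0x100));  // contained range overlaps
    assert(!Overlaps(0x1000, 0x2000, 0x2000, 0x100)); // touching at the end does not
    assert(!Overlaps(0x1000, 0x2000, 0x0F00, 0x100)); // touching at the start does not
    return 0;
}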