rasterizer_cache: Avoid redundant texture copies when using texcubes

This commit is contained in:
GPUCode
2023-03-08 10:11:47 +02:00
parent 8721456944
commit eeb1ff7965
5 changed files with 72 additions and 52 deletions

View File

@ -5,6 +5,7 @@
#pragma once
#include <boost/container/small_vector.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
@ -627,6 +628,8 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
}
auto [it, new_surface] = texture_cube_cache.try_emplace(config);
CubeParams& params = it->second;
if (new_surface) {
const SurfaceParams cube_params = {
.addr = config.px,
@ -638,11 +641,12 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
.pixel_format = PixelFormatFromTextureFormat(config.format),
.type = SurfaceType::Texture,
};
it->second = CreateSurface(cube_params);
params.cube_id = CreateSurface(cube_params);
}
const SurfaceId cube_id = it->second;
const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz};
const std::array addresses = {
config.px, config.nx, config.py, config.ny, config.pz, config.nz,
};
for (std::size_t i = 0; i < addresses.size(); i++) {
Pica::Texture::TextureInfo info = {
@ -654,25 +658,27 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
info.SetDefaultStride();
Surface& face_surface = GetTextureSurface(info, config.levels - 1);
Surface& cube = slot_surfaces[cube_id];
Surface& cube = slot_surfaces[params.cube_id];
const u32 face = static_cast<u32>(i);
if (face_surface.ModificationTick() != params.ticks[i]) {
const u32 scaled_size = cube.GetScaledWidth();
for (u32 level = 0; level < face_surface.levels; level++) {
const TextureCopy texture_copy = {
.src_level = level,
.dst_level = level,
.src_layer = 0,
.dst_layer = face,
.dst_layer = static_cast<u32>(i),
.src_offset = {0, 0},
.dst_offset = {0, 0},
.extent = {scaled_size >> level, scaled_size >> level},
};
runtime.CopyTextures(face_surface, cube, texture_copy);
}
params.ticks[i] = face_surface.ModificationTick();
}
}
return slot_surfaces[cube_id];
return slot_surfaces[params.cube_id];
}
template <class T>
@ -886,7 +892,7 @@ void RasterizerCache<T>::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s
const auto NotifyValidated = [&](SurfaceInterval interval) {
level_regions.erase(interval);
surface.invalid_regions.erase(interval);
surface.MarkValid(interval);
};
while (!level_regions.empty()) {
@ -1245,7 +1251,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
ASSERT(addr >= region_owner.addr && addr + size <= region_owner.end);
// Surfaces can't have a gap
ASSERT(region_owner.width == region_owner.stride);
region_owner.invalid_regions.erase(invalid_interval);
region_owner.MarkValid(invalid_interval);
}
ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) {
@ -1262,7 +1268,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
}
const SurfaceInterval interval = surface.GetInterval() & invalid_interval;
surface.invalid_regions.insert(interval);
surface.MarkInvalid(interval);
// If the surface has no salvageable data it should be removed from the cache to avoid
// clogging the data structure
@ -1287,7 +1293,7 @@ template <class T>
/// Inserts a new surface built from `params` into slot_surfaces, marks its whole interval
/// as invalid and returns the id of the new slot.
SurfaceId RasterizerCache<T>::CreateSurface(const SurfaceParams& params) {
    SurfaceId surface_id = slot_surfaces.insert(runtime, params);
    Surface& surface = slot_surfaces[surface_id];
    // MarkInvalid already inserts the interval into invalid_regions (and bumps the
    // modification tick), so no separate direct insert into invalid_regions is needed.
    surface.MarkInvalid(surface.GetInterval());
    return surface_id;
}

View File

@ -9,8 +9,6 @@
#include <unordered_map>
#include <unordered_set>
#include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h"
@ -41,7 +39,7 @@ DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
class CustomTexManager;
template <class T>
class RasterizerCache : NonCopyable {
class RasterizerCache {
/// Address shift for caching surfaces into a hash table
static constexpr u64 CITRA_PAGEBITS = 18;
@ -63,6 +61,11 @@ class RasterizerCache : NonCopyable {
SurfaceId depth_surface_id;
};
struct CubeParams {
SurfaceId cube_id;
std::array<s64, 6> ticks{};
};
public:
RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime);
@ -203,13 +206,13 @@ private:
SurfaceMap dirty_regions;
std::vector<SurfaceId> remove_surfaces;
u16 resolution_scale_factor;
std::unordered_map<TextureCubeConfig, SurfaceId> texture_cube_cache;
// The internal surface cache is based on buckets of 256KB.
// This fits better for the purpose of this cache as textures are normally
// large in size.
std::unordered_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<SamplerParams, SamplerId> samplers;
std::unordered_map<TextureCubeConfig, CubeParams> texture_cube_cache;
SlotVector<Surface> slot_surfaces;
SlotVector<Sampler> slot_samplers;

View File

@ -9,8 +9,6 @@
namespace VideoCore {
/// Default constructor; compiler-generated.
SurfaceBase::SurfaceBase() = default;
/// Constructs the surface by initialising its SurfaceParams base from `params`.
SurfaceBase::SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {}
bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const {

View File

@ -14,9 +14,44 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
class SurfaceBase : public SurfaceParams {
public:
SurfaceBase();
explicit SurfaceBase(const SurfaceParams& params);
/// Returns true when the range starting at `overlap_addr` and spanning `overlap_size`
/// intersects this surface's [addr, end) range.
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
    const PAddr range_end = overlap_addr + static_cast<PAddr>(overlap_size);
    // The queried range begins at or past the end of the surface: no intersection.
    if (overlap_addr >= end) {
        return false;
    }
    return range_end > addr;
}
/// Returns the current value of the modification_tick counter.
u64 ModificationTick() const noexcept {
return modification_tick;
}
/// Returns the value of the custom_format member.
CustomPixelFormat CustomFormat() const noexcept {
return custom_format;
}
/// Returns the value of the is_custom flag.
bool IsCustom() const noexcept {
return is_custom;
}
/// Returns true when looking up `interval` in invalid_regions finds no entry.
bool IsRegionValid(SurfaceInterval interval) const {
    const auto match = invalid_regions.find(interval);
    const bool has_invalid_part = match != invalid_regions.end();
    return !has_invalid_part;
}
/// Erases `interval` from invalid_regions, then advances the modification tick by one.
void MarkValid(SurfaceInterval interval) {
    invalid_regions.erase(interval);
    modification_tick += 1;
}
/// Inserts `interval` into invalid_regions, then advances the modification tick by one.
void MarkInvalid(SurfaceInterval interval) {
    invalid_regions.insert(interval);
    modification_tick += 1;
}
/// Returns true when the first invalid_regions entry matched against the surface's own
/// interval is equal to that interval; returns false when there is no such entry.
bool IsFullyInvalid() const {
    const auto interval = GetInterval();
    const auto first_match = invalid_regions.equal_range(interval).first;
    // equal_range can yield end() (for example when invalid_regions is empty);
    // dereferencing it would be undefined behaviour, so treat that case as "not invalid".
    if (first_match == invalid_regions.end()) {
        return false;
    }
    return *first_match == interval;
}
/// Returns true when this surface can be used to fill the fill_interval of dest_surface
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
@ -29,28 +64,6 @@ public:
/// Returns the clear value used to validate another surface from this fill surface
ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format);
/// Returns the value of the is_custom flag.
bool IsCustom() const noexcept {
return is_custom;
}
/// Returns the value of the custom_format member.
CustomPixelFormat CustomFormat() const noexcept {
return custom_format;
}
/// Returns true when the range starting at `overlap_addr` and spanning `overlap_size`
/// intersects this surface's [addr, end) range.
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
    const PAddr range_end = overlap_addr + static_cast<PAddr>(overlap_size);
    // The queried range begins at or past the end of the surface: no intersection.
    if (overlap_addr >= end) {
        return false;
    }
    return range_end > addr;
}
/// Returns true when looking up `interval` in invalid_regions finds no entry.
bool IsRegionValid(SurfaceInterval interval) const {
    const auto match = invalid_regions.find(interval);
    const bool has_invalid_part = match != invalid_regions.end();
    return !has_invalid_part;
}
/// Returns true when the first invalid_regions entry matched against the surface's own
/// interval is equal to that interval; returns false when there is no such entry.
bool IsFullyInvalid() const {
    const auto interval = GetInterval();
    const auto first_match = invalid_regions.equal_range(interval).first;
    // equal_range can yield end() (for example when invalid_regions is empty);
    // dereferencing it would be undefined behaviour, so treat that case as "not invalid".
    if (first_match == invalid_regions.end()) {
        return false;
    }
    return *first_match == interval;
}
private:
/// Returns the fill buffer value starting from copy_addr
std::array<u8, 4> MakeFillBuffer(PAddr copy_addr);
@ -63,6 +76,7 @@ public:
SurfaceRegions invalid_regions;
std::array<u8, 4> fill_data;
u32 fill_size = 0;
u64 modification_tick = 1;
};
} // namespace VideoCore

View File

@ -617,7 +617,6 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf
}
void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
LOG_WARNING(Render_Vulkan, "Using texture cube, might be slow");
using CubeFace = Pica::TexturingRegs::CubeFace;
const VideoCore::TextureCubeConfig config = {
.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),