rasterizer_cache: Avoid redundant texture copies when using texcubes

This commit is contained in:
GPUCode
2023-03-08 10:11:47 +02:00
parent 8721456944
commit eeb1ff7965
5 changed files with 72 additions and 52 deletions

View File

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <boost/container/small_vector.hpp> #include <boost/container/small_vector.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h" #include "common/microprofile.h"
@ -627,6 +628,8 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
} }
auto [it, new_surface] = texture_cube_cache.try_emplace(config); auto [it, new_surface] = texture_cube_cache.try_emplace(config);
CubeParams& params = it->second;
if (new_surface) { if (new_surface) {
const SurfaceParams cube_params = { const SurfaceParams cube_params = {
.addr = config.px, .addr = config.px,
@ -638,11 +641,12 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
.pixel_format = PixelFormatFromTextureFormat(config.format), .pixel_format = PixelFormatFromTextureFormat(config.format),
.type = SurfaceType::Texture, .type = SurfaceType::Texture,
}; };
it->second = CreateSurface(cube_params); params.cube_id = CreateSurface(cube_params);
} }
const SurfaceId cube_id = it->second; const std::array addresses = {
const std::array addresses = {config.px, config.nx, config.py, config.ny, config.pz, config.nz}; config.px, config.nx, config.py, config.ny, config.pz, config.nz,
};
for (std::size_t i = 0; i < addresses.size(); i++) { for (std::size_t i = 0; i < addresses.size(); i++) {
Pica::Texture::TextureInfo info = { Pica::Texture::TextureInfo info = {
@ -654,25 +658,27 @@ auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> Surf
info.SetDefaultStride(); info.SetDefaultStride();
Surface& face_surface = GetTextureSurface(info, config.levels - 1); Surface& face_surface = GetTextureSurface(info, config.levels - 1);
Surface& cube = slot_surfaces[cube_id]; Surface& cube = slot_surfaces[params.cube_id];
const u32 face = static_cast<u32>(i); if (face_surface.ModificationTick() != params.ticks[i]) {
const u32 scaled_size = cube.GetScaledWidth(); const u32 scaled_size = cube.GetScaledWidth();
for (u32 level = 0; level < face_surface.levels; level++) { for (u32 level = 0; level < face_surface.levels; level++) {
const TextureCopy texture_copy = { const TextureCopy texture_copy = {
.src_level = level, .src_level = level,
.dst_level = level, .dst_level = level,
.src_layer = 0, .src_layer = 0,
.dst_layer = face, .dst_layer = static_cast<u32>(i),
.src_offset = {0, 0}, .src_offset = {0, 0},
.dst_offset = {0, 0}, .dst_offset = {0, 0},
.extent = {scaled_size >> level, scaled_size >> level}, .extent = {scaled_size >> level, scaled_size >> level},
}; };
runtime.CopyTextures(face_surface, cube, texture_copy); runtime.CopyTextures(face_surface, cube, texture_copy);
}
params.ticks[i] = face_surface.ModificationTick();
} }
} }
return slot_surfaces[cube_id]; return slot_surfaces[params.cube_id];
} }
template <class T> template <class T>
@ -886,7 +892,7 @@ void RasterizerCache<T>::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s
const auto NotifyValidated = [&](SurfaceInterval interval) { const auto NotifyValidated = [&](SurfaceInterval interval) {
level_regions.erase(interval); level_regions.erase(interval);
surface.invalid_regions.erase(interval); surface.MarkValid(interval);
}; };
while (!level_regions.empty()) { while (!level_regions.empty()) {
@ -1245,7 +1251,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
ASSERT(addr >= region_owner.addr && addr + size <= region_owner.end); ASSERT(addr >= region_owner.addr && addr + size <= region_owner.end);
// Surfaces can't have a gap // Surfaces can't have a gap
ASSERT(region_owner.width == region_owner.stride); ASSERT(region_owner.width == region_owner.stride);
region_owner.invalid_regions.erase(invalid_interval); region_owner.MarkValid(invalid_interval);
} }
ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) { ForEachSurfaceInRegion(addr, size, [&](SurfaceId surface_id, Surface& surface) {
@ -1262,7 +1268,7 @@ void RasterizerCache<T>::InvalidateRegion(PAddr addr, u32 size, SurfaceId region
} }
const SurfaceInterval interval = surface.GetInterval() & invalid_interval; const SurfaceInterval interval = surface.GetInterval() & invalid_interval;
surface.invalid_regions.insert(interval); surface.MarkInvalid(interval);
// If the surface has no salvageable data it should be removed from the cache to avoid // If the surface has no salvageable data it should be removed from the cache to avoid
// clogging the data structure // clogging the data structure
@ -1287,7 +1293,7 @@ template <class T>
SurfaceId RasterizerCache<T>::CreateSurface(const SurfaceParams& params) { SurfaceId RasterizerCache<T>::CreateSurface(const SurfaceParams& params) {
SurfaceId surface_id = slot_surfaces.insert(runtime, params); SurfaceId surface_id = slot_surfaces.insert(runtime, params);
Surface& surface = slot_surfaces[surface_id]; Surface& surface = slot_surfaces[surface_id];
surface.invalid_regions.insert(surface.GetInterval()); surface.MarkInvalid(surface.GetInterval());
return surface_id; return surface_id;
} }

View File

@ -9,8 +9,6 @@
#include <unordered_map> #include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp>
#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/sampler_params.h" #include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_params.h" #include "video_core/rasterizer_cache/surface_params.h"
#include "video_core/rasterizer_cache/utils.h" #include "video_core/rasterizer_cache/utils.h"
@ -41,7 +39,7 @@ DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
class CustomTexManager; class CustomTexManager;
template <class T> template <class T>
class RasterizerCache : NonCopyable { class RasterizerCache {
/// Address shift for caching surfaces into a hash table /// Address shift for caching surfaces into a hash table
static constexpr u64 CITRA_PAGEBITS = 18; static constexpr u64 CITRA_PAGEBITS = 18;
@ -63,6 +61,11 @@ class RasterizerCache : NonCopyable {
SurfaceId depth_surface_id; SurfaceId depth_surface_id;
}; };
struct CubeParams {
SurfaceId cube_id;
std::array<s64, 6> ticks{};
};
public: public:
RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager, RasterizerCache(Memory::MemorySystem& memory, CustomTexManager& custom_tex_manager,
Runtime& runtime); Runtime& runtime);
@ -203,13 +206,13 @@ private:
SurfaceMap dirty_regions; SurfaceMap dirty_regions;
std::vector<SurfaceId> remove_surfaces; std::vector<SurfaceId> remove_surfaces;
u16 resolution_scale_factor; u16 resolution_scale_factor;
std::unordered_map<TextureCubeConfig, SurfaceId> texture_cube_cache;
// The internal surface cache is based on buckets of 256KB. // The internal surface cache is based on buckets of 256KB.
// This fits better for the purpose of this cache as textures are normally // This fits better for the purpose of this cache as textures are normally
// large in size. // large in size.
std::unordered_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table; std::unordered_map<u64, std::vector<SurfaceId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<SamplerParams, SamplerId> samplers; std::unordered_map<SamplerParams, SamplerId> samplers;
std::unordered_map<TextureCubeConfig, CubeParams> texture_cube_cache;
SlotVector<Surface> slot_surfaces; SlotVector<Surface> slot_surfaces;
SlotVector<Sampler> slot_samplers; SlotVector<Sampler> slot_samplers;

View File

@ -9,8 +9,6 @@
namespace VideoCore { namespace VideoCore {
SurfaceBase::SurfaceBase() = default;
SurfaceBase::SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {} SurfaceBase::SurfaceBase(const SurfaceParams& params) : SurfaceParams{params} {}
bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const {

View File

@ -14,9 +14,44 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
class SurfaceBase : public SurfaceParams { class SurfaceBase : public SurfaceParams {
public: public:
SurfaceBase();
explicit SurfaceBase(const SurfaceParams& params); explicit SurfaceBase(const SurfaceParams& params);
/// Returns true when [overlap_addr, overlap_addr + overlap_size) intersects [addr, end)
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
    // Two half-open ranges intersect when each one begins before the other finishes
    const bool starts_before_our_end = overlap_addr < end;
    const bool ends_after_our_start = addr < overlap_addr + static_cast<PAddr>(overlap_size);
    return ends_after_our_start && starts_before_our_end;
}
/// Returns the current value of the modification counter, which MarkValid and MarkInvalid
/// increment. Callers can store the value and compare it later to detect changes.
u64 ModificationTick() const noexcept {
return modification_tick;
}
/// Returns the stored custom_format value.
/// NOTE(review): presumably only meaningful when IsCustom() is true - confirm.
CustomPixelFormat CustomFormat() const noexcept {
return custom_format;
}
/// Returns the is_custom flag.
/// NOTE(review): presumably set when a custom texture backs this surface - confirm.
bool IsCustom() const noexcept {
return is_custom;
}
/// Returns true when looking up interval in invalid_regions finds nothing.
/// NOTE(review): presumably the interval lookup matches any stored interval that overlaps
/// it, so this means "no part of interval is invalid" - confirm against Boost.ICL.
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
}
/// Removes interval from the set of invalid regions and bumps the modification counter.
void MarkValid(SurfaceInterval interval) {
invalid_regions.erase(interval);
// The counter is bumped unconditionally, even if interval was not marked invalid before
modification_tick++;
}
/// Adds interval to the set of invalid regions and bumps the modification counter.
void MarkInvalid(SurfaceInterval interval) {
invalid_regions.insert(interval);
// The counter is bumped unconditionally, even if interval was already marked invalid
modification_tick++;
}
/// Returns true when the first invalid region found for this surface's interval is equal
/// to the whole interval, i.e. nothing in the surface is currently valid.
bool IsFullyInvalid() const {
    const auto interval = GetInterval();
    const auto first_match = invalid_regions.equal_range(interval).first;
    // With an empty invalid set the returned iterator is end(); dereferencing it would be
    // undefined behaviour, and a surface with no invalid regions is not fully invalid.
    return first_match != invalid_regions.end() && *first_match == interval;
}
/// Returns true when this surface can be used to fill the fill_interval of dest_surface /// Returns true when this surface can be used to fill the fill_interval of dest_surface
bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const;
@ -29,28 +64,6 @@ public:
/// Returns the clear value used to validate another surface from this fill surface /// Returns the clear value used to validate another surface from this fill surface
ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format); ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format);
bool IsCustom() const noexcept {
return is_custom;
}
CustomPixelFormat CustomFormat() const noexcept {
return custom_format;
}
bool Overlaps(PAddr overlap_addr, size_t overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + static_cast<PAddr>(overlap_size);
return addr < overlap_end && overlap_addr < end;
}
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
}
bool IsFullyInvalid() const {
auto interval = GetInterval();
return *invalid_regions.equal_range(interval).first == interval;
}
private: private:
/// Returns the fill buffer value starting from copy_addr /// Returns the fill buffer value starting from copy_addr
std::array<u8, 4> MakeFillBuffer(PAddr copy_addr); std::array<u8, 4> MakeFillBuffer(PAddr copy_addr);
@ -63,6 +76,7 @@ public:
SurfaceRegions invalid_regions; SurfaceRegions invalid_regions;
std::array<u8, 4> fill_data; std::array<u8, 4> fill_data;
u32 fill_size = 0; u32 fill_size = 0;
u64 modification_tick = 1;
}; };
} // namespace VideoCore } // namespace VideoCore

View File

@ -617,7 +617,6 @@ void RasterizerVulkan::BindShadowCube(const Pica::TexturingRegs::FullTextureConf
} }
void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) { void RasterizerVulkan::BindTextureCube(const Pica::TexturingRegs::FullTextureConfig& texture) {
LOG_WARNING(Render_Vulkan, "Using texture cube, might be slow");
using CubeFace = Pica::TexturingRegs::CubeFace; using CubeFace = Pica::TexturingRegs::CubeFace;
const VideoCore::TextureCubeConfig config = { const VideoCore::TextureCubeConfig config = {
.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX),