rasterizer_cache: Move sampler management out of the rasterizer and into the rasterizer cache

This commit is contained in:
GPUCode
2023-04-26 00:19:52 +03:00
parent 3ef2957b95
commit cba8a2a18e
10 changed files with 375 additions and 102 deletions

View File

@@ -41,6 +41,8 @@ add_library(video_core STATIC
rasterizer_cache/rasterizer_cache.cpp
rasterizer_cache/rasterizer_cache.h
rasterizer_cache/rasterizer_cache_base.h
rasterizer_cache/sampler_params.h
rasterizer_cache/slot_vector.h
rasterizer_cache/surface_base.cpp
rasterizer_cache/surface_base.h
rasterizer_cache/surface_params.cpp

View File

@@ -35,7 +35,18 @@ RasterizerCache<T>::RasterizerCache(Memory::MemorySystem& memory_,
renderer{renderer_}, resolution_scale_factor{renderer.GetResolutionScaleFactor()},
use_filter{Settings::values.texture_filter.GetValue() != Settings::TextureFilter::None},
dump_textures{Settings::values.dump_textures.GetValue()},
use_custom_textures{Settings::values.custom_textures.GetValue()} {}
use_custom_textures{Settings::values.custom_textures.GetValue()} {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
// Create null handles for all cached resources
void(slot_samplers.insert(runtime, SamplerParams{
.mag_filter = TextureConfig::TextureFilter::Linear,
.min_filter = TextureConfig::TextureFilter::Linear,
.mip_filter = TextureConfig::TextureFilter::Linear,
.wrap_s = TextureConfig::WrapMode::ClampToBorder,
.wrap_t = TextureConfig::WrapMode::ClampToBorder,
}));
}
template <class T>
RasterizerCache<T>::~RasterizerCache() {
@@ -236,6 +247,34 @@ bool RasterizerCache<T>::AccelerateFill(const GPU::Regs::MemoryFillConfig& confi
return true;
}
template <class T>
RasterizerCache<T>::Sampler& RasterizerCache<T>::GetSampler(SamplerId sampler_id) {
    // Direct slot lookup; the id must have been produced by this cache's sampler storage.
    auto& sampler = slot_samplers[sampler_id];
    return sampler;
}
template <class T>
RasterizerCache<T>::Sampler& RasterizerCache<T>::GetSampler(
    const Pica::TexturingRegs::TextureConfig& config) {
    // Snapshot every register field that feeds the sampler; the snapshot doubles as the cache key.
    const SamplerParams key = {
        .mag_filter = config.mag_filter,
        .min_filter = config.min_filter,
        .mip_filter = config.mip_filter,
        .wrap_s = config.wrap_s,
        .wrap_t = config.wrap_t,
        .border_color = config.border_color.raw,
        .lod_min = config.lod.min_level,
        .lod_max = config.lod.max_level,
        .lod_bias = config.lod.bias,
    };

    // Reuse the sampler that was created earlier for an identical configuration.
    if (const auto found = samplers.find(key); found != samplers.end()) {
        return slot_samplers[found->second];
    }

    // First time this configuration is seen: create the sampler and remember its slot.
    const SamplerId new_id = slot_samplers.insert(runtime, key);
    samplers.emplace(key, new_id);
    return slot_samplers[new_id];
}
template <class T>
void RasterizerCache<T>::CopySurface(const SurfaceRef& src_surface, const SurfaceRef& dst_surface,
SurfaceInterval copy_interval) {
@@ -322,10 +361,8 @@ RasterizerCache<T>::SurfaceRect_Tuple RasterizerCache<T>::GetSurfaceSubRect(
// Attempt to find encompassing surface
SurfaceRef surface = FindMatch<MatchFlags::SubRect>(params, match_res_scale);
// Check if FindMatch failed because of res scaling
// If that's the case create a new surface with
// the dimensions of the lower res_scale surface
// to suggest it should not be used again
// Check if FindMatch failed because of res scaling. If that's the case create a new surface with
// the dimensions of the lower res_scale surface to suggest it should not be used again.
if (!surface && match_res_scale != ScaleMatch::Ignore) {
surface = FindMatch<MatchFlags::SubRect>(params, ScaleMatch::Ignore);
if (surface) {

View File

@@ -10,6 +10,7 @@
#include <vector>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
#include "video_core/rasterizer_cache/sampler_params.h"
#include "video_core/rasterizer_cache/surface_base.h"
namespace Memory {
@@ -51,6 +52,7 @@ class RasterizerCache {
static constexpr u64 CITRA_PAGEBITS = 18;
using TextureRuntime = typename T::TextureRuntime;
using Sampler = typename T::Sampler;
using SurfaceRef = std::shared_ptr<typename T::Surface>;
using Framebuffer = typename T::Framebuffer;
@@ -89,6 +91,10 @@ public:
/// Perform hardware accelerated memory fill according to the provided configuration
bool AccelerateFill(const GPU::Regs::MemoryFillConfig& config);
/// Returns a reference to the sampler object matching the provided configuration
Sampler& GetSampler(const Pica::TexturingRegs::TextureConfig& config);
Sampler& GetSampler(SamplerId sampler_id);
/// Copy one surface's region to another
void CopySurface(const SurfaceRef& src_surface, const SurfaceRef& dst_surface,
SurfaceInterval copy_interval);
@@ -211,8 +217,10 @@ private:
TextureRuntime& runtime;
Pica::Regs& regs;
RendererBase& renderer;
tsl::robin_pg_map<u64, std::vector<SurfaceRef>, Common::IdentityHash<u64>> page_table;
std::unordered_map<TextureCubeConfig, TextureCube> texture_cube_cache;
tsl::robin_pg_map<u64, std::vector<SurfaceRef>, Common::IdentityHash<u64>> page_table;
std::unordered_map<SamplerParams, SamplerId> samplers;
SlotVector<Sampler> slot_samplers;
SurfaceMap dirty_regions;
PageMap cached_pages;
std::vector<SurfaceRef> remove_surfaces;

View File

@@ -0,0 +1,42 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/hash.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
/**
 * Plain-data description of a texture sampler, used as the lookup key for cached samplers.
 *
 * Hash() hashes the raw object bytes, so the struct must not contain padding; the
 * static_assert below enforces this. Every field has a default initializer so that a
 * default-initialized instance never exposes indeterminate bytes to the hash.
 */
struct SamplerParams {
    using TextureConfig = Pica::TexturingRegs::TextureConfig;

    TextureConfig::TextureFilter mag_filter{};
    TextureConfig::TextureFilter min_filter{};
    TextureConfig::TextureFilter mip_filter{};
    TextureConfig::WrapMode wrap_s{};
    TextureConfig::WrapMode wrap_t{};
    u32 border_color = 0;
    u32 lod_min = 0;
    u32 lod_max = 0;
    s32 lod_bias = 0;

    /// Member-wise comparison; also provides the equality used by hash containers.
    auto operator<=>(const SamplerParams&) const noexcept = default;

    /// Returns a 64-bit hash computed over the object representation of this struct.
    [[nodiscard]] u64 Hash() const noexcept {
        return Common::ComputeHash64(this, sizeof(SamplerParams));
    }
};
static_assert(std::has_unique_object_representations_v<SamplerParams>,
              "SamplerParams is not suitable for hashing");
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::SamplerParams> {
std::size_t operator()(const VideoCore::SamplerParams& params) const noexcept {
return params.Hash();
}
};
} // namespace std

View File

@@ -0,0 +1,222 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <bit>
#include <compare>
#include <functional>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
namespace VideoCore {
/// Handle to a slot inside a SlotVector. A default-constructed id refers to no slot.
struct SlotId {
    /// Sentinel index marking an id that refers to no slot.
    static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();

    u32 index = INVALID_INDEX;

    /// True when the id holds something other than the sentinel index.
    constexpr explicit operator bool() const noexcept {
        return !(index == INVALID_INDEX);
    }

    constexpr auto operator<=>(const SlotId&) const noexcept = default;
};
template <class T>
class SlotVector {
public:
class Iterator {
friend SlotVector<T>;
public:
constexpr Iterator() = default;
Iterator& operator++() noexcept {
const u64* const bitset = slot_vector->stored_bitset.data();
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
if (id.index < size) {
do {
++id.index;
} while (id.index < size && !IsValid(bitset));
if (id.index == size) {
id.index = SlotId::INVALID_INDEX;
}
}
return *this;
}
Iterator operator++(int) noexcept {
const Iterator copy{*this};
++*this;
return copy;
}
bool operator==(const Iterator& other) const noexcept {
return id.index == other.id.index;
}
bool operator!=(const Iterator& other) const noexcept {
return id.index != other.id.index;
}
std::pair<SlotId, T*> operator*() const noexcept {
return {id, std::addressof((*slot_vector)[id])};
}
T* operator->() const noexcept {
return std::addressof((*slot_vector)[id]);
}
private:
Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
: slot_vector{slot_vector_}, id{id_} {}
bool IsValid(const u64* bitset) const noexcept {
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
}
SlotVector<T>* slot_vector;
SlotId id;
};
~SlotVector() noexcept {
size_t index = 0;
for (u64 bits : stored_bitset) {
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
if ((bits & 1) != 0) {
values[index + bit].object.~T();
}
}
index += 64;
}
delete[] values;
}
[[nodiscard]] T& operator[](SlotId id) noexcept {
ValidateIndex(id);
return values[id.index].object;
}
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
ValidateIndex(id);
return values[id.index].object;
}
template <typename... Args>
[[nodiscard]] SlotId insert(Args&&... args) noexcept {
const u32 index = FreeValueIndex();
new (&values[index].object) T(std::forward<Args>(args)...);
SetStorageBit(index);
return SlotId{index};
}
void erase(SlotId id) noexcept {
values[id.index].object.~T();
free_list.push_back(id.index);
ResetStorageBit(id.index);
}
[[nodiscard]] Iterator begin() noexcept {
const auto it = std::find_if(stored_bitset.begin(), stored_bitset.end(),
[](u64 value) { return value != 0; });
if (it == stored_bitset.end()) {
return end();
}
const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
return Iterator(this, first_id);
}
[[nodiscard]] Iterator end() noexcept {
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
}
private:
struct NonTrivialDummy {
NonTrivialDummy() noexcept {}
};
union Entry {
Entry() noexcept : dummy{} {}
~Entry() noexcept {}
NonTrivialDummy dummy;
T object;
};
void SetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] |= u64(1) << (index % 64);
}
void ResetStorageBit(u32 index) noexcept {
stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
}
bool ReadStorageBit(u32 index) noexcept {
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
}
void ValidateIndex(SlotId id) const noexcept {
DEBUG_ASSERT(id);
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
}
[[nodiscard]] u32 FreeValueIndex() noexcept {
if (free_list.empty()) {
Reserve(values_capacity ? (values_capacity << 1) : 1);
}
const u32 free_index = free_list.back();
free_list.pop_back();
return free_index;
}
void Reserve(size_t new_capacity) noexcept {
Entry* const new_values = new Entry[new_capacity];
size_t index = 0;
for (u64 bits : stored_bitset) {
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
const size_t i = index + bit;
if ((bits & 1) == 0) {
continue;
}
T& old_value = values[i].object;
new (&new_values[i].object) T(std::move(old_value));
old_value.~T();
}
index += 64;
}
stored_bitset.resize((new_capacity + 63) / 64);
const size_t old_free_size = free_list.size();
free_list.resize(old_free_size + (new_capacity - values_capacity));
std::iota(free_list.begin() + old_free_size, free_list.end(),
static_cast<u32>(values_capacity));
delete[] values;
values = new_values;
values_capacity = new_capacity;
}
Entry* values = nullptr;
size_t values_capacity = 0;
std::vector<u64> stored_bitset;
std::vector<u32> free_list;
};
} // namespace VideoCore
/// Hashes a SlotId by hashing its slot index.
template <>
struct std::hash<VideoCore::SlotId> {
    size_t operator()(const VideoCore::SlotId& id) const noexcept {
        const std::hash<u32> index_hasher{};
        return index_hasher(id.index);
    }
};

View File

@@ -11,11 +11,17 @@
#include "common/math_util.h"
#include "common/vector_math.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/rasterizer_cache/slot_vector.h"
namespace VideoCore {
using SurfaceInterval = boost::icl::right_open_interval<PAddr>;
using SamplerId = SlotId;
/// ID of the null sampler. The rasterizer cache inserts it first, so it occupies slot 0.
constexpr SamplerId NULL_SAMPLER_ID{0};
struct Offset {
constexpr auto operator<=>(const Offset&) const noexcept = default;

View File

@@ -97,16 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory,
u8 framebuffer_data[4] = {0, 0, 0, 1};
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
// Create sampler objects
for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
state.texture_units[i].sampler = texture_samplers[i].sampler.handle;
}
// Create cubemap texture and sampler objects
texture_cube_sampler.Create();
state.texture_cube_unit.sampler = texture_cube_sampler.sampler.handle;
// Generate VAO
sw_vao.Create();
hw_vao.Create();
@@ -538,7 +528,8 @@ void RasterizerOpenGL::SyncTextureUnits(const Framebuffer& framebuffer) {
}
// Sync texture unit sampler
texture_samplers[texture_index].SyncWithConfig(texture.config);
const Sampler& sampler = res_cache.GetSampler(texture.config);
state.texture_units[texture_index].sampler = sampler.Handle();
// Bind the texture provided by the rasterizer cache
auto surface = res_cache.GetTextureSurface(texture);
@@ -590,9 +581,10 @@ void RasterizerOpenGL::BindTextureCube(const Pica::TexturingRegs::FullTextureCon
};
auto surface = res_cache.GetTextureCube(config);
texture_cube_sampler.SyncWithConfig(texture.config);
Sampler& sampler = res_cache.GetSampler(texture.config);
state.texture_cube_unit.texture_cube = surface->Handle();
state.texture_cube_unit.sampler = sampler.Handle();
state.texture_units[0].texture_2d = 0;
}
@@ -608,7 +600,7 @@ void RasterizerOpenGL::BindMaterial(u32 texture_index, Surface& surface) {
glBindSampler(unit.id, sampler);
};
const GLuint sampler = texture_samplers[texture_index].sampler.handle;
const GLuint sampler = state.texture_units[texture_index].sampler;
if (surface.HasNormalMap()) {
if (regs.lighting.disable) {
LOG_WARNING(Render_OpenGL, "Custom normal map used but scene has no light enabled");
@@ -798,64 +790,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
return true;
}
void RasterizerOpenGL::SamplerInfo::Create() {
sampler.Create();
mag_filter = min_filter = mip_filter = TextureConfig::Linear;
wrap_s = wrap_t = TextureConfig::Repeat;
border_color = 0;
lod_min = lod_max = 0;
// default is 1000 and -1000
// Other attributes have correct defaults
glSamplerParameterf(sampler.handle, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
glSamplerParameterf(sampler.handle, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
}
void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
    const Pica::TexturingRegs::TextureConfig& config) {
    // Each parameter is written to GL only when it differs from the cached copy.
    const GLuint handle = sampler.handle;

    if (mag_filter != config.mag_filter) {
        mag_filter = config.mag_filter;
        glSamplerParameteri(handle, GL_TEXTURE_MAG_FILTER,
                            PicaToGL::TextureMagFilterMode(mag_filter));
    }

    // The min and mip filters are combined into a single GL minification mode
    const bool min_mode_changed =
        min_filter != config.min_filter || mip_filter != config.mip_filter;
    if (min_mode_changed) {
        min_filter = config.min_filter;
        mip_filter = config.mip_filter;
        glSamplerParameteri(handle, GL_TEXTURE_MIN_FILTER,
                            PicaToGL::TextureMinFilterMode(min_filter, mip_filter));
    }

    if (wrap_s != config.wrap_s) {
        wrap_s = config.wrap_s;
        glSamplerParameteri(handle, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(wrap_s));
    }

    if (wrap_t != config.wrap_t) {
        wrap_t = config.wrap_t;
        glSamplerParameteri(handle, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(wrap_t));
    }

    // The border color is only synced while a border clamp mode is active; this check must
    // follow the wrap mode updates above because it reads the refreshed values.
    const bool uses_border =
        wrap_s == TextureConfig::ClampToBorder || wrap_t == TextureConfig::ClampToBorder;
    if (uses_border && border_color != config.border_color.raw) {
        border_color = config.border_color.raw;
        auto gl_color = PicaToGL::ColorRGBA8(border_color);
        glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, gl_color.AsArray());
    }

    if (lod_min != config.lod.min_level) {
        lod_min = config.lod.min_level;
        glSamplerParameterf(handle, GL_TEXTURE_MIN_LOD, static_cast<float>(lod_min));
    }

    if (lod_max != config.lod.max_level) {
        lod_max = config.lod.max_level;
        glSamplerParameterf(handle, GL_TEXTURE_MAX_LOD, static_cast<float>(lod_max));
    }
}
void RasterizerOpenGL::SyncClipEnabled() {
    // Mirror the PICA clip_enable register into entry 1 of the tracked clip distance state
    const bool clip_enabled = Pica::g_state.regs.rasterizer.clip_enable != 0;
    state.clip_distance[1] = clip_enabled;
}

View File

@@ -55,28 +55,6 @@ private:
void SyncFixedState() override;
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
/// Pairs an OpenGL sampler object with a copy of the parameter values tracked for it.
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
/// The sampler object whose state is tracked by the private fields below.
OGLSampler sampler;
/// Creates the sampler object, initializing its state so that it's in sync with the
/// SamplerInfo struct.
void Create();
/// Syncs the sampler object with the config, updating any necessary state.
void SyncWithConfig(const TextureConfig& config);
private:
// Tracked copies of the sampler parameters. They carry no initializers here, so they hold
// meaningful values only after Create() has been called.
TextureConfig::TextureFilter mag_filter;
TextureConfig::TextureFilter min_filter;
TextureConfig::TextureFilter mip_filter;
TextureConfig::WrapMode wrap_s;
TextureConfig::WrapMode wrap_t;
u32 border_color;
u32 lod_min;
u32 lod_max;
};
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
@@ -163,7 +141,6 @@ private:
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
std::array<SamplerInfo, 3> texture_samplers;
GLsizeiptr texture_buffer_size;
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;
@@ -175,8 +152,6 @@ private:
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
SamplerInfo texture_cube_sampler;
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;

View File

@@ -9,6 +9,7 @@
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
namespace OpenGL {
@@ -692,4 +693,30 @@ Framebuffer::Framebuffer(TextureRuntime& runtime, Surface* const color, u32 colo
Framebuffer::~Framebuffer() = default;
Sampler::Sampler(TextureRuntime& runtime, VideoCore::SamplerParams params) {
    // Translate the PICA sampler description into OpenGL values up front
    const GLenum gl_mag_filter = PicaToGL::TextureMagFilterMode(params.mag_filter);
    const GLenum gl_min_filter =
        PicaToGL::TextureMinFilterMode(params.min_filter, params.mip_filter);
    const GLenum gl_wrap_s = PicaToGL::WrapMode(params.wrap_s);
    const GLenum gl_wrap_t = PicaToGL::WrapMode(params.wrap_t);
    const Common::Vec4f border = PicaToGL::ColorRGBA8(params.border_color);
    const auto min_lod = static_cast<float>(params.lod_min);
    const auto max_lod = static_cast<float>(params.lod_max);

    // Create the GL object and program all of its state once; it is not modified afterwards
    // by this class. Note that params.lod_bias is not applied here.
    sampler.Create();
    const GLuint name = sampler.handle;
    glSamplerParameteri(name, GL_TEXTURE_MAG_FILTER, gl_mag_filter);
    glSamplerParameteri(name, GL_TEXTURE_MIN_FILTER, gl_min_filter);
    glSamplerParameteri(name, GL_TEXTURE_WRAP_S, gl_wrap_s);
    glSamplerParameteri(name, GL_TEXTURE_WRAP_T, gl_wrap_t);
    glSamplerParameterfv(name, GL_TEXTURE_BORDER_COLOR, border.AsArray());
    glSamplerParameterf(name, GL_TEXTURE_MIN_LOD, min_lod);
    glSamplerParameterf(name, GL_TEXTURE_MAX_LOD, max_lod);
}

// Defined out of line; cleanup is left to the members' own destructors.
Sampler::~Sampler() = default;
} // namespace OpenGL

View File

@@ -223,8 +223,28 @@ private:
GLuint handle{};
};
/// Move-only wrapper around an OpenGL sampler object configured from VideoCore::SamplerParams.
class Sampler {
public:
    explicit Sampler(TextureRuntime& runtime, VideoCore::SamplerParams params);
    ~Sampler();

    // Ownership of the sampler object may be transferred but not duplicated
    Sampler(Sampler&&) = default;
    Sampler& operator=(Sampler&&) = default;
    Sampler(const Sampler&) = delete;
    Sampler& operator=(const Sampler&) = delete;

    /// Returns the handle of the wrapped sampler object.
    [[nodiscard]] GLuint Handle() const noexcept {
        const GLuint handle = sampler.handle;
        return handle;
    }

private:
    OGLSampler sampler;
};
/// Bundles the OpenGL backend types under the alias names that the generic rasterizer cache
/// reads from its template parameter (TextureRuntime, Sampler, Surface, Framebuffer).
struct Traits {
using TextureRuntime = OpenGL::TextureRuntime;
using Sampler = OpenGL::Sampler;
using Surface = OpenGL::Surface;
using Framebuffer = OpenGL::Framebuffer;
};