Merge pull request #3999 from ReinUsesLisp/opt-tex-cache

texture_cache: Optimize GetSurfacesInRegion
This commit is contained in:
bunnei 2020-05-31 17:02:29 -04:00 committed by GitHub
commit f7debcaa04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 30 additions and 25 deletions

View File

@ -14,6 +14,7 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp> #include <boost/icl/interval_map.hpp>
#include <boost/range/iterator_range.hpp> #include <boost/range/iterator_range.hpp>
@ -53,6 +54,7 @@ using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig;
template <typename TSurface, typename TView> template <typename TSurface, typename TView>
class TextureCache { class TextureCache {
using VectorSurface = boost::container::small_vector<TSurface, 1>;
public: public:
void InvalidateRegion(VAddr addr, std::size_t size) { void InvalidateRegion(VAddr addr, std::size_t size) {
@ -308,18 +310,20 @@ public:
dst_surface.first->MarkAsModified(true, Tick()); dst_surface.first->MarkAsModified(true, Tick());
} }
TSurface TryFindFramebufferSurface(VAddr addr) { TSurface TryFindFramebufferSurface(VAddr addr) const {
if (!addr) { if (!addr) {
return nullptr; return nullptr;
} }
const VAddr page = addr >> registry_page_bits; const VAddr page = addr >> registry_page_bits;
std::vector<TSurface>& list = registry[page]; const auto it = registry.find(page);
for (auto& surface : list) { if (it == registry.end()) {
if (surface->GetCpuAddr() == addr) { return nullptr;
return surface;
}
} }
return nullptr; const auto& list = it->second;
const auto found = std::find_if(list.begin(), list.end(), [addr](const auto& surface) {
return surface->GetCpuAddr() == addr;
});
return found != list.end() ? *found : nullptr;
} }
u64 Tick() { u64 Tick() {
@ -498,7 +502,7 @@ private:
* @param untopological Indicates to the recycler that the texture has no way * @param untopological Indicates to the recycler that the texture has no way
* to match the overlaps due to topological reasons. * to match the overlaps due to topological reasons.
**/ **/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, RecycleStrategy PickStrategy(VectorSurface& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) { const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
if (Settings::IsGPULevelExtreme()) { if (Settings::IsGPULevelExtreme()) {
return RecycleStrategy::Flush; return RecycleStrategy::Flush;
@ -538,9 +542,8 @@ private:
* @param untopological Indicates to the recycler that the texture has no way to match the * @param untopological Indicates to the recycler that the texture has no way to match the
* overlaps due to topological reasons. * overlaps due to topological reasons.
**/ **/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, std::pair<TSurface, TView> RecycleSurface(VectorSurface& overlaps, const SurfaceParams& params,
const SurfaceParams& params, const GPUVAddr gpu_addr, const GPUVAddr gpu_addr, const bool preserve_contents,
const bool preserve_contents,
const MatchTopologyResult untopological) { const MatchTopologyResult untopological) {
const bool do_load = preserve_contents && Settings::IsGPULevelExtreme(); const bool do_load = preserve_contents && Settings::IsGPULevelExtreme();
for (auto& surface : overlaps) { for (auto& surface : overlaps) {
@ -650,7 +653,7 @@ private:
* @param params The parameters on the new surface. * @param params The parameters on the new surface.
* @param gpu_addr The starting address of the new surface. * @param gpu_addr The starting address of the new surface.
**/ **/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps, std::optional<std::pair<TSurface, TView>> TryReconstructSurface(VectorSurface& overlaps,
const SurfaceParams& params, const SurfaceParams& params,
const GPUVAddr gpu_addr) { const GPUVAddr gpu_addr) {
if (params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D) {
@ -708,7 +711,7 @@ private:
* @param preserve_contents Indicates that the new surface should be loaded from memory or * @param preserve_contents Indicates that the new surface should be loaded from memory or
* left blank. * left blank.
*/ */
std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(VectorSurface& overlaps,
const SurfaceParams& params, const SurfaceParams& params,
const GPUVAddr gpu_addr, const GPUVAddr gpu_addr,
const VAddr cpu_addr, const VAddr cpu_addr,
@ -810,7 +813,7 @@ private:
TSurface& current_surface = iter->second; TSurface& current_surface = iter->second;
const auto topological_result = current_surface->MatchesTopology(params); const auto topological_result = current_surface->MatchesTopology(params);
if (topological_result != MatchTopologyResult::FullMatch) { if (topological_result != MatchTopologyResult::FullMatch) {
std::vector<TSurface> overlaps{current_surface}; VectorSurface overlaps{current_surface};
return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
topological_result); topological_result);
} }
@ -1126,23 +1129,25 @@ private:
} }
} }
std::vector<TSurface> GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) { VectorSurface GetSurfacesInRegion(const VAddr cpu_addr, const std::size_t size) {
if (size == 0) { if (size == 0) {
return {}; return {};
} }
const VAddr cpu_addr_end = cpu_addr + size; const VAddr cpu_addr_end = cpu_addr + size;
VAddr start = cpu_addr >> registry_page_bits;
const VAddr end = (cpu_addr_end - 1) >> registry_page_bits; const VAddr end = (cpu_addr_end - 1) >> registry_page_bits;
std::vector<TSurface> surfaces; VectorSurface surfaces;
while (start <= end) { for (VAddr start = cpu_addr >> registry_page_bits; start <= end; ++start) {
std::vector<TSurface>& list = registry[start]; const auto it = registry.find(start);
for (auto& surface : list) { if (it == registry.end()) {
if (!surface->IsPicked() && surface->Overlaps(cpu_addr, cpu_addr_end)) { continue;
surface->MarkAsPicked(true); }
surfaces.push_back(surface); for (auto& surface : it->second) {
} if (surface->IsPicked() || !surface->Overlaps(cpu_addr, cpu_addr_end)) {
continue;
}
surface->MarkAsPicked(true);
surfaces.push_back(surface);
} }
start++;
} }
for (auto& surface : surfaces) { for (auto& surface : surfaces) {
surface->MarkAsPicked(false); surface->MarkAsPicked(false);