// yuzu-mainline/src/video_core/query_cache.h

// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <algorithm>
#include <array>
#include <cstring>
#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "common/assert.h"
#include "common/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
/**
 * Stream of host counters for a single query type.
 *
 * While enabled, the stream owns an active counter (`current`). Whenever a new counter is started
 * it is created through `cache.Counter(last, type)`, i.e. with the previously finished counter
 * passed along so that values can be chained.
 *
 * @tparam QueryCache  Cache type; must provide `Counter(std::shared_ptr<HostCounter>, QueryType)`.
 * @tparam HostCounter Counter type; must provide `EndQuery()`.
 */
template <class QueryCache, class HostCounter>
class CounterStreamBase {
public:
    explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_)
        : cache{cache_}, type{type_} {}

    /// Updates the state of the stream, enabling or disabling as needed.
    void Update(bool enabled) {
        if (enabled) {
            Enable();
        } else {
            Disable();
        }
    }

    /// Resets the stream to zero. It doesn't disable the query after resetting.
    void Reset() {
        if (current) {
            current->EndQuery();
            // Immediately start a new query to avoid disabling its state.
            // No dependency is passed, so the new counter does not chain onto older ones.
            current = cache.Counter(nullptr, type);
        }
        last = nullptr;
    }

    /**
     * Returns the current counter slicing as needed.
     *
     * The active counter is ended and returned; a fresh counter that chains onto it becomes the
     * new active counter.
     * @return The counter that was active, or nullptr when the stream is disabled.
     */
    std::shared_ptr<HostCounter> Current() {
        if (!current) {
            return nullptr;
        }
        current->EndQuery();
        last = std::move(current);
        current = cache.Counter(last, type);
        return last;
    }

    /// Returns true when the counter stream is enabled.
    bool IsEnabled() const {
        return current != nullptr;
    }

private:
    /// Enables the stream. Does nothing when it is already enabled.
    void Enable() {
        if (current) {
            return;
        }
        current = cache.Counter(last, type);
    }

    /// Disables the stream. Does nothing when it is already disabled.
    void Disable() {
        if (!current) {
            // Already disabled. Overwriting `last` here would replace it with nullptr and a later
            // Enable() would no longer chain onto the previously finished counter.
            return;
        }
        current->EndQuery();
        last = std::exchange(current, nullptr);
    }

    QueryCache& cache;
    const VideoCore::QueryType type;

    std::shared_ptr<HostCounter> current; ///< Active counter, nullptr while disabled.
    std::shared_ptr<HostCounter> last;    ///< Last finished counter, passed to the next one.
};
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase {
public:
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::MemoryManager& gpu_memory_)
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}
void FlushRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
FlushAndRemoveRegion(addr, size);
}
/**
* Records a query in GPU mapped memory, potentially marked with a timestamp.
* @param gpu_addr GPU address to flush to when the mapped memory is read.
* @param type Query type, e.g. SamplesPassed.
* @param timestamp Timestamp, when empty the flushed query is assumed to be short.
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
ASSERT_OR_EXECUTE(cpu_addr, return;);
u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
configuration: implement per-game configurations (#4098) * Switch game settings to use a pointer In order to add full per-game settings, we need to be able to tell yuzu to switch to using either the global or game configuration. Using a pointer makes it easier to switch. * configuration: add new UI without changing existing funcitonality The new UI also adds General, System, Graphics, Advanced Graphics, and Audio tabs, but as yet they do nothing. This commit keeps yuzu to the same functionality as originally branched. * configuration: Rename files These weren't included in the last commit. Now they are. * configuration: setup global configuration checkbox Global config checkbox now enables/disables the appropriate tabs in the game properties dialog. The use global configuration setting is now saved to the config, defaulting to true. This also addresses some changes requested in the PR. * configuration: swap to per-game config memory for properties dialog Does not set memory going in-game. Swaps to game values when opening the properties dialog, then swaps back when closing it. Uses a `memcpy` to swap. Also implements saving config files, limited to certain groups of configurations so as to not risk setting unsafe configurations. * configuration: change config interfaces to use config-specific pointers When a game is booted, we need to be able to open the configuration dialogs without changing the settings pointer in the game's emualtion. A new pointer specific to just the configuration dialogs can be used to separate changes to just those config dialogs without affecting the emulation. * configuration: boot a game using per-game settings Swaps values where needed to boot a game. * configuration: user correct config during emulation Creates a new pointer specifically for modifying the configuration while emulation is in progress. 
Both the regular configuration dialog and the game properties dialog now use the pointer Settings::config_values to focus edits to the correct struct. * settings: split Settings::values into two different structs By splitting the settings into two mutually exclusive structs, it becomes easier, as a developer, to determine how to use the Settings structs after per-game configurations is merged. Other benefits include only duplicating the required settings in memory. * settings: move use_docked_mode to Controls group `use_docked_mode` is set in the input settings and cannot be accessed from the system settings. Grouping it with system settings causes it to be saved with per-game settings, which may make transferring configs more difficult later on, especially since docked mode cannot be set from within the game properties dialog. * configuration: Fix the other yuzu executables and a regression In main.cpp, we have to get the title ID before the ROM is loaded, else the renderer will reflect only the global settings and now the user's game specific settings. * settings: use a template to duplicate memory for each setting Replaces the type of each variable in the Settings::Values struct with a new class that allows basic data reading and writing. The new struct Settings::Setting duplicates the data in memory and can manage global overrides per each setting. * configuration: correct add-ons config and swap settings when apropriate Any add-ons interaction happens directly through the global values struct. Swapping bewteen structs now also includes copying the necessary global configs that cannot be changed nor saved in per-game settings. General and System config menus now update based on whether it is viewing the global or per-game settings. * settings: restore old values struct No longer needed with the Settings::Setting class template. * configuration: implement hierarchical game properties dialog This sets the apropriate global or local data in each setting. 
* clang format * clang format take 2 can the docker container save this? * address comments and style issues * config: read and write settings with global awareness Adds new functions to read and write settings while keeping the global state in focus. Files now generated per-game are much smaller since often they only need address the global state. * settings: restore global state when necessary Upon closing a game or the game properties dialog, we need to restore all global settings to the original global state so that we can properly open the configuration dialog or boot a different game. * configuration: guard setting values incorrectly This disables setting values while a game is running if the setting is overwritten by a per game setting. * config: don't write local settings in the global config Simple guards to prevent writing the wrong settings in the wrong files. * configuration: add comments, assume less, and clang format No longer assumes that a disabled UI element means the global state is turned off, instead opting to directly answer that question. Still however assumes a game is running if it is in that state. * configuration: fix a logic error Should not be negated * restore settings' global state regardless of accept/cancel Fixes loading a properties dialog and causing the global config dialog to show local settings. * fix more logic errors Fixed the frame limit would set the global setting from the game properties dialog. Also strengthened the Settings::Setting member variables and simplified the logic in config reading (ReadSettingGlobal). * fix another logic error In my efforts to guard RestoreGlobalState, I accidentally negated the IsPowered condition. 
* configure_audio: set toggle_stretched_audio to tristate * fixed custom rtc and rng seed overwriting the global value * clang format * rebased * clang format take 4 * address my own review Basically revert unintended changes * settings: literal instead of casting "No need to cast, use 1U instead" Thanks, Morph! Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com> * Revert "settings: literal instead of casting " This reverts commit 95e992a87c898f3e882ffdb415bb0ef9f80f613f. * main: fix status buttons reporting wrong settings after stop emulation * settings: Log UseDockedMode in the Controls group This should have happened when use_docked_mode was moved over to the controls group internally. This just reflects this in the log. * main: load settings if the file has a title id In other words, don't exit if the loader has trouble getting a title id. * use a zero * settings: initalize resolution factor with constructor instead of casting * Revert "settings: initalize resolution factor with constructor instead of casting" This reverts commit 54c35ecb46a29953842614620f9b7de1aa9d5dc8. * configure_graphics: guard device selector when Vulkan is global Prevents the user from editing the device selector if Vulkan is the global renderer backend. Also resets the vulkan_device variable when the users switches back-and-forth between global and Vulkan. * address reviewer concerns Changes function variables to const wherever they don't need to be changed. Sets Settings::Setting to final as it should not be inherited from. Sets ConfigurationShared::use_global_text to static. Co-Authored-By: VolcaEM <volcaem@users.noreply.github.com> * main: load per-game settings after LoadROM This prevents `Restart Emulation` from restoring the global settings *after* the per-game settings were applied. Thanks to BSoDGamingYT for finding this bug. * Revert "main: load per-game settings after LoadROM" This reverts commit 9d0d48c52d2dcf3bfb1806cc8fa7d5a271a8a804. 
* main: only restore global settings when necessary Loading the per-game settings cannot happen after the ROM is loaded, so we have to specify when to restore the global state. Again thanks to BSoD for finding the bug. * configuration_shared: address reviewer concerns except operator overrides Dropping operator override usage in next commit. Co-Authored-By: LC <lioncash@users.noreply.github.com> * settings: Drop operator overrides from Setting template Requires using GetValue and SetValue explicitly. Also reverts a change that broke title ID formatting in the game properties dialog. * complete rebase * configuration_shared: translate "Use global configuration" Uses ConfigurePerGame to do so, since its usage, at least as of now, corresponds with ConfigurationShared. * configure_per_game: address reviewer concern As far as I understand, it prevents the program from unnecessarily copying strings. Co-Authored-By: LC <lioncash@users.noreply.github.com> Co-authored-by: Morph <39850852+Morph1984@users.noreply.github.com> Co-authored-by: VolcaEM <volcaem@users.noreply.github.com> Co-authored-by: LC <lioncash@users.noreply.github.com>
2020-07-10 04:42:09 +02:00
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
AsyncFlushQuery(*cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
/// Resets a counter to zero. It doesn't disable the query after resetting.
void ResetCounter(VideoCore::QueryType type) {
std::unique_lock lock{mutex};
Stream(type).Reset();
}
/// Disable all active streams. Expected to be called at the end of a command buffer.
void DisableStreams() {
std::unique_lock lock{mutex};
for (auto& stream : streams) {
stream.Update(false);
}
}
/// Returns a new host counter.
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
VideoCore::QueryType type) {
return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
type);
}
/// Returns the counter stream of the specified type.
CounterStream& Stream(VideoCore::QueryType type) {
return streams[static_cast<std::size_t>(type)];
}
2020-02-14 01:11:21 +01:00
/// Returns the counter stream of the specified type.
const CounterStream& Stream(VideoCore::QueryType type) const {
return streams[static_cast<std::size_t>(type)];
}
2020-04-15 22:36:14 +02:00
void CommitAsyncFlushes() {
2020-04-16 18:29:53 +02:00
committed_flushes.push_back(uncommitted_flushes);
uncommitted_flushes.reset();
2020-04-15 22:36:14 +02:00
}
2020-04-16 18:29:53 +02:00
bool HasUncommittedFlushes() const {
return uncommitted_flushes != nullptr;
2020-04-15 22:36:14 +02:00
}
2020-04-16 18:29:53 +02:00
bool ShouldWaitAsyncFlushes() const {
if (committed_flushes.empty()) {
2020-04-15 22:36:14 +02:00
return false;
}
2020-04-16 18:29:53 +02:00
return committed_flushes.front() != nullptr;
2020-04-15 22:36:14 +02:00
}
void PopAsyncFlushes() {
2020-04-16 18:29:53 +02:00
if (committed_flushes.empty()) {
2020-04-15 22:36:14 +02:00
return;
}
2020-04-16 18:29:53 +02:00
auto& flush_list = committed_flushes.front();
2020-04-15 22:36:14 +02:00
if (!flush_list) {
2020-04-16 18:29:53 +02:00
committed_flushes.pop_front();
2020-04-15 22:36:14 +02:00
return;
}
for (VAddr query_address : *flush_list) {
FlushAndRemoveRegion(query_address, 4);
}
2020-04-16 18:29:53 +02:00
committed_flushes.pop_front();
2020-04-15 22:36:14 +02:00
}
private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
const u64 addr_begin = addr;
const u64 addr_end = addr_begin + size;
const auto in_range = [addr_begin, addr_end](const CachedQuery& query) {
const u64 cache_begin = query.GetCpuAddr();
const u64 cache_end = cache_begin + query.SizeInBytes();
return cache_begin < addr_end && addr_begin < cache_end;
};
const u64 page_end = addr_end >> YUZU_PAGEBITS;
for (u64 page = addr_begin >> YUZU_PAGEBITS; page <= page_end; ++page) {
const auto& it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
continue;
}
auto& contents = it->second;
for (auto& query : contents) {
if (!in_range(query)) {
continue;
}
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.SizeInBytes(), -1);
query.Flush();
}
std::erase_if(contents, in_range);
}
}
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
const u64 page = static_cast<u64>(cpu_addr) >> YUZU_PAGEBITS;
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
host_ptr);
}
/// Tries to a get a cached query. Returns nullptr on failure.
CachedQuery* TryGet(VAddr addr) {
const u64 page = static_cast<u64>(addr) >> YUZU_PAGEBITS;
const auto it = cached_queries.find(page);
if (it == std::end(cached_queries)) {
return nullptr;
}
auto& contents = it->second;
2020-04-06 01:26:15 +02:00
const auto found = std::find_if(std::begin(contents), std::end(contents),
[addr](auto& query) { return query.GetCpuAddr() == addr; });
return found != std::end(contents) ? &*found : nullptr;
}
2020-04-15 22:36:14 +02:00
void AsyncFlushQuery(VAddr addr) {
2020-04-16 18:29:53 +02:00
if (!uncommitted_flushes) {
uncommitted_flushes = std::make_shared<std::vector<VAddr>>();
2020-04-15 22:36:14 +02:00
}
uncommitted_flushes->push_back(addr);
2020-04-15 22:36:14 +02:00
}
static constexpr std::uintptr_t YUZU_PAGESIZE = 4096;
static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
2020-04-15 22:36:14 +02:00
std::shared_ptr<std::vector<VAddr>> uncommitted_flushes{};
std::list<std::shared_ptr<std::vector<VAddr>>> committed_flushes;
};
/**
 * Base class for backend counters whose value may depend on a previous counter.
 *
 * The value returned by Query() is BlockingQuery() + base_result + the dependency's Query(),
 * computed once and cached in `result`.
 *
 * @tparam QueryCache  Cache type (unused by this base class).
 * @tparam HostCounter Derived counter type; must provide `Depth()` and `Query()`.
 */
template <class QueryCache, class HostCounter>
class HostCounterBase {
public:
    explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
        : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
        // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
        constexpr u64 depth_threshold = 96;
        if (depth > depth_threshold) {
            // Collapse the chain: resolve the dependency now and keep only its value.
            depth = 0;
            base_result = dependency->Query();
            dependency = nullptr;
        }
    }

    virtual ~HostCounterBase() = default;

    /// Returns the current value of the query. The first call computes and caches the value.
    u64 Query() {
        if (result) {
            return *result;
        }

        u64 value = BlockingQuery() + base_result;
        if (dependency) {
            value += dependency->Query();
            // The dependency's value is now folded into `value`; release the chain.
            dependency = nullptr;
        }

        result = value;
        return *result;
    }

    /// Returns true when flushing this query will potentially wait.
    bool WaitPending() const noexcept {
        // A cached result means Query() returns immediately; only an unresolved counter has to
        // go through BlockingQuery(). The previous `result.has_value()` was inverted with
        // respect to this contract.
        return !result.has_value();
    }

    /// Returns the number of nested dependencies.
    u64 Depth() const noexcept {
        return depth;
    }

protected:
    /// Returns the value of query from the backend API blocking as needed.
    virtual u64 BlockingQuery() const = 0;

private:
    std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
    std::optional<u64> result;               ///< Filled with the already returned value.
    u64 depth;                               ///< Number of nested dependencies.
    u64 base_result = 0;                     ///< Equivalent to nested dependencies value.
};
/**
 * Base class for a query cached at a guest CPU address and written back through a host pointer.
 *
 * Move-only. Flush() writes the 64-bit counter value at `host_ptr` and, when a timestamp is
 * bound, the 64-bit timestamp at `host_ptr + TIMESTAMP_OFFSET`.
 *
 * @tparam HostCounter Counter type; must provide `Query()` and `WaitPending()`.
 */
template <class HostCounter>
class CachedQueryBase {
public:
    explicit CachedQueryBase(VAddr cpu_addr_, u8* host_ptr_)
        : cpu_addr{cpu_addr_}, host_ptr{host_ptr_} {}

    virtual ~CachedQueryBase() = default;

    CachedQueryBase(CachedQueryBase&&) noexcept = default;
    CachedQueryBase(const CachedQueryBase&) = delete;

    CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
    CachedQueryBase& operator=(const CachedQueryBase&) = delete;

    /// Flushes the query to guest memory.
    virtual void Flush() {
        // When counter is nullptr it means that it's just been reset. We are supposed to write a
        // zero in these cases.
        const u64 value = counter ? counter->Query() : 0;
        std::memcpy(host_ptr, &value, sizeof(u64));

        if (timestamp) {
            std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
        }
    }

    /**
     * Binds a counter to this query.
     * @param counter_   Counter to read on the next flush; may be nullptr.
     * @param timestamp_ Timestamp to write on the next flush, or empty for a short query.
     */
    void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
        if (counter) {
            // If there's an old counter set it means the query is being rewritten by the game.
            // To avoid losing the data forever, flush here.
            Flush();
        }
        counter = std::move(counter_);
        timestamp = timestamp_;
    }

    /// Returns the guest CPU address of the query.
    VAddr GetCpuAddr() const noexcept {
        return cpu_addr;
    }

    /// Returns the size of this query, which depends on whether a timestamp is currently bound.
    u64 SizeInBytes() const noexcept {
        return SizeInBytes(timestamp.has_value());
    }

    /// Returns the size of a query with or without a timestamp.
    static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
        return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
    }

protected:
    /// Returns true when querying the counter may potentially block.
    bool WaitPending() const noexcept {
        return counter && counter->WaitPending();
    }

private:
    static constexpr std::size_t SMALL_QUERY_SIZE = 8;   // Query size without timestamp.
    static constexpr std::size_t LARGE_QUERY_SIZE = 16;  // Query size with timestamp.
    static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.

    VAddr cpu_addr;                       ///< Guest CPU address.
    u8* host_ptr;                         ///< Writable host pointer.
    std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
    std::optional<u64> timestamp;         ///< Timestamp to flush to guest memory.
};
} // namespace VideoCommon