vk_query_cache: Implement generic query cache on Vulkan
This commit is contained in:
		| @@ -180,6 +180,8 @@ if (ENABLE_VULKAN) | |||||||
|         renderer_vulkan/vk_memory_manager.h |         renderer_vulkan/vk_memory_manager.h | ||||||
|         renderer_vulkan/vk_pipeline_cache.cpp |         renderer_vulkan/vk_pipeline_cache.cpp | ||||||
|         renderer_vulkan/vk_pipeline_cache.h |         renderer_vulkan/vk_pipeline_cache.h | ||||||
|  |         renderer_vulkan/vk_query_cache.cpp | ||||||
|  |         renderer_vulkan/vk_query_cache.h | ||||||
|         renderer_vulkan/vk_rasterizer.cpp |         renderer_vulkan/vk_rasterizer.cpp | ||||||
|         renderer_vulkan/vk_rasterizer.h |         renderer_vulkan/vk_rasterizer.h | ||||||
|         renderer_vulkan/vk_renderpass_cache.cpp |         renderer_vulkan/vk_renderpass_cache.cpp | ||||||
|   | |||||||
| @@ -88,7 +88,8 @@ private: | |||||||
|     std::shared_ptr<HostCounter> last; |     std::shared_ptr<HostCounter> last; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> | template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, | ||||||
|  |           class QueryPool> | ||||||
| class QueryCacheBase { | class QueryCacheBase { | ||||||
| public: | public: | ||||||
|     explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) |     explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) | ||||||
| @@ -127,15 +128,25 @@ public: | |||||||
|  |  | ||||||
|     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. | ||||||
|     void UpdateCounters() { |     void UpdateCounters() { | ||||||
|  |         std::unique_lock lock{mutex}; | ||||||
|         const auto& regs = system.GPU().Maxwell3D().regs; |         const auto& regs = system.GPU().Maxwell3D().regs; | ||||||
|         Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); |         Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Resets a counter to zero. It doesn't disable the query after resetting. |     /// Resets a counter to zero. It doesn't disable the query after resetting. | ||||||
|     void ResetCounter(VideoCore::QueryType type) { |     void ResetCounter(VideoCore::QueryType type) { | ||||||
|  |         std::unique_lock lock{mutex}; | ||||||
|         Stream(type).Reset(); |         Stream(type).Reset(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Disable all active streams. Expected to be called at the end of a command buffer. | ||||||
|  |     void DisableStreams() { | ||||||
|  |         std::unique_lock lock{mutex}; | ||||||
|  |         for (auto& stream : streams) { | ||||||
|  |             stream.Update(false); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Returns a new host counter. |     /// Returns a new host counter. | ||||||
|     std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, |     std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency, | ||||||
|                                          VideoCore::QueryType type) { |                                          VideoCore::QueryType type) { | ||||||
| @@ -148,6 +159,9 @@ public: | |||||||
|         return streams[static_cast<std::size_t>(type)]; |         return streams[static_cast<std::size_t>(type)]; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | protected: | ||||||
|  |     std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     /// Flushes a memory range to guest memory and removes it from the cache. |     /// Flushes a memory range to guest memory and removes it from the cache. | ||||||
|     void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { |     void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) { | ||||||
| @@ -213,8 +227,16 @@ private: | |||||||
| template <class QueryCache, class HostCounter> | template <class QueryCache, class HostCounter> | ||||||
| class HostCounterBase { | class HostCounterBase { | ||||||
| public: | public: | ||||||
|     explicit HostCounterBase(std::shared_ptr<HostCounter> dependency) |     explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_) | ||||||
|         : dependency{std::move(dependency)} {} |         : dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} { | ||||||
|  |         // Avoid nesting too many dependencies to avoid a stack overflow when these are deleted. | ||||||
|  |         static constexpr u64 depth_threshold = 96; | ||||||
|  |         if (depth > depth_threshold) { | ||||||
|  |             depth = 0; | ||||||
|  |             base_result = dependency->Query(); | ||||||
|  |             dependency = nullptr; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Returns the current value of the query. |     /// Returns the current value of the query. | ||||||
|     u64 Query() { |     u64 Query() { | ||||||
| @@ -222,9 +244,10 @@ public: | |||||||
|             return *result; |             return *result; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         u64 value = BlockingQuery(); |         u64 value = BlockingQuery() + base_result; | ||||||
|         if (dependency) { |         if (dependency) { | ||||||
|             value += dependency->Query(); |             value += dependency->Query(); | ||||||
|  |             dependency = nullptr; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         return *(result = value); |         return *(result = value); | ||||||
| @@ -235,6 +258,10 @@ public: | |||||||
|         return result.has_value(); |         return result.has_value(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     u64 Depth() const noexcept { | ||||||
|  |         return depth; | ||||||
|  |     } | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     /// Returns the value of query from the backend API blocking as needed. |     /// Returns the value of query from the backend API blocking as needed. | ||||||
|     virtual u64 BlockingQuery() const = 0; |     virtual u64 BlockingQuery() const = 0; | ||||||
| @@ -242,6 +269,8 @@ protected: | |||||||
| private: | private: | ||||||
|     std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. |     std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value. | ||||||
|     std::optional<u64> result;               ///< Filled with the already returned value. |     std::optional<u64> result;               ///< Filled with the already returned value. | ||||||
|  |     u64 depth;                               ///< Number of nested dependencies. | ||||||
|  |     u64 base_result = 0;                     ///< Equivalent to nested dependencies value. | ||||||
| }; | }; | ||||||
|  |  | ||||||
| template <class HostCounter> | template <class HostCounter> | ||||||
|   | |||||||
| @@ -31,15 +31,16 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { | |||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) | QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) | ||||||
|     : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, |     : VideoCommon::QueryCacheBase< | ||||||
|                                   HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>( |           QueryCache, CachedQuery, CounterStream, HostCounter, | ||||||
|                                                            gl_rasterizer)}, |           std::vector<OGLQuery>>{system, | ||||||
|  |                                  static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, | ||||||
|       gl_rasterizer{gl_rasterizer} {} |       gl_rasterizer{gl_rasterizer} {} | ||||||
|  |  | ||||||
| QueryCache::~QueryCache() = default; | QueryCache::~QueryCache() = default; | ||||||
|  |  | ||||||
| OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { | OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { | ||||||
|     auto& reserve = queries_reserve[static_cast<std::size_t>(type)]; |     auto& reserve = query_pools[static_cast<std::size_t>(type)]; | ||||||
|     OGLQuery query; |     OGLQuery query; | ||||||
|     if (reserve.empty()) { |     if (reserve.empty()) { | ||||||
|         query.Create(GetTarget(type)); |         query.Create(GetTarget(type)); | ||||||
| @@ -52,7 +53,7 @@ OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { | void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) { | ||||||
|     queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query)); |     query_pools[static_cast<std::size_t>(type)].push_back(std::move(query)); | ||||||
| } | } | ||||||
|  |  | ||||||
| bool QueryCache::AnyCommandQueued() const noexcept { | bool QueryCache::AnyCommandQueued() const noexcept { | ||||||
|   | |||||||
| @@ -6,12 +6,8 @@ | |||||||
|  |  | ||||||
| #include <array> | #include <array> | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <optional> |  | ||||||
| #include <unordered_map> |  | ||||||
| #include <vector> | #include <vector> | ||||||
|  |  | ||||||
| #include <glad/glad.h> |  | ||||||
|  |  | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/query_cache.h" | #include "video_core/query_cache.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| @@ -30,8 +26,8 @@ class RasterizerOpenGL; | |||||||
|  |  | ||||||
| using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; | ||||||
|  |  | ||||||
| class QueryCache final | class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, | ||||||
|     : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { |                                                             HostCounter, std::vector<OGLQuery>> { | ||||||
| public: | public: | ||||||
|     explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); |     explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); | ||||||
|     ~QueryCache(); |     ~QueryCache(); | ||||||
| @@ -44,7 +40,6 @@ public: | |||||||
|  |  | ||||||
| private: | private: | ||||||
|     RasterizerOpenGL& gl_rasterizer; |     RasterizerOpenGL& gl_rasterizer; | ||||||
|     std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve; |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { | ||||||
| @@ -59,7 +54,7 @@ private: | |||||||
|     u64 BlockingQuery() const override; |     u64 BlockingQuery() const override; | ||||||
|  |  | ||||||
|     QueryCache& cache; |     QueryCache& cache; | ||||||
|     VideoCore::QueryType type; |     const VideoCore::QueryType type; | ||||||
|     OGLQuery query; |     OGLQuery query; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||||||
|     features.depthBiasClamp = true; |     features.depthBiasClamp = true; | ||||||
|     features.geometryShader = true; |     features.geometryShader = true; | ||||||
|     features.tessellationShader = true; |     features.tessellationShader = true; | ||||||
|  |     features.occlusionQueryPrecise = true; | ||||||
|     features.fragmentStoresAndAtomics = true; |     features.fragmentStoresAndAtomics = true; | ||||||
|     features.shaderImageGatherExtended = true; |     features.shaderImageGatherExtended = true; | ||||||
|     features.shaderStorageImageWriteWithoutFormat = true; |     features.shaderStorageImageWriteWithoutFormat = true; | ||||||
| @@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan | |||||||
|     bit8_storage.uniformAndStorageBuffer8BitAccess = true; |     bit8_storage.uniformAndStorageBuffer8BitAccess = true; | ||||||
|     SetNext(next, bit8_storage); |     SetNext(next, bit8_storage); | ||||||
|  |  | ||||||
|  |     vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset; | ||||||
|  |     host_query_reset.hostQueryReset = true; | ||||||
|  |     SetNext(next, host_query_reset); | ||||||
|  |  | ||||||
|     vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; |     vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8; | ||||||
|     if (is_float16_supported) { |     if (is_float16_supported) { | ||||||
|         float16_int8.shaderFloat16 = true; |         float16_int8.shaderFloat16 = true; | ||||||
| @@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||||||
|         VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, |         VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | ||||||
|         VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, |         VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | ||||||
|         VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, |         VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | ||||||
|  |         VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME, | ||||||
|     }; |     }; | ||||||
|     std::bitset<required_extensions.size()> available_extensions{}; |     std::bitset<required_extensions.size()> available_extensions{}; | ||||||
|  |  | ||||||
| @@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev | |||||||
|         std::make_pair(features.depthBiasClamp, "depthBiasClamp"), |         std::make_pair(features.depthBiasClamp, "depthBiasClamp"), | ||||||
|         std::make_pair(features.geometryShader, "geometryShader"), |         std::make_pair(features.geometryShader, "geometryShader"), | ||||||
|         std::make_pair(features.tessellationShader, "tessellationShader"), |         std::make_pair(features.tessellationShader, "tessellationShader"), | ||||||
|  |         std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"), | ||||||
|         std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), |         std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"), | ||||||
|         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), |         std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"), | ||||||
|         std::make_pair(features.shaderStorageImageWriteWithoutFormat, |         std::make_pair(features.shaderStorageImageWriteWithoutFormat, | ||||||
| @@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     extensions.reserve(13); |     extensions.reserve(14); | ||||||
|     extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); |     extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); | ||||||
|     extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); |     extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME); | ||||||
|     extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); |     extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME); | ||||||
| @@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami | |||||||
|     extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); |     extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME); | ||||||
|     extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); |     extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME); | ||||||
|     extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); |     extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME); | ||||||
|  |     extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME); | ||||||
|  |  | ||||||
|     [[maybe_unused]] const bool nsight = |     [[maybe_unused]] const bool nsight = | ||||||
|         std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); |         std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); | ||||||
|   | |||||||
							
								
								
									
										122
									
								
								src/video_core/renderer_vulkan/vk_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										122
									
								
								src/video_core/renderer_vulkan/vk_query_cache.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,122 @@ | |||||||
|  | // Copyright 2020 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include <algorithm> | ||||||
|  | #include <cstddef> | ||||||
|  | #include <cstdint> | ||||||
|  | #include <utility> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | #include "video_core/renderer_vulkan/declarations.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_device.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
|  |  | ||||||
|  | namespace Vulkan { | ||||||
|  |  | ||||||
|  | namespace { | ||||||
|  |  | ||||||
|  | constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion}; | ||||||
|  |  | ||||||
|  | constexpr vk::QueryType GetTarget(VideoCore::QueryType type) { | ||||||
|  |     return QUERY_TARGETS[static_cast<std::size_t>(type)]; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // Anonymous namespace | ||||||
|  |  | ||||||
|  | QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} | ||||||
|  |  | ||||||
|  | QueryPool::~QueryPool() = default; | ||||||
|  |  | ||||||
|  | void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { | ||||||
|  |     device = &device_; | ||||||
|  |     type = type_; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) { | ||||||
|  |     std::size_t index; | ||||||
|  |     do { | ||||||
|  |         index = CommitResource(fence); | ||||||
|  |     } while (usage[index]); | ||||||
|  |     usage[index] = true; | ||||||
|  |  | ||||||
|  |     return {*pools[index / GROW_STEP], static_cast<std::uint32_t>(index % GROW_STEP)}; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void QueryPool::Allocate(std::size_t begin, std::size_t end) { | ||||||
|  |     usage.resize(end); | ||||||
|  |  | ||||||
|  |     const auto dev = device->GetLogical(); | ||||||
|  |     const u32 size = static_cast<u32>(end - begin); | ||||||
|  |     const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {}); | ||||||
|  |     pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader())); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) { | ||||||
|  |     const auto it = | ||||||
|  |         std::find_if(std::begin(pools), std::end(pools), | ||||||
|  |                      [query_pool = query.first](auto& pool) { return query_pool == *pool; }); | ||||||
|  |     ASSERT(it != std::end(pools)); | ||||||
|  |  | ||||||
|  |     const std::ptrdiff_t pool_index = std::distance(std::begin(pools), it); | ||||||
|  |     usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|  |                            const VKDevice& device, VKScheduler& scheduler) | ||||||
|  |     : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, | ||||||
|  |                                   QueryPool>{system, rasterizer}, | ||||||
|  |       device{device}, scheduler{scheduler} { | ||||||
|  |     for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { | ||||||
|  |         query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | VKQueryCache::~VKQueryCache() = default; | ||||||
|  |  | ||||||
|  | std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { | ||||||
|  |     return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void VKQueryCache::Reserve(VideoCore::QueryType type, | ||||||
|  |                            std::pair<vk::QueryPool, std::uint32_t> query) { | ||||||
|  |     query_pools[static_cast<std::size_t>(type)].Reserve(query); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||||||
|  |                          VideoCore::QueryType type) | ||||||
|  |     : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, | ||||||
|  |       type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { | ||||||
|  |     const auto dev = cache.Device().GetLogical(); | ||||||
|  |     cache.Scheduler().Record([dev, query = query](vk::CommandBuffer cmdbuf, auto& dld) { | ||||||
|  |         dev.resetQueryPoolEXT(query.first, query.second, 1, dld); | ||||||
|  |         cmdbuf.beginQuery(query.first, query.second, vk::QueryControlFlagBits::ePrecise, dld); | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | HostCounter::~HostCounter() { | ||||||
|  |     cache.Reserve(type, query); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void HostCounter::EndQuery() { | ||||||
|  |     cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) { | ||||||
|  |         cmdbuf.endQuery(query.first, query.second, dld); | ||||||
|  |     }); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | u64 HostCounter::BlockingQuery() const { | ||||||
|  |     if (ticks >= cache.Scheduler().Ticks()) { | ||||||
|  |         cache.Scheduler().Flush(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     const auto dev = cache.Device().GetLogical(); | ||||||
|  |     const auto& dld = cache.Device().GetDispatchLoader(); | ||||||
|  |     u64 value; | ||||||
|  |     dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value), | ||||||
|  |                             vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld); | ||||||
|  |     return value; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Vulkan | ||||||
							
								
								
									
										104
									
								
								src/video_core/renderer_vulkan/vk_query_cache.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								src/video_core/renderer_vulkan/vk_query_cache.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | |||||||
|  | // Copyright 2020 yuzu Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <cstddef> | ||||||
|  | #include <cstdint> | ||||||
|  | #include <memory> | ||||||
|  | #include <utility> | ||||||
|  | #include <vector> | ||||||
|  |  | ||||||
|  | #include "common/common_types.h" | ||||||
|  | #include "video_core/query_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/declarations.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||||
|  |  | ||||||
|  | namespace VideoCore { | ||||||
|  | class RasterizerInterface; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | namespace Vulkan { | ||||||
|  |  | ||||||
|  | class CachedQuery; | ||||||
|  | class HostCounter; | ||||||
|  | class VKDevice; | ||||||
|  | class VKQueryCache; | ||||||
|  | class VKScheduler; | ||||||
|  |  | ||||||
|  | using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; | ||||||
|  |  | ||||||
|  | class QueryPool final : public VKFencedPool { | ||||||
|  | public: | ||||||
|  |     explicit QueryPool(); | ||||||
|  |     ~QueryPool() override; | ||||||
|  |  | ||||||
|  |     void Initialize(const VKDevice& device, VideoCore::QueryType type); | ||||||
|  |  | ||||||
|  |     std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence); | ||||||
|  |  | ||||||
|  |     void Reserve(std::pair<vk::QueryPool, std::uint32_t> query); | ||||||
|  |  | ||||||
|  | protected: | ||||||
|  |     void Allocate(std::size_t begin, std::size_t end) override; | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     static constexpr std::size_t GROW_STEP = 512; | ||||||
|  |  | ||||||
|  |     const VKDevice* device = nullptr; | ||||||
|  |     VideoCore::QueryType type = {}; | ||||||
|  |  | ||||||
|  |     std::vector<UniqueQueryPool> pools; | ||||||
|  |     std::vector<bool> usage; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class VKQueryCache final | ||||||
|  |     : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, | ||||||
|  |                                          QueryPool> { | ||||||
|  | public: | ||||||
|  |     explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|  |                           const VKDevice& device, VKScheduler& scheduler); | ||||||
|  |     ~VKQueryCache(); | ||||||
|  |  | ||||||
|  |     std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type); | ||||||
|  |  | ||||||
|  |     void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query); | ||||||
|  |  | ||||||
|  |     const VKDevice& Device() const noexcept { | ||||||
|  |         return device; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     VKScheduler& Scheduler() const noexcept { | ||||||
|  |         return scheduler; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     const VKDevice& device; | ||||||
|  |     VKScheduler& scheduler; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { | ||||||
|  | public: | ||||||
|  |     explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, | ||||||
|  |                          VideoCore::QueryType type); | ||||||
|  |     ~HostCounter(); | ||||||
|  |  | ||||||
|  |     void EndQuery(); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     u64 BlockingQuery() const override; | ||||||
|  |  | ||||||
|  |     VKQueryCache& cache; | ||||||
|  |     const VideoCore::QueryType type; | ||||||
|  |     const std::pair<vk::QueryPool, std::uint32_t> query; | ||||||
|  |     const u64 ticks; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { | ||||||
|  | public: | ||||||
|  |     explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr) | ||||||
|  |         : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {} | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | } // namespace Vulkan | ||||||
| @@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||||||
|                     staging_pool), |                     staging_pool), | ||||||
|       pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), |       pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), | ||||||
|       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), |       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), | ||||||
|       sampler_cache(device) {} |       sampler_cache(device), query_cache(system, *this, device, scheduler) { | ||||||
|  |     scheduler.SetQueryCache(query_cache); | ||||||
|  | } | ||||||
|  |  | ||||||
| RasterizerVulkan::~RasterizerVulkan() = default; | RasterizerVulkan::~RasterizerVulkan() = default; | ||||||
|  |  | ||||||
| @@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
|  |  | ||||||
|     FlushWork(); |     FlushWork(); | ||||||
|  |  | ||||||
|  |     query_cache.UpdateCounters(); | ||||||
|  |  | ||||||
|     const auto& gpu = system.GPU().Maxwell3D(); |     const auto& gpu = system.GPU().Maxwell3D(); | ||||||
|     GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; |     GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; | ||||||
|  |  | ||||||
| @@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { | |||||||
| void RasterizerVulkan::Clear() { | void RasterizerVulkan::Clear() { | ||||||
|     MICROPROFILE_SCOPE(Vulkan_Clearing); |     MICROPROFILE_SCOPE(Vulkan_Clearing); | ||||||
|  |  | ||||||
|  |     query_cache.UpdateCounters(); | ||||||
|  |  | ||||||
|     const auto& gpu = system.GPU().Maxwell3D(); |     const auto& gpu = system.GPU().Maxwell3D(); | ||||||
|     if (!system.GPU().Maxwell3D().ShouldExecute()) { |     if (!system.GPU().Maxwell3D().ShouldExecute()) { | ||||||
|         return; |         return; | ||||||
| @@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||||||
|     sampled_views.clear(); |     sampled_views.clear(); | ||||||
|     image_views.clear(); |     image_views.clear(); | ||||||
|  |  | ||||||
|  |     query_cache.UpdateCounters(); | ||||||
|  |  | ||||||
|     const auto& launch_desc = system.GPU().KeplerCompute().launch_description; |     const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||||||
|     const ComputePipelineCacheKey key{ |     const ComputePipelineCacheKey key{ | ||||||
|         code_addr, |         code_addr, | ||||||
| @@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { | |||||||
|     }); |     }); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { | ||||||
|  |     query_cache.ResetCounter(type); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type, | ||||||
|  |                              std::optional<u64> timestamp) { | ||||||
|  |     query_cache.Query(gpu_addr, type, timestamp); | ||||||
|  | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::FlushAll() {} | void RasterizerVulkan::FlushAll() {} | ||||||
|  |  | ||||||
| void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { | ||||||
|     texture_cache.FlushRegion(addr, size); |     texture_cache.FlushRegion(addr, size); | ||||||
|     buffer_cache.FlushRegion(addr, size); |     buffer_cache.FlushRegion(addr, size); | ||||||
|  |     query_cache.FlushRegion(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     texture_cache.InvalidateRegion(addr, size); |     texture_cache.InvalidateRegion(addr, size); | ||||||
|     pipeline_cache.InvalidateRegion(addr, size); |     pipeline_cache.InvalidateRegion(addr, size); | ||||||
|     buffer_cache.InvalidateRegion(addr, size); |     buffer_cache.InvalidateRegion(addr, size); | ||||||
|  |     query_cache.InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|   | |||||||
| @@ -24,6 +24,7 @@ | |||||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||||
| #include "video_core/renderer_vulkan/vk_memory_manager.h" | #include "video_core/renderer_vulkan/vk_memory_manager.h" | ||||||
| #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | #include "video_core/renderer_vulkan/vk_pipeline_cache.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | #include "video_core/renderer_vulkan/vk_renderpass_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_resource_manager.h" | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||||
| #include "video_core/renderer_vulkan/vk_sampler_cache.h" | #include "video_core/renderer_vulkan/vk_sampler_cache.h" | ||||||
| @@ -96,7 +97,7 @@ struct ImageView { | |||||||
|     vk::ImageLayout* layout = nullptr; |     vk::ImageLayout* layout = nullptr; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| class RasterizerVulkan : public VideoCore::RasterizerAccelerated { | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | ||||||
| public: | public: | ||||||
|     explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, |     explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, | ||||||
|                               VKScreenInfo& screen_info, const VKDevice& device, |                               VKScreenInfo& screen_info, const VKDevice& device, | ||||||
| @@ -108,6 +109,8 @@ public: | |||||||
|     bool DrawMultiBatch(bool is_indexed) override; |     bool DrawMultiBatch(bool is_indexed) override; | ||||||
|     void Clear() override; |     void Clear() override; | ||||||
|     void DispatchCompute(GPUVAddr code_addr) override; |     void DispatchCompute(GPUVAddr code_addr) override; | ||||||
|  |     void ResetCounter(VideoCore::QueryType type) override; | ||||||
|  |     void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
|     void FlushRegion(CacheAddr addr, u64 size) override; |     void FlushRegion(CacheAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; |     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
| @@ -247,6 +250,7 @@ private: | |||||||
|     VKPipelineCache pipeline_cache; |     VKPipelineCache pipeline_cache; | ||||||
|     VKBufferCache buffer_cache; |     VKBufferCache buffer_cache; | ||||||
|     VKSamplerCache sampler_cache; |     VKSamplerCache sampler_cache; | ||||||
|  |     VKQueryCache query_cache; | ||||||
|  |  | ||||||
|     std::array<View, Maxwell::NumRenderTargets> color_attachments; |     std::array<View, Maxwell::NumRenderTargets> color_attachments; | ||||||
|     View zeta_attachment; |     View zeta_attachment; | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ | |||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "video_core/renderer_vulkan/declarations.h" | #include "video_core/renderer_vulkan/declarations.h" | ||||||
| #include "video_core/renderer_vulkan/vk_device.h" | #include "video_core/renderer_vulkan/vk_device.h" | ||||||
|  | #include "video_core/renderer_vulkan/vk_query_cache.h" | ||||||
| #include "video_core/renderer_vulkan/vk_resource_manager.h" | #include "video_core/renderer_vulkan/vk_resource_manager.h" | ||||||
| #include "video_core/renderer_vulkan/vk_scheduler.h" | #include "video_core/renderer_vulkan/vk_scheduler.h" | ||||||
|  |  | ||||||
| @@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) { | |||||||
| } | } | ||||||
|  |  | ||||||
| void VKScheduler::AllocateNewContext() { | void VKScheduler::AllocateNewContext() { | ||||||
|  |     ++ticks; | ||||||
|  |  | ||||||
|     std::unique_lock lock{mutex}; |     std::unique_lock lock{mutex}; | ||||||
|     current_fence = next_fence; |     current_fence = next_fence; | ||||||
|     next_fence = &resource_manager.CommitFence(); |     next_fence = &resource_manager.CommitFence(); | ||||||
| @@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() { | |||||||
|     current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); |     current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence); | ||||||
|     current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, |     current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, | ||||||
|                          device.GetDispatchLoader()); |                          device.GetDispatchLoader()); | ||||||
|  |     // Enable counters once again. These are disabled when a command buffer is finished. | ||||||
|  |     if (query_cache) { | ||||||
|  |         query_cache->UpdateCounters(); | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void VKScheduler::InvalidateState() { | void VKScheduler::InvalidateState() { | ||||||
| @@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void VKScheduler::EndPendingOperations() { | void VKScheduler::EndPendingOperations() { | ||||||
|  |     query_cache->DisableStreams(); | ||||||
|     EndRenderPass(); |     EndRenderPass(); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ | |||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
|  | #include <atomic> | ||||||
| #include <condition_variable> | #include <condition_variable> | ||||||
| #include <memory> | #include <memory> | ||||||
| #include <optional> | #include <optional> | ||||||
| @@ -18,6 +19,7 @@ namespace Vulkan { | |||||||
|  |  | ||||||
| class VKDevice; | class VKDevice; | ||||||
| class VKFence; | class VKFence; | ||||||
|  | class VKQueryCache; | ||||||
| class VKResourceManager; | class VKResourceManager; | ||||||
|  |  | ||||||
| class VKFenceView { | class VKFenceView { | ||||||
| @@ -67,6 +69,11 @@ public: | |||||||
|     /// Binds a pipeline to the current execution context. |     /// Binds a pipeline to the current execution context. | ||||||
|     void BindGraphicsPipeline(vk::Pipeline pipeline); |     void BindGraphicsPipeline(vk::Pipeline pipeline); | ||||||
|  |  | ||||||
|  |     /// Assigns the query cache. | ||||||
|  |     void SetQueryCache(VKQueryCache& query_cache_) { | ||||||
|  |         query_cache = &query_cache_; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Returns true when viewports have been set in the current command buffer. |     /// Returns true when viewports have been set in the current command buffer. | ||||||
|     bool TouchViewports() { |     bool TouchViewports() { | ||||||
|         return std::exchange(state.viewports, true); |         return std::exchange(state.viewports, true); | ||||||
| @@ -112,6 +119,11 @@ public: | |||||||
|         return current_fence; |         return current_fence; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Returns the current command buffer tick. | ||||||
|  |     u64 Ticks() const { | ||||||
|  |         return ticks; | ||||||
|  |     } | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     class Command { |     class Command { | ||||||
|     public: |     public: | ||||||
| @@ -205,6 +217,8 @@ private: | |||||||
|  |  | ||||||
|     const VKDevice& device; |     const VKDevice& device; | ||||||
|     VKResourceManager& resource_manager; |     VKResourceManager& resource_manager; | ||||||
|  |     VKQueryCache* query_cache = nullptr; | ||||||
|  |  | ||||||
|     vk::CommandBuffer current_cmdbuf; |     vk::CommandBuffer current_cmdbuf; | ||||||
|     VKFence* current_fence = nullptr; |     VKFence* current_fence = nullptr; | ||||||
|     VKFence* next_fence = nullptr; |     VKFence* next_fence = nullptr; | ||||||
| @@ -227,6 +241,7 @@ private: | |||||||
|     Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; |     Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; | ||||||
|     std::mutex mutex; |     std::mutex mutex; | ||||||
|     std::condition_variable cv; |     std::condition_variable cv; | ||||||
|  |     std::atomic<u64> ticks = 0; | ||||||
|     bool quit = false; |     bool quit = false; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user