OpenGL: Add GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries
This commit is contained in:
		| @@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessCounterReset() { | void Maxwell3D::ProcessCounterReset() { | ||||||
|     switch (regs.clear_report_value) { |     const auto query_type = [clear_report = regs.clear_report_value]() { | ||||||
|     case Regs::ClearReport::ZPassPixelCount: |         switch (clear_report) { | ||||||
|         rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); |         case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount: | ||||||
|         break; |             return VideoCommon::QueryType::ZPassPixelCount64; | ||||||
|     default: |         case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded: | ||||||
|         LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); |             return VideoCommon::QueryType::StreamingPrimitivesSucceeded; | ||||||
|         break; |         case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated: | ||||||
|     } |             return VideoCommon::QueryType::PrimitivesGenerated; | ||||||
|  |         case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut: | ||||||
|  |             return VideoCommon::QueryType::VtgPrimitivesOut; | ||||||
|  |         default: | ||||||
|  |             LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report); | ||||||
|  |             return VideoCommon::QueryType::Payload; | ||||||
|  |         } | ||||||
|  |     }(); | ||||||
|  |     rasterizer->ResetCounter(query_type); | ||||||
| } | } | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessSyncPoint() { | void Maxwell3D::ProcessSyncPoint() { | ||||||
|   | |||||||
| @@ -28,8 +28,11 @@ | |||||||
| namespace VideoCore { | namespace VideoCore { | ||||||
| enum class QueryType { | enum class QueryType { | ||||||
|     SamplesPassed, |     SamplesPassed, | ||||||
|  |     PrimitivesGenerated, | ||||||
|  |     TfbPrimitivesWritten, | ||||||
|  |     Count, | ||||||
| }; | }; | ||||||
| constexpr std::size_t NumQueryTypes = 1; | constexpr std::size_t NumQueryTypes = static_cast<size_t>(QueryType::Count); | ||||||
| } // namespace VideoCore | } // namespace VideoCore | ||||||
|  |  | ||||||
| namespace VideoCommon { | namespace VideoCommon { | ||||||
| @@ -44,15 +47,6 @@ public: | |||||||
|     explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) |     explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) | ||||||
|         : cache{cache_}, type{type_} {} |         : cache{cache_}, type{type_} {} | ||||||
|  |  | ||||||
|     /// Updates the state of the stream, enabling or disabling as needed. |  | ||||||
|     void Update(bool enabled) { |  | ||||||
|         if (enabled) { |  | ||||||
|             Enable(); |  | ||||||
|         } else { |  | ||||||
|             Disable(); |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     /// Resets the stream to zero. It doesn't disable the query after resetting. |     /// Resets the stream to zero. It doesn't disable the query after resetting. | ||||||
|     void Reset() { |     void Reset() { | ||||||
|         if (current) { |         if (current) { | ||||||
| @@ -80,7 +74,6 @@ public: | |||||||
|         return current != nullptr; |         return current != nullptr; | ||||||
|     } |     } | ||||||
|  |  | ||||||
| private: |  | ||||||
|     /// Enables the stream. |     /// Enables the stream. | ||||||
|     void Enable() { |     void Enable() { | ||||||
|         if (current) { |         if (current) { | ||||||
| @@ -97,6 +90,7 @@ private: | |||||||
|         last = std::exchange(current, nullptr); |         last = std::exchange(current, nullptr); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|     QueryCache& cache; |     QueryCache& cache; | ||||||
|     const VideoCore::QueryType type; |     const VideoCore::QueryType type; | ||||||
|  |  | ||||||
| @@ -112,8 +106,14 @@ public: | |||||||
|         : rasterizer{rasterizer_}, |         : rasterizer{rasterizer_}, | ||||||
|           // Use reinterpret_cast instead of static_cast as workaround for |           // Use reinterpret_cast instead of static_cast as workaround for | ||||||
|           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) |           // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) | ||||||
|           cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast<QueryCache&>(*this), |           cpu_memory{cpu_memory_}, streams{{ | ||||||
|                                                           VideoCore::QueryType::SamplesPassed}}} { |                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||||
|  |                                                       VideoCore::QueryType::SamplesPassed}}, | ||||||
|  |                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||||
|  |                                                       VideoCore::QueryType::PrimitivesGenerated}}, | ||||||
|  |                                        {CounterStream{reinterpret_cast<QueryCache&>(*this), | ||||||
|  |                                                       VideoCore::QueryType::TfbPrimitivesWritten}}, | ||||||
|  |                                    }} { | ||||||
|         (void)slot_async_jobs.insert(); // Null value |         (void)slot_async_jobs.insert(); // Null value | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -157,12 +157,11 @@ public: | |||||||
|         AsyncFlushQuery(query, timestamp, lock); |         AsyncFlushQuery(query, timestamp, lock); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. |     /// Enables all available GPU counters | ||||||
|     void UpdateCounters() { |     void EnableCounters() { | ||||||
|         std::unique_lock lock{mutex}; |         std::unique_lock lock{mutex}; | ||||||
|         if (maxwell3d) { |         for (auto& stream : streams) { | ||||||
|             const auto& regs = maxwell3d->regs; |             stream.Enable(); | ||||||
|             Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable); |  | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -176,7 +175,7 @@ public: | |||||||
|     void DisableStreams() { |     void DisableStreams() { | ||||||
|         std::unique_lock lock{mutex}; |         std::unique_lock lock{mutex}; | ||||||
|         for (auto& stream : streams) { |         for (auto& stream : streams) { | ||||||
|             stream.Update(false); |             stream.Disable(); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -353,7 +352,7 @@ private: | |||||||
|  |  | ||||||
|     std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; |     std::shared_ptr<std::vector<AsyncJobId>> uncommitted_flushes{}; | ||||||
|     std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; |     std::list<std::shared_ptr<std::vector<AsyncJobId>>> committed_flushes; | ||||||
| }; | }; // namespace VideoCommon | ||||||
|  |  | ||||||
| template <class QueryCache, class HostCounter> | template <class QueryCache, class HostCounter> | ||||||
| class HostCounterBase { | class HostCounterBase { | ||||||
|   | |||||||
| @@ -18,16 +18,27 @@ namespace OpenGL { | |||||||
|  |  | ||||||
| namespace { | namespace { | ||||||
|  |  | ||||||
| constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED}; |  | ||||||
|  |  | ||||||
| constexpr GLenum GetTarget(VideoCore::QueryType type) { | constexpr GLenum GetTarget(VideoCore::QueryType type) { | ||||||
|     return QueryTargets[static_cast<std::size_t>(type)]; |     switch (type) { | ||||||
|  |     case VideoCore::QueryType::SamplesPassed: | ||||||
|  |         return GL_SAMPLES_PASSED; | ||||||
|  |     case VideoCore::QueryType::PrimitivesGenerated: | ||||||
|  |         return GL_PRIMITIVES_GENERATED; | ||||||
|  |     case VideoCore::QueryType::TfbPrimitivesWritten: | ||||||
|  |         return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN; | ||||||
|  |     default: | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |     UNIMPLEMENTED_MSG("Query type {}", type); | ||||||
|  |     return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) | ||||||
|     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} |     : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { | ||||||
|  |     EnableCounters(); | ||||||
|  | } | ||||||
|  |  | ||||||
| QueryCache::~QueryCache() = default; | QueryCache::~QueryCache() = default; | ||||||
|  |  | ||||||
| @@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) { | |||||||
|     auto& stream = cache->Stream(type); |     auto& stream = cache->Stream(type); | ||||||
|     const bool slice_counter = WaitPending() && stream.IsEnabled(); |     const bool slice_counter = WaitPending() && stream.IsEnabled(); | ||||||
|     if (slice_counter) { |     if (slice_counter) { | ||||||
|         stream.Update(false); |         stream.Disable(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); |     auto result = VideoCommon::CachedQueryBase<HostCounter>::Flush(); | ||||||
|  |  | ||||||
|     if (slice_counter) { |     if (slice_counter) { | ||||||
|         stream.Update(true); |         stream.Enable(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return result; |     return result; | ||||||
|   | |||||||
| @@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; | |||||||
| void oglEnable(GLenum cap, bool state) { | void oglEnable(GLenum cap, bool state) { | ||||||
|     (state ? glEnable : glDisable)(cap); |     (state ? glEnable : glDisable)(cap); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) { | ||||||
|  |     switch (type) { | ||||||
|  |     case VideoCommon::QueryType::PrimitivesGenerated: | ||||||
|  |     case VideoCommon::QueryType::VtgPrimitivesOut: | ||||||
|  |         return VideoCore::QueryType::PrimitivesGenerated; | ||||||
|  |     case VideoCommon::QueryType::ZPassPixelCount64: | ||||||
|  |         return VideoCore::QueryType::SamplesPassed; | ||||||
|  |     case VideoCommon::QueryType::StreamingPrimitivesSucceeded: | ||||||
|  |         // case VideoCommon::QueryType::StreamingByteCount: | ||||||
|  |         // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride | ||||||
|  |         return VideoCore::QueryType::TfbPrimitivesWritten; | ||||||
|  |     default: | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  | } | ||||||
| } // Anonymous namespace | } // Anonymous namespace | ||||||
|  |  | ||||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
| @@ -212,7 +228,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { | |||||||
|  |  | ||||||
|     SCOPE_EXIT({ gpu.TickWork(); }); |     SCOPE_EXIT({ gpu.TickWork(); }); | ||||||
|     gpu_memory->FlushCaching(); |     gpu_memory->FlushCaching(); | ||||||
|     query_cache.UpdateCounters(); |  | ||||||
|  |  | ||||||
|     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; |     GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; | ||||||
|     if (!pipeline) { |     if (!pipeline) { | ||||||
| @@ -330,7 +345,6 @@ void RasterizerOpenGL::DrawTexture() { | |||||||
|     MICROPROFILE_SCOPE(OpenGL_Drawing); |     MICROPROFILE_SCOPE(OpenGL_Drawing); | ||||||
|  |  | ||||||
|     SCOPE_EXIT({ gpu.TickWork(); }); |     SCOPE_EXIT({ gpu.TickWork(); }); | ||||||
|     query_cache.UpdateCounters(); |  | ||||||
|  |  | ||||||
|     texture_cache.SynchronizeGraphicsDescriptors(); |     texture_cache.SynchronizeGraphicsDescriptors(); | ||||||
|     texture_cache.UpdateRenderTargets(false); |     texture_cache.UpdateRenderTargets(false); | ||||||
| @@ -397,21 +411,28 @@ void RasterizerOpenGL::DispatchCompute() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { | ||||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { |     const auto query_cache_type = MaxwellToVideoCoreQuery(type); | ||||||
|         query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); |     if (!query_cache_type.has_value()) { | ||||||
|  |         UNIMPLEMENTED_MSG("Reset query type: {}", type); | ||||||
|  |         return; | ||||||
|     } |     } | ||||||
|  |     query_cache.ResetCounter(*query_cache_type); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { |                              VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { | ||||||
|     if (type == VideoCommon::QueryType::ZPassPixelCount64) { |     const auto query_cache_type = MaxwellToVideoCoreQuery(type); | ||||||
|         if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { |     if (!query_cache_type.has_value()) { | ||||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); |         return QueryFallback(gpu_addr, type, flags, payload, subreport); | ||||||
|         } else { |  | ||||||
|             query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); |  | ||||||
|         } |  | ||||||
|         return; |  | ||||||
|     } |     } | ||||||
|  |     const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout); | ||||||
|  |     const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt; | ||||||
|  |     query_cache.Query(gpu_addr, *query_cache_type, timestamp); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|  |                                      VideoCommon::QueryPropertiesFlags flags, u32 payload, | ||||||
|  |                                      u32 subreport) { | ||||||
|     if (type != VideoCommon::QueryType::Payload) { |     if (type != VideoCommon::QueryType::Payload) { | ||||||
|         payload = 1u; |         payload = 1u; | ||||||
|     } |     } | ||||||
| @@ -1294,15 +1315,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum | |||||||
|     program->ConfigureTransformFeedback(); |     program->ConfigureTransformFeedback(); | ||||||
|  |  | ||||||
|     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || |     UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || | ||||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || |                      regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); | ||||||
|                      regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); |  | ||||||
|     UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); |  | ||||||
|  |  | ||||||
|     // We may have to call BeginTransformFeedbackNV here since they seem to call different |     // We may have to call BeginTransformFeedbackNV here since they seem to call different | ||||||
|     // implementations on Nvidia's driver (the pointer is different) but we are using |     // implementations on Nvidia's driver (the pointer is different) but we are using | ||||||
|     // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB |     // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB | ||||||
|     // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. |     // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. | ||||||
|     glBeginTransformFeedback(GL_POINTS); |     glBeginTransformFeedback(primitive_mode); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::EndTransformFeedback() { | void RasterizerOpenGL::EndTransformFeedback() { | ||||||
|   | |||||||
| @@ -225,6 +225,9 @@ private: | |||||||
|     /// End a transform feedback |     /// End a transform feedback | ||||||
|     void EndTransformFeedback(); |     void EndTransformFeedback(); | ||||||
|  |  | ||||||
|  |     void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, | ||||||
|  |                        VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); | ||||||
|  |  | ||||||
|     Tegra::GPU& gpu; |     Tegra::GPU& gpu; | ||||||
|  |  | ||||||
|     const Device& device; |     const Device& device; | ||||||
|   | |||||||
| @@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { | ||||||
|  |     if (type != VideoCommon::QueryType::ZPassPixelCount64) { | ||||||
|  |         LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|     query_cache.CounterReset(type); |     query_cache.CounterReset(type); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user