diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 592c28ba3..95ba4f76c 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -586,14 +586,22 @@ void Maxwell3D::ProcessQueryCondition() { } void Maxwell3D::ProcessCounterReset() { - switch (regs.clear_report_value) { - case Regs::ClearReport::ZPassPixelCount: - rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); - break; - default: - LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); - break; - } + const auto query_type = [clear_report = regs.clear_report_value]() { + switch (clear_report) { + case Tegra::Engines::Maxwell3D::Regs::ClearReport::ZPassPixelCount: + return VideoCommon::QueryType::ZPassPixelCount64; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::StreamingPrimitivesSucceeded: + return VideoCommon::QueryType::StreamingPrimitivesSucceeded; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::PrimitivesGenerated: + return VideoCommon::QueryType::PrimitivesGenerated; + case Tegra::Engines::Maxwell3D::Regs::ClearReport::VtgPrimitivesOut: + return VideoCommon::QueryType::VtgPrimitivesOut; + default: + LOG_DEBUG(HW_GPU, "Unimplemented counter reset={}", clear_report); + return VideoCommon::QueryType::Payload; + } + }(); + rasterizer->ResetCounter(query_type); } void Maxwell3D::ProcessSyncPoint() { diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 9fcaeeac7..a64404ce4 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -28,8 +28,11 @@ namespace VideoCore { enum class QueryType { SamplesPassed, + PrimitivesGenerated, + TfbPrimitivesWritten, + Count, }; -constexpr std::size_t NumQueryTypes = 1; +constexpr std::size_t NumQueryTypes = static_cast(QueryType::Count); } // namespace VideoCore namespace VideoCommon { @@ -44,15 +47,6 @@ public: explicit CounterStreamBase(QueryCache& cache_, VideoCore::QueryType type_) : cache{cache_}, type{type_} {} - /// Updates the state of the stream, enabling or disabling as needed. - void Update(bool enabled) { - if (enabled) { - Enable(); - } else { - Disable(); - } - } - /// Resets the stream to zero. It doesn't disable the query after resetting. void Reset() { if (current) { @@ -80,7 +74,6 @@ public: return current != nullptr; } -private: /// Enables the stream. void Enable() { if (current) { @@ -97,6 +90,7 @@ private: last = std::exchange(current, nullptr); } +private: QueryCache& cache; const VideoCore::QueryType type; @@ -112,8 +106,14 @@ public: : rasterizer{rasterizer_}, // Use reinterpret_cast instead of static_cast as workaround for // UBSan bug (https://github.com/llvm/llvm-project/issues/59060) - cpu_memory{cpu_memory_}, streams{{CounterStream{reinterpret_cast(*this), - VideoCore::QueryType::SamplesPassed}}} { + cpu_memory{cpu_memory_}, streams{{ + {CounterStream{reinterpret_cast(*this), + VideoCore::QueryType::SamplesPassed}}, + {CounterStream{reinterpret_cast(*this), + VideoCore::QueryType::PrimitivesGenerated}}, + {CounterStream{reinterpret_cast(*this), + VideoCore::QueryType::TfbPrimitivesWritten}}, + }} { (void)slot_async_jobs.insert(); // Null value } @@ -157,12 +157,11 @@ public: AsyncFlushQuery(query, timestamp, lock); } - /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. - void UpdateCounters() { + /// Enables all available GPU counters + void EnableCounters() { std::unique_lock lock{mutex}; - if (maxwell3d) { - const auto& regs = maxwell3d->regs; - Stream(VideoCore::QueryType::SamplesPassed).Update(regs.zpass_pixel_count_enable); + for (auto& stream : streams) { + stream.Enable(); } } @@ -176,7 +175,7 @@ public: void DisableStreams() { std::unique_lock lock{mutex}; for (auto& stream : streams) { - stream.Update(false); + stream.Disable(); } } @@ -353,7 +352,7 @@ private: std::shared_ptr> uncommitted_flushes{}; std::list>> committed_flushes; -}; +}; // namespace VideoCommon template class HostCounterBase { diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index ec142d48e..fef7360ed 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -18,16 +18,27 @@ namespace OpenGL { namespace { -constexpr std::array QueryTargets = {GL_SAMPLES_PASSED}; - constexpr GLenum GetTarget(VideoCore::QueryType type) { - return QueryTargets[static_cast(type)]; + switch (type) { + case VideoCore::QueryType::SamplesPassed: + return GL_SAMPLES_PASSED; + case VideoCore::QueryType::PrimitivesGenerated: + return GL_PRIMITIVES_GENERATED; + case VideoCore::QueryType::TfbPrimitivesWritten: + return GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN; + default: + break; + } + UNIMPLEMENTED_MSG("Query type {}", type); + return 0; } } // Anonymous namespace QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Core::Memory::Memory& cpu_memory_) - : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} {} + : QueryCacheLegacy(rasterizer_, cpu_memory_), gl_rasterizer{rasterizer_} { + EnableCounters(); +} QueryCache::~QueryCache() = default; @@ -103,13 +114,13 @@ u64 CachedQuery::Flush([[maybe_unused]] bool async) { auto& stream = cache->Stream(type); const bool slice_counter = WaitPending() && stream.IsEnabled(); if (slice_counter) { - stream.Update(false); + stream.Disable(); } auto result = VideoCommon::CachedQueryBase::Flush(); if (slice_counter) { - stream.Update(true); + stream.Enable(); } return result; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 279e5a4e0..0545e33c5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -51,6 +51,22 @@ constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16; void oglEnable(GLenum cap, bool state) { (state ? glEnable : glDisable)(cap); } + +std::optional MaxwellToVideoCoreQuery(VideoCommon::QueryType type) { + switch (type) { + case VideoCommon::QueryType::PrimitivesGenerated: + case VideoCommon::QueryType::VtgPrimitivesOut: + return VideoCore::QueryType::PrimitivesGenerated; + case VideoCommon::QueryType::ZPassPixelCount64: + return VideoCore::QueryType::SamplesPassed; + case VideoCommon::QueryType::StreamingPrimitivesSucceeded: + // case VideoCommon::QueryType::StreamingByteCount: + // TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride + return VideoCore::QueryType::TfbPrimitivesWritten; + default: + return std::nullopt; + } +} } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, @@ -212,7 +228,6 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { SCOPE_EXIT({ gpu.TickWork(); }); gpu_memory->FlushCaching(); - query_cache.UpdateCounters(); GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()}; if (!pipeline) { @@ -330,7 +345,6 @@ void RasterizerOpenGL::DrawTexture() { MICROPROFILE_SCOPE(OpenGL_Drawing); SCOPE_EXIT({ gpu.TickWork(); }); - query_cache.UpdateCounters(); texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -397,21 +411,28 @@ void RasterizerOpenGL::DispatchCompute() { } void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { - if (type == VideoCommon::QueryType::ZPassPixelCount64) { - query_cache.ResetCounter(VideoCore::QueryType::SamplesPassed); + const auto query_cache_type = MaxwellToVideoCoreQuery(type); + if (!query_cache_type.has_value()) { + UNIMPLEMENTED_MSG("Reset query type: {}", type); + return; } + query_cache.ResetCounter(*query_cache_type); } void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { - if (type == VideoCommon::QueryType::ZPassPixelCount64) { - if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { - query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); - } else { - query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, std::nullopt); - } - return; + const auto query_cache_type = MaxwellToVideoCoreQuery(type); + if (!query_cache_type.has_value()) { + return QueryFallback(gpu_addr, type, flags, payload, subreport); } + const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout); + const auto timestamp = has_timeout ? std::optional{gpu.GetTicks()} : std::nullopt; + query_cache.Query(gpu_addr, *query_cache_type, timestamp); +} + +void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, + u32 subreport) { if (type != VideoCommon::QueryType::Payload) { payload = 1u; } @@ -1294,15 +1315,13 @@ void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum program->ConfigureTransformFeedback(); UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation) || - regs.IsShaderConfigEnabled(Maxwell::ShaderType::Geometry)); - UNIMPLEMENTED_IF(primitive_mode != GL_POINTS); + regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation)); // We may have to call BeginTransformFeedbackNV here since they seem to call different // implementations on Nvidia's driver (the pointer is different) but we are using // ARB_transform_feedback3 features with NV_transform_feedback interactions and the ARB // extension doesn't define BeginTransformFeedback (without NV) interactions. It just works. - glBeginTransformFeedback(GL_POINTS); + glBeginTransformFeedback(primitive_mode); } void RasterizerOpenGL::EndTransformFeedback() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ceffe1f1e..b79d7a70c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -225,6 +225,9 @@ private: /// End a transform feedback void EndTransformFeedback(); + void QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport); + Tegra::GPU& gpu; const Device& device; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 59829c88b..241fc34be 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -485,6 +485,10 @@ void RasterizerVulkan::DispatchCompute() { } void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) { + if (type != VideoCommon::QueryType::ZPassPixelCount64) { + LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type); + return; + } query_cache.CounterReset(type); }