OpenGL: Implement Fencing backend.
This commit is contained in:
		| @@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| void Maxwell3D::ReleaseFences() { |  | ||||||
|     for (const auto pair : delay_fences) { |  | ||||||
|         const auto [addr, payload] = pair; |  | ||||||
|         memory_manager.Write<u32>(addr, static_cast<u32>(payload)); |  | ||||||
|     } |  | ||||||
|     delay_fences.clear(); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| void Maxwell3D::ProcessQueryGet() { | void Maxwell3D::ProcessQueryGet() { | ||||||
|     // TODO(Subv): Support the other query units. |     // TODO(Subv): Support the other query units. | ||||||
|     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, |     ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, | ||||||
| @@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() { | |||||||
|  |  | ||||||
|     switch (regs.query.query_get.operation) { |     switch (regs.query.query_get.operation) { | ||||||
|     case Regs::QueryOperation::Release: { |     case Regs::QueryOperation::Release: { | ||||||
|         rasterizer.FlushCommands(); |  | ||||||
|         rasterizer.SyncGuestHost(); |  | ||||||
|         const u64 result = regs.query.query_sequence; |         const u64 result = regs.query.query_sequence; | ||||||
|         delay_fences.emplace_back(regs.query.QueryAddress(), result); |         if (regs.query.query_get.fence == 1) { | ||||||
|  |             rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result)); | ||||||
|  |         } else { | ||||||
|  |             StampQueryResult(result, regs.query.query_get.short_query == 0); | ||||||
|  |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case Regs::QueryOperation::Acquire: |     case Regs::QueryOperation::Acquire: | ||||||
|   | |||||||
| @@ -1427,8 +1427,6 @@ public: | |||||||
|         Tables tables{}; |         Tables tables{}; | ||||||
|     } dirty; |     } dirty; | ||||||
|  |  | ||||||
|     void ReleaseFences(); |  | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     void InitializeRegisterDefaults(); |     void InitializeRegisterDefaults(); | ||||||
|  |  | ||||||
| @@ -1469,8 +1467,6 @@ private: | |||||||
|  |  | ||||||
|     std::array<u8, Regs::NUM_REGS> dirty_pointers{}; |     std::array<u8, Regs::NUM_REGS> dirty_pointers{}; | ||||||
|  |  | ||||||
|     std::vector<std::pair<GPUVAddr, u64>> delay_fences; |  | ||||||
|  |  | ||||||
|     /// Retrieves information about a specific TIC entry from the TIC buffer. |     /// Retrieves information about a specific TIC entry from the TIC buffer. | ||||||
|     Texture::TICEntry GetTICEntry(u32 tic_index) const; |     Texture::TICEntry GetTICEntry(u32 tic_index) const; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -147,7 +147,7 @@ void GPU::SyncGuestHost() { | |||||||
| } | } | ||||||
|  |  | ||||||
| void GPU::OnCommandListEnd() { | void GPU::OnCommandListEnd() { | ||||||
|     maxwell_3d->ReleaseFences(); |     renderer.Rasterizer().ReleaseFences(); | ||||||
| } | } | ||||||
| // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence | ||||||
| // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | // their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4. | ||||||
|   | |||||||
| @@ -157,7 +157,7 @@ public: | |||||||
|  |  | ||||||
|     void FlushCommands(); |     void FlushCommands(); | ||||||
|     void SyncGuestHost(); |     void SyncGuestHost(); | ||||||
|     void OnCommandListEnd(); |     virtual void OnCommandListEnd(); | ||||||
|  |  | ||||||
|     /// Returns a reference to the Maxwell3D GPU engine. |     /// Returns a reference to the Maxwell3D GPU engine. | ||||||
|     Engines::Maxwell3D& Maxwell3D(); |     Engines::Maxwell3D& Maxwell3D(); | ||||||
|   | |||||||
| @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { | |||||||
|     gpu_thread.WaitIdle(); |     gpu_thread.WaitIdle(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void GPUAsynch::OnCommandListEnd() { | ||||||
|  |     gpu_thread.OnCommandListEnd(); | ||||||
|  | } | ||||||
|  |  | ||||||
| } // namespace VideoCommon | } // namespace VideoCommon | ||||||
|   | |||||||
| @@ -32,6 +32,8 @@ public: | |||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||||
|     void WaitIdle() const override; |     void WaitIdle() const override; | ||||||
|  |  | ||||||
|  |     void OnCommandListEnd() override; | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; |     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic | |||||||
|             dma_pusher.DispatchCalls(); |             dma_pusher.DispatchCalls(); | ||||||
|         } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { |         } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||||||
|             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); |             renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); | ||||||
|  |         } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) { | ||||||
|  |             renderer.Rasterizer().ReleaseFences(); | ||||||
|         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { |         } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | ||||||
|             renderer.Rasterizer().FlushRegion(data->addr, data->size); |             renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||||||
|         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { |         } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||||
| @@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void ThreadManager::OnCommandListEnd() { | ||||||
|  |     PushCommand(OnCommandListEndCommand()); | ||||||
|  | } | ||||||
|  |  | ||||||
| u64 ThreadManager::PushCommand(CommandData&& command_data) { | u64 ThreadManager::PushCommand(CommandData&& command_data) { | ||||||
|     const u64 fence{++state.last_fence}; |     const u64 fence{++state.last_fence}; | ||||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | ||||||
|   | |||||||
| @@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final { | |||||||
|     u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /// Command to signal to the GPU thread that a command list has ended | ||||||
|  | struct OnCommandListEndCommand final {}; | ||||||
|  |  | ||||||
| using CommandData = | using CommandData = | ||||||
|     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, |     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||||||
|                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; |                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>; | ||||||
|  |  | ||||||
| struct CommandDataContainer { | struct CommandDataContainer { | ||||||
|     CommandDataContainer() = default; |     CommandDataContainer() = default; | ||||||
| @@ -122,6 +125,8 @@ public: | |||||||
|     // Wait until the gpu thread is idle. |     // Wait until the gpu thread is idle. | ||||||
|     void WaitIdle() const; |     void WaitIdle() const; | ||||||
|  |  | ||||||
|  |     void OnCommandListEnd(); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     /// Pushes a command to be executed by the GPU thread |     /// Pushes a command to be executed by the GPU thread | ||||||
|     u64 PushCommand(CommandData&& command_data); |     u64 PushCommand(CommandData&& command_data); | ||||||
|   | |||||||
| @@ -49,6 +49,14 @@ public: | |||||||
|     /// Records a GPU query and caches it |     /// Records a GPU query and caches it | ||||||
|     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; |     virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0; | ||||||
|  |  | ||||||
|  |     virtual void SignalFence(GPUVAddr addr, u32 value) { | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     virtual void ReleaseFences() { | ||||||
|  |  | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Notify rasterizer that all caches should be flushed to Switch memory |     /// Notify rasterizer that all caches should be flushed to Switch memory | ||||||
|     virtual void FlushAll() = 0; |     virtual void FlushAll() = 0; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() { | |||||||
|     buffer_cache.SyncGuestHost(); |     buffer_cache.SyncGuestHost(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) { | ||||||
|  |     if (!fences.empty()) { | ||||||
|  |         const std::pair<GPUVAddr, u32>& current_fence = fences.front(); | ||||||
|  |         const auto [address, payload] = current_fence; | ||||||
|  |         texture_cache.PopAsyncFlushes(); | ||||||
|  |         auto& gpu{system.GPU()}; | ||||||
|  |         auto& memory_manager{gpu.MemoryManager()}; | ||||||
|  |         memory_manager.Write<u32>(address, payload); | ||||||
|  |         fences.pop_front(); | ||||||
|  |     } | ||||||
|  |     fences.emplace_back(addr, value); | ||||||
|  |     texture_cache.CommitAsyncFlushes(); | ||||||
|  |     FlushCommands(); | ||||||
|  |     SyncGuestHost(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void RasterizerOpenGL::ReleaseFences() { | ||||||
|  |     while (!fences.empty()) { | ||||||
|  |         const std::pair<GPUVAddr, u32>& current_fence = fences.front(); | ||||||
|  |         const auto [address, payload] = current_fence; | ||||||
|  |         texture_cache.PopAsyncFlushes(); | ||||||
|  |         auto& gpu{system.GPU()}; | ||||||
|  |         auto& memory_manager{gpu.MemoryManager()}; | ||||||
|  |         memory_manager.Write<u32>(address, payload); | ||||||
|  |         fences.pop_front(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||||
|     if (Settings::IsGPULevelExtreme()) { |     if (Settings::IsGPULevelExtreme()) { | ||||||
|         FlushRegion(addr, size); |         FlushRegion(addr, size); | ||||||
|   | |||||||
| @@ -69,6 +69,8 @@ public: | |||||||
|     void InvalidateRegion(VAddr addr, u64 size) override; |     void InvalidateRegion(VAddr addr, u64 size) override; | ||||||
|     void OnCPUWrite(VAddr addr, u64 size) override; |     void OnCPUWrite(VAddr addr, u64 size) override; | ||||||
|     void SyncGuestHost() override; |     void SyncGuestHost() override; | ||||||
|  |     void SignalFence(GPUVAddr addr, u32 value) override; | ||||||
|  |     void ReleaseFences() override; | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; | ||||||
|     void FlushCommands() override; |     void FlushCommands() override; | ||||||
|     void TickFrame() override; |     void TickFrame() override; | ||||||
|   | |||||||
| @@ -238,7 +238,7 @@ public: | |||||||
|             surface->MarkAsRenderTarget(false, NO_RT); |             surface->MarkAsRenderTarget(false, NO_RT); | ||||||
|             const auto& cr_params = surface->GetSurfaceParams(); |             const auto& cr_params = surface->GetSurfaceParams(); | ||||||
|             if (!cr_params.is_tiled) { |             if (!cr_params.is_tiled) { | ||||||
|                 FlushSurface(surface); |                 AsyncFlushSurface(surface); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         render_targets[index].target = surface_view.first; |         render_targets[index].target = surface_view.first; | ||||||
| @@ -317,6 +317,26 @@ public: | |||||||
|         return ++ticks; |         return ++ticks; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void CommitAsyncFlushes() { | ||||||
|  |         commited_flushes.push_back(uncommited_flushes); | ||||||
|  |         uncommited_flushes.reset(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void PopAsyncFlushes() { | ||||||
|  |         if (commited_flushes.empty()) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         auto& flush_list = commited_flushes.front(); | ||||||
|  |         if (!flush_list) { | ||||||
|  |             commited_flushes.pop_front(); | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         for (TSurface& surface : *flush_list) { | ||||||
|  |             FlushSurface(surface); | ||||||
|  |         } | ||||||
|  |         commited_flushes.pop_front(); | ||||||
|  |     } | ||||||
|  |  | ||||||
| protected: | protected: | ||||||
|     explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |     explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||||
|                           bool is_astc_supported) |                           bool is_astc_supported) | ||||||
| @@ -1152,6 +1172,13 @@ private: | |||||||
|         TView view; |         TView view; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  |     void AsyncFlushSurface(TSurface& surface) { | ||||||
|  |         if (!uncommited_flushes) { | ||||||
|  |             uncommited_flushes = std::make_shared<std::list<TSurface>>(); | ||||||
|  |         } | ||||||
|  |         uncommited_flushes->push_back(surface); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|  |  | ||||||
|     FormatLookupTable format_lookup_table; |     FormatLookupTable format_lookup_table; | ||||||
| @@ -1198,6 +1225,9 @@ private: | |||||||
|  |  | ||||||
|     std::list<TSurface> marked_for_unregister; |     std::list<TSurface> marked_for_unregister; | ||||||
|  |  | ||||||
|  |     std::shared_ptr<std::list<TSurface>> uncommited_flushes{}; | ||||||
|  |     std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes; | ||||||
|  |  | ||||||
|     StagingCache staging_cache; |     StagingCache staging_cache; | ||||||
|     std::recursive_mutex mutex; |     std::recursive_mutex mutex; | ||||||
| }; | }; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user