Merge pull request #6162 from degasus/no_spin_loops
video_core: Avoid spin loops.
This commit is contained in:
		| @@ -83,11 +83,15 @@ public: | |||||||
|         return true; |         return true; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     T PopWait() { |     void Wait() { | ||||||
|         if (Empty()) { |         if (Empty()) { | ||||||
|             std::unique_lock lock{cv_mutex}; |             std::unique_lock lock{cv_mutex}; | ||||||
|             cv.wait(lock, [this]() { return !Empty(); }); |             cv.wait(lock, [this]() { return !Empty(); }); | ||||||
|         } |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     T PopWait() { | ||||||
|  |         Wait(); | ||||||
|         T t; |         T t; | ||||||
|         Pop(t); |         Pop(t); | ||||||
|         return t; |         return t; | ||||||
| @@ -156,6 +160,10 @@ public: | |||||||
|         return spsc_queue.Pop(t); |         return spsc_queue.Pop(t); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void Wait() { | ||||||
|  |         spsc_queue.Wait(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     T PopWait() { |     T PopWait() { | ||||||
|         return spsc_queue.PopWait(); |         return spsc_queue.PopWait(); | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -296,7 +296,7 @@ struct System::Impl { | |||||||
|         exit_lock = false; |         exit_lock = false; | ||||||
|  |  | ||||||
|         if (gpu_core) { |         if (gpu_core) { | ||||||
|             gpu_core->WaitIdle(); |             gpu_core->ShutDown(); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         services.reset(); |         services.reset(); | ||||||
|   | |||||||
| @@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | |||||||
|     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); |     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||||||
| } | } | ||||||
|  |  | ||||||
| void GPU::WaitIdle() const { | void GPU::ShutDown() { | ||||||
|     gpu_thread.WaitIdle(); |     gpu_thread.ShutDown(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void GPU::OnCommandListEnd() { | void GPU::OnCommandListEnd() { | ||||||
|   | |||||||
| @@ -219,8 +219,8 @@ public: | |||||||
|         return *shader_notify; |         return *shader_notify; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     // Waits for the GPU to finish working |     // Stops the GPU execution and waits for the GPU to finish working | ||||||
|     void WaitIdle() const; |     void ShutDown(); | ||||||
|  |  | ||||||
|     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. |     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||||||
|     void WaitFence(u32 syncpoint_id, u32 value); |     void WaitFence(u32 syncpoint_id, u32 value); | ||||||
|   | |||||||
| @@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||||||
|     system.RegisterHostThread(); |     system.RegisterHostThread(); | ||||||
|  |  | ||||||
|     // Wait for first GPU command before acquiring the window context |     // Wait for first GPU command before acquiring the window context | ||||||
|     while (state.queue.Empty()) |     state.queue.Wait(); | ||||||
|         ; |  | ||||||
|  |  | ||||||
|     // If emulation was stopped during disk shader loading, abort before trying to acquire context |     // If emulation was stopped during disk shader loading, abort before trying to acquire context | ||||||
|     if (!state.is_running) { |     if (!state.is_running) { | ||||||
| @@ -57,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | |||||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); |             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | ||||||
|         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { |         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | ||||||
|             return; |             ASSERT(state.is_running == false); | ||||||
|         } else { |         } else { | ||||||
|             UNREACHABLE(); |             UNREACHABLE(); | ||||||
|         } |         } | ||||||
|         state.signaled_fence.store(next.fence); |         state.signaled_fence.store(next.fence); | ||||||
|  |         if (next.block) { | ||||||
|  |             // We have to lock the write_lock to ensure that the condition_variable wait does not | ||||||
|  |             // hit a race between its predicate check and the lock itself. | ||||||
|  |             std::lock_guard lk(state.write_lock); | ||||||
|  |             state.cv.notify_all(); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_) | |||||||
|     : system{system_}, is_async{is_async_} {} |     : system{system_}, is_async{is_async_} {} | ||||||
|  |  | ||||||
| ThreadManager::~ThreadManager() { | ThreadManager::~ThreadManager() { | ||||||
|     if (!thread.joinable()) { |     ShutDown(); | ||||||
|         return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     // Notify GPU thread that a shutdown is pending |  | ||||||
|     PushCommand(EndProcessingCommand()); |  | ||||||
|     thread.join(); |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | ||||||
| @@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||||||
|     case Settings::GPUAccuracy::Extreme: { |     case Settings::GPUAccuracy::Extreme: { | ||||||
|         auto& gpu = system.GPU(); |         auto& gpu = system.GPU(); | ||||||
|         u64 fence = gpu.RequestFlush(addr, size); |         u64 fence = gpu.RequestFlush(addr, size); | ||||||
|         PushCommand(GPUTickCommand()); |         PushCommand(GPUTickCommand(), true); | ||||||
|         while (fence > gpu.CurrentFlushRequestFence()) { |         ASSERT(fence <= gpu.CurrentFlushRequestFence()); | ||||||
|         } |  | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     default: |     default: | ||||||
| @@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | |||||||
|     rasterizer->OnCPUWrite(addr, size); |     rasterizer->OnCPUWrite(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void ThreadManager::WaitIdle() const { | void ThreadManager::ShutDown() { | ||||||
|     while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && |     if (!state.is_running) { | ||||||
|            system.IsPoweredOn()) { |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         std::lock_guard lk(state.write_lock); | ||||||
|  |         state.is_running = false; | ||||||
|  |         state.cv.notify_all(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     if (!thread.joinable()) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // Notify GPU thread that a shutdown is pending | ||||||
|  |     PushCommand(EndProcessingCommand()); | ||||||
|  |     thread.join(); | ||||||
| } | } | ||||||
|  |  | ||||||
| void ThreadManager::OnCommandListEnd() { | void ThreadManager::OnCommandListEnd() { | ||||||
|     PushCommand(OnCommandListEndCommand()); |     PushCommand(OnCommandListEndCommand()); | ||||||
| } | } | ||||||
|  |  | ||||||
| u64 ThreadManager::PushCommand(CommandData&& command_data) { | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | ||||||
|     const u64 fence{++state.last_fence}; |  | ||||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); |  | ||||||
|  |  | ||||||
|     if (!is_async) { |     if (!is_async) { | ||||||
|         // In synchronous GPU mode, block the caller until the command has executed |         // In synchronous GPU mode, block the caller until the command has executed | ||||||
|         WaitIdle(); |         block = true; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     std::unique_lock lk(state.write_lock); | ||||||
|  |     const u64 fence{++state.last_fence}; | ||||||
|  |     state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); | ||||||
|  |  | ||||||
|  |     if (block) { | ||||||
|  |         state.cv.wait(lk, [this, fence] { | ||||||
|  |             return fence <= state.signaled_fence.load(std::memory_order_relaxed) || | ||||||
|  |                    !state.is_running; | ||||||
|  |         }); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     return fence; |     return fence; | ||||||
|   | |||||||
| @@ -90,21 +90,24 @@ using CommandData = | |||||||
| struct CommandDataContainer { | struct CommandDataContainer { | ||||||
|     CommandDataContainer() = default; |     CommandDataContainer() = default; | ||||||
|  |  | ||||||
|     explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) |     explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) | ||||||
|         : data{std::move(data_)}, fence{next_fence_} {} |         : data{std::move(data_)}, fence{next_fence_}, block(block_) {} | ||||||
|  |  | ||||||
|     CommandData data; |     CommandData data; | ||||||
|     u64 fence{}; |     u64 fence{}; | ||||||
|  |     bool block{}; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Struct used to synchronize the GPU thread | /// Struct used to synchronize the GPU thread | ||||||
| struct SynchState final { | struct SynchState final { | ||||||
|     std::atomic_bool is_running{true}; |     std::atomic_bool is_running{true}; | ||||||
|  |  | ||||||
|     using CommandQueue = Common::MPSCQueue<CommandDataContainer>; |     using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||||||
|  |     std::mutex write_lock; | ||||||
|     CommandQueue queue; |     CommandQueue queue; | ||||||
|     u64 last_fence{}; |     u64 last_fence{}; | ||||||
|     std::atomic<u64> signaled_fence{}; |     std::atomic<u64> signaled_fence{}; | ||||||
|  |     std::condition_variable cv; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Class used to manage the GPU thread | /// Class used to manage the GPU thread | ||||||
| @@ -132,14 +135,14 @@ public: | |||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); |     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||||
|  |  | ||||||
|     // Wait until the gpu thread is idle. |     // Stops the GPU execution and waits for the GPU to finish working | ||||||
|     void WaitIdle() const; |     void ShutDown(); | ||||||
|  |  | ||||||
|     void OnCommandListEnd(); |     void OnCommandListEnd(); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     /// Pushes a command to be executed by the GPU thread |     /// Pushes a command to be executed by the GPU thread | ||||||
|     u64 PushCommand(CommandData&& command_data); |     u64 PushCommand(CommandData&& command_data, bool block = false); | ||||||
|  |  | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|     const bool is_async; |     const bool is_async; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user