Merge pull request #6162 from degasus/no_spin_loops
video_core: Avoid spin loops.
This commit is contained in:
		| @@ -83,11 +83,15 @@ public: | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     T PopWait() { | ||||
|     void Wait() { | ||||
|         if (Empty()) { | ||||
|             std::unique_lock lock{cv_mutex}; | ||||
|             cv.wait(lock, [this]() { return !Empty(); }); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     T PopWait() { | ||||
|         Wait(); | ||||
|         T t; | ||||
|         Pop(t); | ||||
|         return t; | ||||
| @@ -156,6 +160,10 @@ public: | ||||
|         return spsc_queue.Pop(t); | ||||
|     } | ||||
|  | ||||
|     void Wait() { | ||||
|         spsc_queue.Wait(); | ||||
|     } | ||||
|  | ||||
|     T PopWait() { | ||||
|         return spsc_queue.PopWait(); | ||||
|     } | ||||
|   | ||||
| @@ -296,7 +296,7 @@ struct System::Impl { | ||||
|         exit_lock = false; | ||||
|  | ||||
|         if (gpu_core) { | ||||
|             gpu_core->WaitIdle(); | ||||
|             gpu_core->ShutDown(); | ||||
|         } | ||||
|  | ||||
|         services.reset(); | ||||
|   | ||||
| @@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { | ||||
|     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); | ||||
| } | ||||
|  | ||||
| void GPU::WaitIdle() const { | ||||
|     gpu_thread.WaitIdle(); | ||||
| void GPU::ShutDown() { | ||||
|     gpu_thread.ShutDown(); | ||||
| } | ||||
|  | ||||
| void GPU::OnCommandListEnd() { | ||||
|   | ||||
| @@ -219,8 +219,8 @@ public: | ||||
|         return *shader_notify; | ||||
|     } | ||||
|  | ||||
|     // Waits for the GPU to finish working | ||||
|     void WaitIdle() const; | ||||
|     // Stops the GPU execution and waits for the GPU to finish working | ||||
|     void ShutDown(); | ||||
|  | ||||
|     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. | ||||
|     void WaitFence(u32 syncpoint_id, u32 value); | ||||
|   | ||||
| @@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|     system.RegisterHostThread(); | ||||
|  | ||||
|     // Wait for first GPU command before acquiring the window context | ||||
|     while (state.queue.Empty()) | ||||
|         ; | ||||
|     state.queue.Wait(); | ||||
|  | ||||
|     // If emulation was stopped during disk shader loading, abort before trying to acquire context | ||||
|     if (!state.is_running) { | ||||
| @@ -57,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, | ||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); | ||||
|         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) { | ||||
|             return; | ||||
|             ASSERT(state.is_running == false); | ||||
|         } else { | ||||
|             UNREACHABLE(); | ||||
|         } | ||||
|         state.signaled_fence.store(next.fence); | ||||
|         if (next.block) { | ||||
|             // We have to hold the write_lock while notifying so that the condition_variable wait | ||||
|             // cannot race between its predicate check and the moment it actually blocks. | ||||
|             std::lock_guard lk(state.write_lock); | ||||
|             state.cv.notify_all(); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_) | ||||
|     : system{system_}, is_async{is_async_} {} | ||||
|  | ||||
| ThreadManager::~ThreadManager() { | ||||
|     if (!thread.joinable()) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Notify GPU thread that a shutdown is pending | ||||
|     PushCommand(EndProcessingCommand()); | ||||
|     thread.join(); | ||||
|     ShutDown(); | ||||
| } | ||||
|  | ||||
| void ThreadManager::StartThread(VideoCore::RendererBase& renderer, | ||||
| @@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | ||||
|     case Settings::GPUAccuracy::Extreme: { | ||||
|         auto& gpu = system.GPU(); | ||||
|         u64 fence = gpu.RequestFlush(addr, size); | ||||
|         PushCommand(GPUTickCommand()); | ||||
|         while (fence > gpu.CurrentFlushRequestFence()) { | ||||
|         } | ||||
|         PushCommand(GPUTickCommand(), true); | ||||
|         ASSERT(fence <= gpu.CurrentFlushRequestFence()); | ||||
|         break; | ||||
|     } | ||||
|     default: | ||||
| @@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||
|     rasterizer->OnCPUWrite(addr, size); | ||||
| } | ||||
|  | ||||
| void ThreadManager::WaitIdle() const { | ||||
|     while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && | ||||
|            system.IsPoweredOn()) { | ||||
| void ThreadManager::ShutDown() { | ||||
|     if (!state.is_running) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     { | ||||
|         std::lock_guard lk(state.write_lock); | ||||
|         state.is_running = false; | ||||
|         state.cv.notify_all(); | ||||
|     } | ||||
|  | ||||
|     if (!thread.joinable()) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     // Notify GPU thread that a shutdown is pending | ||||
|     PushCommand(EndProcessingCommand()); | ||||
|     thread.join(); | ||||
| } | ||||
|  | ||||
| void ThreadManager::OnCommandListEnd() { | ||||
|     PushCommand(OnCommandListEndCommand()); | ||||
| } | ||||
|  | ||||
| u64 ThreadManager::PushCommand(CommandData&& command_data) { | ||||
|     const u64 fence{++state.last_fence}; | ||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence)); | ||||
|  | ||||
| u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | ||||
|     if (!is_async) { | ||||
|         // In synchronous GPU mode, block the caller until the command has executed | ||||
|         WaitIdle(); | ||||
|         block = true; | ||||
|     } | ||||
|  | ||||
|     std::unique_lock lk(state.write_lock); | ||||
|     const u64 fence{++state.last_fence}; | ||||
|     state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); | ||||
|  | ||||
|     if (block) { | ||||
|         state.cv.wait(lk, [this, fence] { | ||||
|             return fence <= state.signaled_fence.load(std::memory_order_relaxed) || | ||||
|                    !state.is_running; | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     return fence; | ||||
|   | ||||
| @@ -90,21 +90,24 @@ using CommandData = | ||||
| struct CommandDataContainer { | ||||
|     CommandDataContainer() = default; | ||||
|  | ||||
|     explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) | ||||
|         : data{std::move(data_)}, fence{next_fence_} {} | ||||
|     explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) | ||||
|         : data{std::move(data_)}, fence{next_fence_}, block(block_) {} | ||||
|  | ||||
|     CommandData data; | ||||
|     u64 fence{}; | ||||
|     bool block{}; | ||||
| }; | ||||
|  | ||||
| /// Struct used to synchronize the GPU thread | ||||
| struct SynchState final { | ||||
|     std::atomic_bool is_running{true}; | ||||
|  | ||||
|     using CommandQueue = Common::MPSCQueue<CommandDataContainer>; | ||||
|     using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||||
|     std::mutex write_lock; | ||||
|     CommandQueue queue; | ||||
|     u64 last_fence{}; | ||||
|     std::atomic<u64> signaled_fence{}; | ||||
|     std::condition_variable cv; | ||||
| }; | ||||
|  | ||||
| /// Class used to manage the GPU thread | ||||
| @@ -132,14 +135,14 @@ public: | ||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||
|  | ||||
|     // Wait until the gpu thread is idle. | ||||
|     void WaitIdle() const; | ||||
|     // Stops the GPU execution and waits for the GPU to finish working | ||||
|     void ShutDown(); | ||||
|  | ||||
|     void OnCommandListEnd(); | ||||
|  | ||||
| private: | ||||
|     /// Pushes a command to be executed by the GPU thread | ||||
|     u64 PushCommand(CommandData&& command_data); | ||||
|     u64 PushCommand(CommandData&& command_data, bool block = false); | ||||
|  | ||||
|     Core::System& system; | ||||
|     const bool is_async; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user