video_core: Block in WaitFence.

This function is called rarely and blocks quite often for a long time. So don't waste power and let the CPU sleep. This might also increase the performance as the other cores might be allowed to clock higher.
2019-12-30 13:03:20 +01:00
parent f846e3d6d0
commit cb9dd01ffd
3 changed files with 9 additions and 5 deletions
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -192,7 +192,7 @@ void NVFlinger::Compose() {

        const auto& igbp_buffer = buffer->get().igbp_buffer;

-        const auto& gpu = system.GPU();
+        auto& gpu = system.GPU();
        const auto& multi_fence = buffer->get().multi_fence;
        for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
            const auto& fence = multi_fence.fences[fence_id];
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,19 +66,20 @@ const DmaPusher& GPU::DmaPusher() const {
    return *dma_pusher;
 }

-void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+void GPU::WaitFence(u32 syncpoint_id, u32 value) {
    // Synced GPU, is always in sync
    if (!is_async) {
        return;
    }
    MICROPROFILE_SCOPE(GPU_wait);
-    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
-    }
+    std::unique_lock lock{sync_mutex};
+    sync_cv.wait(lock, [=]() { return syncpoints[syncpoint_id].load() >= value; });
 }

 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
    syncpoints[syncpoint_id]++;
    std::lock_guard lock{sync_mutex};
+    sync_cv.notify_all();
    if (!syncpt_interrupts[syncpoint_id].empty()) {
        u32 value = syncpoints[syncpoint_id].load();
        auto it = syncpt_interrupts[syncpoint_id].begin();
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@

 #include <array>
 #include <atomic>
+#include <condition_variable>
 #include <list>
 #include <memory>
 #include <mutex>
@@ -181,7 +182,7 @@ public:
    virtual void WaitIdle() const = 0;

    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
-    void WaitFence(u32 syncpoint_id, u32 value) const;
+    void WaitFence(u32 syncpoint_id, u32 value);

    void IncrementSyncPoint(u32 syncpoint_id);

@@ -312,6 +313,8 @@ private:

    std::mutex sync_mutex;

+    std::condition_variable sync_cv;
+
    const bool is_async;
 };