Memory Tracking: Optimize tracking to only use atomic writes when contested with the host GPU
This commit is contained in:
		| @@ -55,7 +55,6 @@ | |||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/video_core.h" | #include "video_core/video_core.h" | ||||||
|  |  | ||||||
|  |  | ||||||
| MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); | MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64)); | ||||||
| MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); | MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64)); | ||||||
| MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); | MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64)); | ||||||
| @@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs, | |||||||
| struct System::Impl { | struct System::Impl { | ||||||
|  |     /// Constructs every subsystem member and then hands `memory` a view over the GPU dirty | ||||||
|  |     /// memory managers owned by this object. | ||||||
|     explicit Impl(System& system) |     explicit Impl(System& system) | ||||||
|         : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, |         : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{}, | ||||||
|           cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {} |           cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system}, | ||||||
|  |           gpu_dirty_memory_write_manager{} { | ||||||
|  |         // Done in the constructor body, i.e. after all members (including the managers) exist. | ||||||
|  |         memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     void Initialize(System& system) { |     void Initialize(System& system) { | ||||||
|         device_memory = std::make_unique<Core::DeviceMemory>(); |         device_memory = std::make_unique<Core::DeviceMemory>(); | ||||||
| @@ -236,6 +238,8 @@ struct System::Impl { | |||||||
|         // Setting changes may require a full system reinitialization (e.g., disabling multicore). |         // Setting changes may require a full system reinitialization (e.g., disabling multicore). | ||||||
|         ReinitializeIfNecessary(system); |         ReinitializeIfNecessary(system); | ||||||
|  |  | ||||||
|  |         memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager); | ||||||
|  |  | ||||||
|         kernel.Initialize(); |         kernel.Initialize(); | ||||||
|         cpu_manager.Initialize(); |         cpu_manager.Initialize(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,3 +1,6 @@ | |||||||
|  | // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||||
|  | // SPDX-License-Identifier: GPL-3.0-or-later | ||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include <atomic> | #include <atomic> | ||||||
| @@ -59,8 +62,7 @@ public: | |||||||
|                 mask = mask >> empty_bits; |                 mask = mask >> empty_bits; | ||||||
|  |  | ||||||
|                 const size_t continuous_bits = std::countr_one(mask); |                 const size_t continuous_bits = std::countr_one(mask); | ||||||
|                 callback((transform.address << Memory::YUZU_PAGEBITS) + offset, |                 callback((transform.address << page_bits) + offset, continuous_bits << align_bits); | ||||||
|                          continuous_bits << align_bits); |  | ||||||
|                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; |                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0; | ||||||
|                 offset += continuous_bits << align_bits; |                 offset += continuous_bits << align_bits; | ||||||
|             } |             } | ||||||
| @@ -74,6 +76,10 @@ private: | |||||||
|         u64 mask; |         u64 mask; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  |     constexpr static size_t page_bits = Memory::YUZU_PAGEBITS; | ||||||
|  |     constexpr static size_t page_size = 1ULL << page_bits; | ||||||
|  |     constexpr static size_t page_mask = page_size - 1; | ||||||
|  |  | ||||||
|     constexpr static size_t align_bits = 6U; |     constexpr static size_t align_bits = 6U; | ||||||
|     constexpr static size_t align_size = 1U << align_bits; |     constexpr static size_t align_size = 1U << align_bits; | ||||||
|     constexpr static size_t align_mask = align_size - 1; |     constexpr static size_t align_mask = align_size - 1; | ||||||
| @@ -94,11 +100,11 @@ private: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Splits a write of `size` bytes at `address` into a page number (result.address) and a | ||||||
|  |     /// 64-bit mask (result.mask) expressed in units of align_size (1 << align_bits) bytes. | ||||||
|     TransformAddress BuildTransform(VAddr address, size_t size) { |     TransformAddress BuildTransform(VAddr address, size_t size) { | ||||||
|  |         // Byte offset of the write inside its page. | ||||||
|         const size_t minor_address = address & Memory::YUZU_PAGEMASK; |         const size_t minor_address = address & page_mask; | ||||||
|  |         // Index of the first aligned unit touched by the write. | ||||||
|         const size_t minor_bit = minor_address >> align_bits; |         const size_t minor_bit = minor_address >> align_bits; | ||||||
|  |         // End offset rounded up to the next aligned unit, then converted to a unit index. | ||||||
|         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; |         const size_t top_bit = (minor_address + size + align_mask) >> align_bits; | ||||||
|         TransformAddress result{}; |         TransformAddress result{}; | ||||||
|         result.address = address >> Memory::YUZU_PAGEBITS; |         result.address = address >> page_bits; | ||||||
|  |         // NOTE(review): presumably sets bits [minor_bit, top_bit) - CreateMask is not shown here. | ||||||
|         result.mask = CreateMask<u64>(top_bit, minor_bit); |         result.mask = CreateMask<u64>(top_bit, minor_bit); | ||||||
|         return result; |         return result; | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -3,6 +3,7 @@ | |||||||
|  |  | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include <span> | ||||||
|  |  | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/atomic_ops.h" | #include "common/atomic_ops.h" | ||||||
| @@ -679,7 +680,7 @@ struct Memory::Impl { | |||||||
|                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, |                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, | ||||||
|                           GetInteger(vaddr), static_cast<u64>(data)); |                           GetInteger(vaddr), static_cast<u64>(data)); | ||||||
|             }, |             }, | ||||||
|             [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); |             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); | ||||||
|         if (ptr) { |         if (ptr) { | ||||||
|             std::memcpy(ptr, &data, sizeof(T)); |             std::memcpy(ptr, &data, sizeof(T)); | ||||||
|         } |         } | ||||||
| @@ -693,7 +694,7 @@ struct Memory::Impl { | |||||||
|                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", |                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}", | ||||||
|                           sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); |                           sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data)); | ||||||
|             }, |             }, | ||||||
|             [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); }); |             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); }); | ||||||
|         if (ptr) { |         if (ptr) { | ||||||
|             const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); |             const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr); | ||||||
|             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | ||||||
| @@ -708,7 +709,7 @@ struct Memory::Impl { | |||||||
|                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", |                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}", | ||||||
|                           GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); |                           GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0])); | ||||||
|             }, |             }, | ||||||
|             [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); }); |             [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); }); | ||||||
|         if (ptr) { |         if (ptr) { | ||||||
|             const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); |             const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr); | ||||||
|             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); |             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected); | ||||||
| @@ -718,7 +719,7 @@ struct Memory::Impl { | |||||||
|  |  | ||||||
|     void HandleRasterizerDownload(VAddr address, size_t size) { |     void HandleRasterizerDownload(VAddr address, size_t size) { | ||||||
|         const size_t core = system.GetCurrentHostThreadID(); |         const size_t core = system.GetCurrentHostThreadID(); | ||||||
|         auto& current_area = rasterizer_areas[core]; |         auto& current_area = rasterizer_read_areas[core]; | ||||||
|         const VAddr end_address = address + size; |         const VAddr end_address = address + size; | ||||||
|         if (current_area.start_address <= address && end_address <= current_area.end_address) |         if (current_area.start_address <= address && end_address <= current_area.end_address) | ||||||
|             [[likely]] { |             [[likely]] { | ||||||
| @@ -727,9 +728,31 @@ struct Memory::Impl { | |||||||
|         current_area = system.GPU().OnCPURead(address, size); |         current_area = system.GPU().OnCPURead(address, size); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Decides whether a CPU write to [address, address + size) must be recorded for the GPU. | ||||||
|  |     /// The write is collected into the dirty manager selected by the current host thread id when | ||||||
|  |     /// it hits the page remembered from the last tracked write on that thread, or when | ||||||
|  |     /// GPU::OnCPUWrite returns true for it. Otherwise nothing is collected. | ||||||
|     Common::PageTable* current_page_table = nullptr; |     void HandleRasterizerWrite(VAddr address, size_t size) { | ||||||
|     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{}; |         const size_t core = system.GetCurrentHostThreadID(); | ||||||
|  |         auto& current_area = rasterizer_write_areas[core]; | ||||||
|  |         // Page index of the write; the per-thread cache below works at page granularity. | ||||||
|  |         VAddr subaddress = address >> YUZU_PAGEBITS; | ||||||
|  |         // Fast path: same page as the last write the GPU asked us to track, so skip the GPU query. | ||||||
|  |         // NOTE(review): rasterizer_write_areas is value-initialized, so last_address starts at 0 | ||||||
|  |         // and a write to page 0 takes this fast path before the GPU was ever asked - confirm. | ||||||
|  |         bool do_collection = current_area.last_address == subaddress; | ||||||
|  |         if (!do_collection) [[unlikely]] { | ||||||
|  |             do_collection = system.GPU().OnCPUWrite(address, size); | ||||||
|  |             if (!do_collection) { | ||||||
|  |                 // The GPU does not need this write tracked; the remembered page is left unchanged. | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             current_area.last_address = subaddress; | ||||||
|  |         } | ||||||
|  |         gpu_dirty_managers[core].Collect(address, size); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Per-host-thread cache used by HandleRasterizerWrite. | ||||||
|  |     struct GPUDirtyState { | ||||||
|  |         // Page index (address >> YUZU_PAGEBITS) of the last write the GPU asked to be tracked. | ||||||
|  |         VAddr last_address; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|  |     Common::PageTable* current_page_table = nullptr; | ||||||
|  |     std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> | ||||||
|  |         rasterizer_read_areas{}; | ||||||
|  |     std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{}; | ||||||
|  |     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| Memory::Memory(Core::System& system_) : system{system_} { | Memory::Memory(Core::System& system_) : system{system_} { | ||||||
| @@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size) | |||||||
|     impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); |     impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Stores the span of GPU dirty memory managers that Impl::HandleRasterizerWrite indexes by the | ||||||
|  | /// current host thread id. std::span does not own its elements, so the managers have to stay | ||||||
|  | /// alive for as long as writes can reach this object. | ||||||
|  | void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) { | ||||||
|  |     impl->gpu_dirty_managers = managers; | ||||||
|  | } | ||||||
|  |  | ||||||
| Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) { | ||||||
|     return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); |     return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -5,6 +5,7 @@ | |||||||
|  |  | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | #include <span> | ||||||
| #include <string> | #include <string> | ||||||
| #include "common/typed_address.h" | #include "common/typed_address.h" | ||||||
| #include "core/hle/result.h" | #include "core/hle/result.h" | ||||||
| @@ -15,7 +16,8 @@ struct PageTable; | |||||||
|  |  | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| } | class GPUDirtyMemoryManager; | ||||||
|  | } // namespace Core | ||||||
|  |  | ||||||
| namespace Kernel { | namespace Kernel { | ||||||
| class PhysicalMemory; | class PhysicalMemory; | ||||||
| @@ -458,6 +460,8 @@ public: | |||||||
|      */ |      */ | ||||||
|     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); |     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); | ||||||
|  |  | ||||||
|  |     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -132,6 +132,19 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) { | |||||||
|     InlineMemoryImplementation(cpu_addr, size, tmp_buffer); |     InlineMemoryImplementation(cpu_addr, size, tmp_buffer); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Handles a CPU write notification for [cpu_addr, cpu_addr + size). | ||||||
|  | /// @return true only when the region is registered in the buffer cache and the memory tracker | ||||||
|  | ///         reports it as GPU modified; false in every other case. | ||||||
|  | template <class P> | ||||||
|  | bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { | ||||||
|  |     // Despite its name, is_dirty only records whether the region is registered in this cache. | ||||||
|  |     const bool is_dirty = IsRegionRegistered(cpu_addr, size); | ||||||
|  |     if (!is_dirty) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) { | ||||||
|  |         return true; | ||||||
|  |     } | ||||||
|  |     // Registered but not GPU modified: notify the cache right away instead of deferring. | ||||||
|  |     WriteMemory(cpu_addr, size); | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
| std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, | std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr, | ||||||
|                                                                               u64 size) { |                                                                               u64 size) { | ||||||
| @@ -1574,7 +1587,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size, | |||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||||||
|                                   std::span<const u8> inlined_buffer) { |                                                 std::span<const u8> inlined_buffer) { | ||||||
|     const IntervalType subtract_interval{dest_address, dest_address + copy_size}; |     const IntervalType subtract_interval{dest_address, dest_address + copy_size}; | ||||||
|     ClearDownload(subtract_interval); |     ClearDownload(subtract_interval); | ||||||
|     common_ranges.subtract(subtract_interval); |     common_ranges.subtract(subtract_interval); | ||||||
|   | |||||||
| @@ -245,6 +245,8 @@ public: | |||||||
|  |  | ||||||
|     void CachedWriteMemory(VAddr cpu_addr, u64 size); |     void CachedWriteMemory(VAddr cpu_addr, u64 size); | ||||||
|  |  | ||||||
|  |     bool OnCPUWrite(VAddr cpu_addr, u64 size); | ||||||
|  |  | ||||||
|     void DownloadMemory(VAddr cpu_addr, u64 size); |     void DownloadMemory(VAddr cpu_addr, u64 size); | ||||||
|  |  | ||||||
|     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); |     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size); | ||||||
| @@ -543,7 +545,8 @@ private: | |||||||
|  |  | ||||||
|     void ClearDownload(IntervalType subtract_interval); |     void ClearDownload(IntervalType subtract_interval); | ||||||
|  |  | ||||||
|     void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer); |     void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, | ||||||
|  |                                     std::span<const u8> inlined_buffer); | ||||||
|  |  | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|     Core::Memory::Memory& cpu_memory; |     Core::Memory::Memory& cpu_memory; | ||||||
|   | |||||||
| @@ -69,7 +69,6 @@ public: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     void SignalFence(std::function<void()>&& func) { |     void SignalFence(std::function<void()>&& func) { | ||||||
|         rasterizer.InvalidateGPUCache(); |  | ||||||
|         bool delay_fence = Settings::IsGPULevelHigh(); |         bool delay_fence = Settings::IsGPULevelHigh(); | ||||||
|         if constexpr (!can_async_check) { |         if constexpr (!can_async_check) { | ||||||
|             TryReleasePendingFences<false>(); |             TryReleasePendingFences<false>(); | ||||||
| @@ -96,6 +95,7 @@ public: | |||||||
|             guard.unlock(); |             guard.unlock(); | ||||||
|             cv.notify_all(); |             cv.notify_all(); | ||||||
|         } |         } | ||||||
|  |         rasterizer.InvalidateGPUCache(); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     void SignalSyncPoint(u32 value) { |     void SignalSyncPoint(u32 value) { | ||||||
|   | |||||||
| @@ -96,7 +96,7 @@ struct GPU::Impl { | |||||||
|     /// Synchronizes CPU writes with Host GPU memory. |     /// Synchronizes CPU writes with Host GPU memory. | ||||||
|     void InvalidateGPUCache() { |     void InvalidateGPUCache() { | ||||||
|  |         // The callback forwards an (address, size) range to the rasterizer's cache invalidation. | ||||||
|         std::function<void(VAddr, size_t)> callback_writes( |         std::function<void(VAddr, size_t)> callback_writes( | ||||||
|             [this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); }); |             [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); | ||||||
|  |         // NOTE(review): presumably invoked once per collected dirty range - GatherGPUDirtyMemory | ||||||
|  |         // is not shown here. | ||||||
|         system.GatherGPUDirtyMemory(callback_writes); |         system.GatherGPUDirtyMemory(callback_writes); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -301,6 +301,10 @@ struct GPU::Impl { | |||||||
|         gpu_thread.InvalidateRegion(addr, size); |         gpu_thread.InvalidateRegion(addr, size); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Forwards the CPU write query directly to the rasterizer and returns its answer. | ||||||
|  |     bool OnCPUWrite(VAddr addr, u64 size) { | ||||||
|  |         return rasterizer->OnCPUWrite(addr, size); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) { |     void FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||||
|         gpu_thread.FlushAndInvalidateRegion(addr, size); |         gpu_thread.FlushAndInvalidateRegion(addr, size); | ||||||
| @@ -563,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) { | |||||||
|     impl->InvalidateRegion(addr, size); |     impl->InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Public entry point; delegates to Impl::OnCPUWrite and returns its result unchanged. | ||||||
|  | bool GPU::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|  |     return impl->OnCPUWrite(addr, size); | ||||||
|  | } | ||||||
|  |  | ||||||
| void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||||
|     impl->FlushAndInvalidateRegion(addr, size); |     impl->FlushAndInvalidateRegion(addr, size); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -250,6 +250,10 @@ public: | |||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated |     /// Notify rasterizer that any caches of the specified region should be invalidated | ||||||
|     void InvalidateRegion(VAddr addr, u64 size); |     void InvalidateRegion(VAddr addr, u64 size); | ||||||
|  |  | ||||||
|  |     /// Notify rasterizer that the CPU is trying to write this area. Returns true if the area is | ||||||
|  |     /// sensitive and the write has to be tracked for the GPU, false otherwise | ||||||
|  |     bool OnCPUWrite(VAddr addr, u64 size); | ||||||
|  |  | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); |     void FlushAndInvalidateRegion(VAddr addr, u64 size); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system, | |||||||
|         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { |         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { | ||||||
|             rasterizer->FlushRegion(flush->addr, flush->size); |             rasterizer->FlushRegion(flush->addr, flush->size); | ||||||
|         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { |         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||||
|             rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); |             rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size); | ||||||
|         } else { |         } else { | ||||||
|             ASSERT(false); |             ASSERT(false); | ||||||
|         } |         } | ||||||
| @@ -102,12 +102,12 @@ void ThreadManager::TickGPU() { | |||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Invalidates the region by calling the rasterizer directly from the calling thread. | ||||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||||
|     rasterizer->OnCPUWrite(addr, size); |     rasterizer->OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Only invalidates the region; no flush is performed here (see the comment below). | ||||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | ||||||
|     // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important |     // Skip flush on async mode, as FlushAndInvalidateRegion is not used for anything too important | ||||||
|     rasterizer->OnCPUWrite(addr, size); |     rasterizer->OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { | ||||||
|   | |||||||
| @@ -109,7 +109,9 @@ public: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Notify rasterizer that any caches of the specified region are out of sync with the guest |     /// Notify rasterizer that any caches of the specified region are out of sync with the guest | ||||||
|     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; |     virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0; | ||||||
|  |  | ||||||
|  |     virtual bool OnCPUWrite(VAddr addr, u64 size) = 0; | ||||||
|  |  | ||||||
|     /// Sync memory between guest and host. |     /// Sync memory between guest and host. | ||||||
|     virtual void InvalidateGPUCache() = 0; |     virtual void InvalidateGPUCache() = 0; | ||||||
|   | |||||||
| @@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp | |||||||
|     return false; |     return false; | ||||||
| } | } | ||||||
| void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {} | ||||||
|  | // Null backend: never asks the caller to track a CPU write. | ||||||
| void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {} | bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  | // Null backend: cache invalidation is a no-op. | ||||||
|  | void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {} | ||||||
| VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) { | ||||||
|     VideoCore::RasterizerDownloadArea new_area{ |     VideoCore::RasterizerDownloadArea new_area{ | ||||||
|         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), |         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE), | ||||||
|   | |||||||
| @@ -53,7 +53,8 @@ public: | |||||||
|                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(VAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void OnCPUWrite(VAddr addr, u64 size) override; |     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||||
|  |     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(VAddr addr, u64 size) override; | ||||||
|   | |||||||
| @@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Handles a CPU write query for [addr, addr + size). | ||||||
|  | /// @return true when the buffer cache reports that the write has to be tracked by the caller; | ||||||
|  | ///         false after the texture and shader caches were notified immediately, or when the | ||||||
|  | ///         address is null or the size is zero. | ||||||
| void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|  |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|  |     if (addr == 0 || size == 0) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  |         // On true the texture and shader caches are not touched in this call. | ||||||
|  |         if (buffer_cache.OnCPUWrite(addr, size)) { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|  |         texture_cache.WriteMemory(addr, size); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     shader_cache.InvalidateRegion(addr, size); | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|     shader_cache.OnCPUWrite(addr, size); |  | ||||||
|     { |     { | ||||||
|         std::scoped_lock lock{texture_cache.mutex}; |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|         texture_cache.WriteMemory(addr, size); |         texture_cache.WriteMemory(addr, size); | ||||||
| @@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|         buffer_cache.CachedWriteMemory(addr, size); |         buffer_cache.CachedWriteMemory(addr, size); | ||||||
|     } |     } | ||||||
|  |     shader_cache.InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// New revision (right column): delegates entirely to gpu.InvalidateGPUCache(). The left column | ||||||
|  | /// shows the removed body that synced the shader cache and flushed cached buffer writes. | ||||||
| void RasterizerOpenGL::InvalidateGPUCache() { | void RasterizerOpenGL::InvalidateGPUCache() { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     gpu.InvalidateGPUCache(); | ||||||
|     shader_cache.SyncGuestHost(); |  | ||||||
|     { |  | ||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |  | ||||||
|         buffer_cache.FlushCachedWrites(); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | ||||||
| @@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { | |||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|         buffer_cache.WriteMemory(addr, size); |         buffer_cache.WriteMemory(addr, size); | ||||||
|     } |     } | ||||||
|     shader_cache.OnCPUWrite(addr, size); |     shader_cache.OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | ||||||
|   | |||||||
| @@ -98,7 +98,8 @@ public: | |||||||
|     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; |     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(VAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void OnCPUWrite(VAddr addr, u64 size) override; |     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||||
|  |     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(VAddr addr, u64 size) override; | ||||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||||
|   | |||||||
| @@ -566,7 +566,28 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Handles a CPU write query for [addr, addr + size). | ||||||
|  | /// @return true when the buffer cache reports that the write has to be tracked by the caller; | ||||||
|  | ///         false after the texture and pipeline caches were notified immediately, or when the | ||||||
|  | ///         address is null or the size is zero. | ||||||
| void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | ||||||
|  |     if (addr == 0 || size == 0) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  |         // On true the texture and pipeline caches are not touched in this call. | ||||||
|  |         if (buffer_cache.OnCPUWrite(addr, size)) { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         std::scoped_lock lock{texture_cache.mutex}; | ||||||
|  |         texture_cache.WriteMemory(addr, size); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pipeline_cache.InvalidateRegion(addr, size); | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { | ||||||
|     if (addr == 0 || size == 0) { |     if (addr == 0 || size == 0) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| @@ -595,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { | |||||||
|         std::scoped_lock lock{buffer_cache.mutex}; |         std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|         buffer_cache.WriteMemory(addr, size); |         buffer_cache.WriteMemory(addr, size); | ||||||
|     } |     } | ||||||
|     pipeline_cache.OnCPUWrite(addr, size); |     pipeline_cache.OnCacheInvalidation(addr, size); | ||||||
| } | } | ||||||
|  |  | ||||||
| void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | ||||||
|   | |||||||
| @@ -96,7 +96,8 @@ public: | |||||||
|     void InvalidateRegion(VAddr addr, u64 size, |     void InvalidateRegion(VAddr addr, u64 size, | ||||||
|                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; |                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override; | ||||||
|     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; |     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override; | ||||||
|     void OnCPUWrite(VAddr addr, u64 size) override; |     void OnCacheInvalidation(VAddr addr, u64 size) override; | ||||||
|  |     bool OnCPUWrite(VAddr addr, u64 size) override; | ||||||
|     void InvalidateGPUCache() override; |     void InvalidateGPUCache() override; | ||||||
|     void UnmapMemory(VAddr addr, u64 size) override; |     void UnmapMemory(VAddr addr, u64 size) override; | ||||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; |     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { | |||||||
|     RemovePendingShaders(); |     RemovePendingShaders(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | /// Invalidates the pages in [addr, addr + size) while holding invalidation_mutex. | ||||||
| void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { | void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) { | ||||||
|     std::scoped_lock lock{invalidation_mutex}; |     std::scoped_lock lock{invalidation_mutex}; | ||||||
|     InvalidatePagesInRegion(addr, size); |     InvalidatePagesInRegion(addr, size); | ||||||
| } | } | ||||||
|   | |||||||
| @@ -62,7 +62,7 @@ public: | |||||||
|     /// @brief Unmarks a memory region as cached and marks it for removal |     /// @brief Unmarks a memory region as cached and marks it for removal | ||||||
|     /// @param addr Start address of the CPU write operation |     /// @param addr Start address of the CPU write operation | ||||||
|     /// @param size Number of bytes of the CPU write operation |     /// @param size Number of bytes of the CPU write operation | ||||||
|     void OnCPUWrite(VAddr addr, size_t size); |     void OnCacheInvalidation(VAddr addr, size_t size); | ||||||
|  |  | ||||||
|     /// @brief Flushes delayed removal operations |     /// @brief Flushes delayed removal operations | ||||||
|     void SyncGuestHost(); |     void SyncGuestHost(); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user