Merge pull request #12869 from FernandoS27/smmu-fixes
SMMU: A set of different fixes.
This commit is contained in:
		| @@ -106,6 +106,7 @@ add_library(common STATIC | ||||
|     precompiled_headers.h | ||||
|     quaternion.h | ||||
|     range_map.h | ||||
|     range_mutex.h | ||||
|     reader_writer_queue.h | ||||
|     ring_buffer.h | ||||
|     ${CMAKE_CURRENT_BINARY_DIR}/scm_rev.cpp | ||||
|   | ||||
							
								
								
									
										93
									
								
								src/common/range_mutex.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								src/common/range_mutex.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | ||||
| // SPDX-FileCopyrightText: 2024 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <condition_variable> | ||||
| #include <mutex> | ||||
|  | ||||
| #include "common/intrusive_list.h" | ||||
|  | ||||
| namespace Common { | ||||
|  | ||||
| class ScopedRangeLock; | ||||
|  | ||||
/// Mutex keyed by address range. A lock is granted only while its range overlaps none of the
/// ranges currently held, so holders of disjoint ranges can own the mutex at the same time.
/// Lock/Unlock are private; they are reached through the friend class ScopedRangeLock.
| class RangeMutex { | ||||
| public: | ||||
|     explicit RangeMutex() = default; | ||||
|     ~RangeMutex() = default; | ||||
|  | ||||
| private: | ||||
|     friend class ScopedRangeLock; | ||||
|  | ||||
      // Blocks until l overlaps no held range, then adds l to the held list.
|     void Lock(ScopedRangeLock& l); | ||||
      // Removes l from the held list and wakes every waiter.
|     void Unlock(ScopedRangeLock& l); | ||||
      // Returns true if l overlaps any entry of m_list. Reads m_list without locking; the only
      // call site in this file (the wait predicate in Lock) runs with m_mutex held.
|     bool HasIntersectionLocked(ScopedRangeLock& l); | ||||
|  | ||||
| private: | ||||
      // Held by Lock/Unlock around every access to m_list.
|     std::mutex m_mutex; | ||||
      // Waited on in Lock; notified in Unlock after a range is released.
|     std::condition_variable m_cv; | ||||
|  | ||||
|     using LockList = Common::IntrusiveListBaseTraits<ScopedRangeLock>::ListType; | ||||
      // Range locks currently held (pushed in Lock, erased in Unlock).
|     LockList m_list; | ||||
| }; | ||||
|  | ||||
/// Scoped holder of one range on a RangeMutex: the constructor acquires the range
/// (blocking while it overlaps a held one) and the destructor releases it.
/// A size of 0 acquires nothing, and the destructor then releases nothing.
| class ScopedRangeLock : public Common::IntrusiveListBaseNode<ScopedRangeLock> { | ||||
| public: | ||||
      /// @param mutex   Range mutex to lock; stored by reference.
      /// @param address First address of the range.
      /// @param size    Length of the range; 0 skips locking entirely.
|     explicit ScopedRangeLock(RangeMutex& mutex, u64 address, u64 size) | ||||
|         : m_mutex(mutex), m_address(address), m_size(size) { | ||||
|         if (m_size > 0) { | ||||
|             m_mutex.Lock(*this); | ||||
|         } | ||||
|     } | ||||
|     ~ScopedRangeLock() { | ||||
          // Mirrors the constructor: an empty range was never added to the mutex.
|         if (m_size > 0) { | ||||
|             m_mutex.Unlock(*this); | ||||
|         } | ||||
|     } | ||||
|  | ||||
      /// First address of the locked range.
|     u64 GetAddress() const { | ||||
|         return m_address; | ||||
|     } | ||||
|  | ||||
      /// Length of the locked range.
|     u64 GetSize() const { | ||||
|         return m_size; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     RangeMutex& m_mutex; | ||||
|     const u64 m_address{}; | ||||
|     const u64 m_size{}; | ||||
| }; | ||||
|  | ||||
/// Acquires the range described by l: waits until it overlaps no held range, then
/// appends l to the held list.
| inline void RangeMutex::Lock(ScopedRangeLock& l) { | ||||
|     std::unique_lock lk{m_mutex}; | ||||
      // The predicate is evaluated with m_mutex held; wait() releases it only while blocked.
|     m_cv.wait(lk, [&] { return !HasIntersectionLocked(l); }); | ||||
|     m_list.push_back(l); | ||||
| } | ||||
|  | ||||
/// Releases the range described by l: removes l from the held list, then wakes all waiters.
| inline void RangeMutex::Unlock(ScopedRangeLock& l) { | ||||
|     { | ||||
|         std::scoped_lock lk{m_mutex}; | ||||
|         m_list.erase(m_list.iterator_to(l)); | ||||
|     } | ||||
      // Notify after the inner scope has released m_mutex. notify_all is used because each
      // waiter checks its own range in its wait predicate.
|     m_cv.notify_all(); | ||||
| } | ||||
|  | ||||
/// Returns true if the range of l overlaps the range of any lock in m_list.
/// Ranges are compared as closed intervals [address, address + size - 1].
/// m_list is read without taking m_mutex here; the call site in Lock holds it.
| inline bool RangeMutex::HasIntersectionLocked(ScopedRangeLock& l) { | ||||
|     const auto cur_begin = l.GetAddress(); | ||||
      // Last address of the range, inclusive. ScopedRangeLock only calls Lock with size > 0.
      // NOTE(review): address + size - 1 is unsigned arithmetic and wraps if the range runs
      // past the top of the u64 space - confirm callers never pass such a range.
|     const auto cur_last = l.GetAddress() + l.GetSize() - 1; | ||||
|  | ||||
|     for (const auto& other : m_list) { | ||||
|         const auto other_begin = other.GetAddress(); | ||||
|         const auto other_last = other.GetAddress() + other.GetSize() - 1; | ||||
|  | ||||
          // Two closed intervals overlap when each one begins no later than the other ends.
|         if (cur_begin <= other_last && other_begin <= cur_last) { | ||||
|             return true; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| } // namespace Common | ||||
| @@ -5,11 +5,13 @@ | ||||
|  | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <bit> | ||||
| #include <deque> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "common/range_mutex.h" | ||||
| #include "common/scratch_buffer.h" | ||||
| #include "common/virtual_buffer.h" | ||||
|  | ||||
| @@ -180,31 +182,35 @@ private: | ||||
|     } | ||||
|  | ||||
|     Common::VirtualBuffer<VAddr> cpu_backing_address; | ||||
|     static constexpr size_t subentries = 8 / sizeof(u8); | ||||
|     using CounterType = u8; | ||||
|     using CounterAtomicType = std::atomic_uint8_t; | ||||
|     static constexpr size_t subentries = 8 / sizeof(CounterType); | ||||
|     static constexpr size_t subentries_mask = subentries - 1; | ||||
|     static constexpr size_t subentries_shift = | ||||
|         std::countr_zero(sizeof(u64)) - std::countr_zero(sizeof(CounterType)); | ||||
|     class CounterEntry final { | ||||
|     public: | ||||
|         CounterEntry() = default; | ||||
|  | ||||
|         std::atomic_uint8_t& Count(std::size_t page) { | ||||
|         CounterAtomicType& Count(std::size_t page) { | ||||
|             return values[page & subentries_mask]; | ||||
|         } | ||||
|  | ||||
|         const std::atomic_uint8_t& Count(std::size_t page) const { | ||||
|         const CounterAtomicType& Count(std::size_t page) const { | ||||
|             return values[page & subentries_mask]; | ||||
|         } | ||||
|  | ||||
|     private: | ||||
|         std::array<std::atomic_uint8_t, subentries> values{}; | ||||
|         std::array<CounterAtomicType, subentries> values{}; | ||||
|     }; | ||||
|     static_assert(sizeof(CounterEntry) == subentries * sizeof(u8), | ||||
|     static_assert(sizeof(CounterEntry) == subentries * sizeof(CounterType), | ||||
|                   "CounterEntry should be 8 bytes!"); | ||||
|  | ||||
|     static constexpr size_t num_counter_entries = | ||||
|         (1ULL << (device_virtual_bits - page_bits)) / subentries; | ||||
|     using CachedPages = std::array<CounterEntry, num_counter_entries>; | ||||
|     std::unique_ptr<CachedPages> cached_pages; | ||||
|     std::mutex counter_guard; | ||||
|     Common::RangeMutex counter_guard; | ||||
|     std::mutex mapping_guard; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -213,8 +213,8 @@ void DeviceMemoryManager<Traits>::Free(DAddr start, size_t size) { | ||||
| } | ||||
|  | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, | ||||
|                                       Asid asid, bool track) { | ||||
| void DeviceMemoryManager<Traits>::Map(DAddr address, VAddr virtual_address, size_t size, Asid asid, | ||||
|                                       bool track) { | ||||
|     Core::Memory::Memory* process_memory = registered_processes[asid.id]; | ||||
|     size_t start_page_d = address >> Memory::YUZU_PAGEBITS; | ||||
|     size_t num_pages = Common::AlignUp(size, Memory::YUZU_PAGESIZE) >> Memory::YUZU_PAGEBITS; | ||||
| @@ -508,12 +508,7 @@ void DeviceMemoryManager<Traits>::UnregisterProcess(Asid asid) { | ||||
|  | ||||
| template <typename Traits> | ||||
| void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size, s32 delta) { | ||||
|     std::unique_lock<std::mutex> lk(counter_guard, std::defer_lock); | ||||
|     const auto Lock = [&] { | ||||
|         if (!lk) { | ||||
|             lk.lock(); | ||||
|         } | ||||
|     }; | ||||
|     Common::ScopedRangeLock lk(counter_guard, addr, size); | ||||
|     u64 uncache_begin = 0; | ||||
|     u64 cache_begin = 0; | ||||
|     u64 uncache_bytes = 0; | ||||
| @@ -524,22 +519,36 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||
|     const size_t page_end = Common::DivCeil(addr + size, Memory::YUZU_PAGESIZE); | ||||
|     size_t page = addr >> Memory::YUZU_PAGEBITS; | ||||
|     auto [asid, base_vaddress] = ExtractCPUBacking(page); | ||||
|     size_t vpage = base_vaddress >> Memory::YUZU_PAGEBITS; | ||||
|     auto* memory_device_inter = registered_processes[asid.id]; | ||||
|     const auto release_pending = [&] { | ||||
|         if (uncache_bytes > 0) { | ||||
|             MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, | ||||
|                               uncache_bytes, false); | ||||
|             uncache_bytes = 0; | ||||
|         } | ||||
|         if (cache_bytes > 0) { | ||||
|             MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, | ||||
|                               cache_bytes, true); | ||||
|             cache_bytes = 0; | ||||
|         } | ||||
|     }; | ||||
|     for (; page != page_end; ++page) { | ||||
|         std::atomic_uint8_t& count = cached_pages->at(page >> 3).Count(page); | ||||
|         CounterAtomicType& count = cached_pages->at(page >> subentries_shift).Count(page); | ||||
|         auto [asid_2, vpage] = ExtractCPUBacking(page); | ||||
|         vpage >>= Memory::YUZU_PAGEBITS; | ||||
|  | ||||
|         if (delta > 0) { | ||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) < std::numeric_limits<u8>::max(), | ||||
|                        "Count may overflow!"); | ||||
|         } else if (delta < 0) { | ||||
|             ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!"); | ||||
|         } else { | ||||
|             ASSERT_MSG(false, "Delta must be non-zero!"); | ||||
|         if (vpage == 0) [[unlikely]] { | ||||
|             release_pending(); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         if (asid.id != asid_2.id) [[unlikely]] { | ||||
|             release_pending(); | ||||
|             memory_device_inter = registered_processes[asid_2.id]; | ||||
|         } | ||||
|  | ||||
|         // Adds or subtracts 1, as count is an unsigned 8-bit value | ||||
|         count.fetch_add(static_cast<u8>(delta), std::memory_order_release); | ||||
|         count.fetch_add(static_cast<CounterType>(delta), std::memory_order_release); | ||||
|  | ||||
|         // Assume delta is either -1 or 1 | ||||
|         if (count.load(std::memory_order::relaxed) == 0) { | ||||
| @@ -548,7 +557,6 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||
|             } | ||||
|             uncache_bytes += Memory::YUZU_PAGESIZE; | ||||
|         } else if (uncache_bytes > 0) { | ||||
|             Lock(); | ||||
|             MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, | ||||
|                               uncache_bytes, false); | ||||
|             uncache_bytes = 0; | ||||
| @@ -559,23 +567,12 @@ void DeviceMemoryManager<Traits>::UpdatePagesCachedCount(DAddr addr, size_t size | ||||
|             } | ||||
|             cache_bytes += Memory::YUZU_PAGESIZE; | ||||
|         } else if (cache_bytes > 0) { | ||||
|             Lock(); | ||||
|             MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||
|                               true); | ||||
|             MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, | ||||
|                               cache_bytes, true); | ||||
|             cache_bytes = 0; | ||||
|         } | ||||
|         vpage++; | ||||
|     } | ||||
|     if (uncache_bytes > 0) { | ||||
|         Lock(); | ||||
|         MarkRegionCaching(memory_device_inter, uncache_begin << Memory::YUZU_PAGEBITS, uncache_bytes, | ||||
|                           false); | ||||
|     } | ||||
|     if (cache_bytes > 0) { | ||||
|         Lock(); | ||||
|         MarkRegionCaching(memory_device_inter, cache_begin << Memory::YUZU_PAGEBITS, cache_bytes, | ||||
|                           true); | ||||
|     } | ||||
|     release_pending(); | ||||
| } | ||||
|  | ||||
| } // namespace Core | ||||
|   | ||||
| @@ -83,7 +83,9 @@ SessionId Container::OpenSession(Kernel::KProcess* process) { | ||||
|  | ||||
|             // Check if this memory block is heap. | ||||
|             if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) { | ||||
|                 if (svc_mem_info.size > region_size) { | ||||
|                 if (region_start + region_size == svc_mem_info.base_address) { | ||||
|                     region_size += svc_mem_info.size; | ||||
|                 } else if (svc_mem_info.size > region_size) { | ||||
|                     region_size = svc_mem_info.size; | ||||
|                     region_start = svc_mem_info.base_address; | ||||
|                 } | ||||
|   | ||||
| @@ -1431,7 +1431,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, DA | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|     ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); | ||||
|     ForEachSparseImageInRegion(channel_state->gpu_memory.GetID(), gpu_addr, size_bytes, | ||||
|                                region_check_gpu); | ||||
|  | ||||
|     bool can_rescale = info.rescaleable; | ||||
|     bool any_rescaled = false; | ||||
| @@ -1842,7 +1843,7 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s | ||||
|     if (!storage_id) { | ||||
|         return; | ||||
|     } | ||||
|     auto& gpu_page_table = gpu_page_table_storage[*storage_id]; | ||||
|     auto& gpu_page_table = gpu_page_table_storage[*storage_id * 2]; | ||||
|     ForEachGPUPage(gpu_addr, size, | ||||
|                    [this, &gpu_page_table, &images, gpu_addr, size, func](u64 page) { | ||||
|                        const auto it = gpu_page_table.find(page); | ||||
| @@ -1882,41 +1883,48 @@ void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, s | ||||
|  | ||||
| template <class P> | ||||
| template <typename Func> | ||||
| void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { | ||||
| void TextureCache<P>::ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, | ||||
|                                                  Func&& func) { | ||||
|     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; | ||||
|     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; | ||||
|     boost::container::small_vector<ImageId, 8> images; | ||||
|     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { | ||||
|         const auto it = sparse_page_table.find(page); | ||||
|         if (it == sparse_page_table.end()) { | ||||
|             if constexpr (BOOL_BREAK) { | ||||
|                 return false; | ||||
|             } else { | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         for (const ImageId image_id : it->second) { | ||||
|             Image& image = slot_images[image_id]; | ||||
|             if (True(image.flags & ImageFlagBits::Picked)) { | ||||
|                 continue; | ||||
|             } | ||||
|             if (!image.OverlapsGPU(gpu_addr, size)) { | ||||
|                 continue; | ||||
|             } | ||||
|             image.flags |= ImageFlagBits::Picked; | ||||
|             images.push_back(image_id); | ||||
|             if constexpr (BOOL_BREAK) { | ||||
|                 if (func(image_id, image)) { | ||||
|                     return true; | ||||
|                 } | ||||
|             } else { | ||||
|                 func(image_id, image); | ||||
|             } | ||||
|         } | ||||
|         if constexpr (BOOL_BREAK) { | ||||
|             return false; | ||||
|         } | ||||
|     }); | ||||
|     auto storage_id = getStorageID(as_id); | ||||
|     if (!storage_id) { | ||||
|         return; | ||||
|     } | ||||
|     auto& sparse_page_table = gpu_page_table_storage[*storage_id * 2 + 1]; | ||||
|     ForEachGPUPage(gpu_addr, size, | ||||
|                    [this, &sparse_page_table, &images, gpu_addr, size, func](u64 page) { | ||||
|                        const auto it = sparse_page_table.find(page); | ||||
|                        if (it == sparse_page_table.end()) { | ||||
|                            if constexpr (BOOL_BREAK) { | ||||
|                                return false; | ||||
|                            } else { | ||||
|                                return; | ||||
|                            } | ||||
|                        } | ||||
|                        for (const ImageId image_id : it->second) { | ||||
|                            Image& image = slot_images[image_id]; | ||||
|                            if (True(image.flags & ImageFlagBits::Picked)) { | ||||
|                                continue; | ||||
|                            } | ||||
|                            if (!image.OverlapsGPU(gpu_addr, size)) { | ||||
|                                continue; | ||||
|                            } | ||||
|                            image.flags |= ImageFlagBits::Picked; | ||||
|                            images.push_back(image_id); | ||||
|                            if constexpr (BOOL_BREAK) { | ||||
|                                if (func(image_id, image)) { | ||||
|                                    return true; | ||||
|                                } | ||||
|                            } else { | ||||
|                                func(image_id, image); | ||||
|                            } | ||||
|                        } | ||||
|                        if constexpr (BOOL_BREAK) { | ||||
|                            return false; | ||||
|                        } | ||||
|                    }); | ||||
|     for (const ImageId image_id : images) { | ||||
|         slot_images[image_id].flags &= ~ImageFlagBits::Picked; | ||||
|     } | ||||
| @@ -1988,8 +1996,9 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { | ||||
|             sparse_maps.push_back(map_id); | ||||
|         }); | ||||
|     sparse_views.emplace(image_id, std::move(sparse_maps)); | ||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, | ||||
|                    [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); | ||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { | ||||
|         (*channel_state->sparse_page_table)[page].push_back(image_id); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| template <class P> | ||||
| @@ -2042,7 +2051,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { | ||||
|         return; | ||||
|     } | ||||
|     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { | ||||
|         clear_page_table(page, sparse_page_table); | ||||
|         clear_page_table(page, (*channel_state->sparse_page_table)); | ||||
|     }); | ||||
|     auto it = sparse_views.find(image_id); | ||||
|     ASSERT(it != sparse_views.end()); | ||||
| @@ -2496,13 +2505,15 @@ void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel | ||||
|     const auto it = channel_map.find(channel.bind_id); | ||||
|     auto* this_state = &channel_storage[it->second]; | ||||
|     const auto& this_as_ref = address_spaces[channel.memory_manager->GetID()]; | ||||
|     this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id]; | ||||
|     this_state->gpu_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2]; | ||||
|     this_state->sparse_page_table = &gpu_page_table_storage[this_as_ref.storage_id * 2 + 1]; | ||||
| } | ||||
|  | ||||
| /// Appends the two page table maps (regular, then sparse) for a newly registered GPU address space. | ||||
| template <class P> | ||||
| void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) { | ||||
|     gpu_page_table_storage.emplace_back(); | ||||
|     gpu_page_table_storage.emplace_back(); | ||||
| } | ||||
|  | ||||
| } // namespace VideoCommon | ||||
|   | ||||
| @@ -86,6 +86,7 @@ public: | ||||
|     std::unordered_map<TSCEntry, SamplerId> samplers; | ||||
|  | ||||
|     TextureCacheGPUMap* gpu_page_table; | ||||
|     TextureCacheGPUMap* sparse_page_table; | ||||
| }; | ||||
|  | ||||
| template <class P> | ||||
| @@ -357,7 +358,7 @@ private: | ||||
|     void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | ||||
|  | ||||
|     template <typename Func> | ||||
|     void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); | ||||
|     void ForEachSparseImageInRegion(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func); | ||||
|  | ||||
|     /// Iterates over all the images in a region calling func | ||||
|     template <typename Func> | ||||
| @@ -431,7 +432,6 @@ private: | ||||
|     std::unordered_map<RenderTargets, FramebufferId> framebuffers; | ||||
|  | ||||
|     std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table; | ||||
|     std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table; | ||||
|     std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views; | ||||
|  | ||||
|     DAddr virtual_invalid_space{}; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user