Texture cache: Fix the remaining issues with memory management and unmapping.
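The change threads a GPU address-space identifier (as_id) from the memory manager through RasterizerInterface::ModifyGPUMemory into the texture cache, so unmap/remap events are applied to the page table of the address space they actually belong to. Below is a minimal sketch of that call flow; MemoryManager, Rasterizer and TextureCache here are simplified stand-ins, not the real yuzu classes.

// Toy model of the as_id plumbing introduced in this commit; all types are simplified stand-ins.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <unordered_map>

using GPUVAddr = std::uint64_t;

struct TextureCache {
    std::unordered_map<std::size_t, std::size_t> as_to_storage; // as_id -> per-AS table slot

    std::optional<std::size_t> getStorageID(std::size_t as_id) const {
        const auto it = as_to_storage.find(as_id);
        return it == as_to_storage.end() ? std::nullopt : std::optional<std::size_t>{it->second};
    }

    void UnmapGPUMemory(std::size_t as_id, GPUVAddr gpu_addr, std::size_t size) {
        const auto storage_id = getStorageID(as_id);
        if (!storage_id) {
            return; // address space unknown to the cache: nothing to drop
        }
        std::printf("unmap as=%zu slot=%zu addr=0x%llx size=%zu\n", as_id, *storage_id,
                    static_cast<unsigned long long>(gpu_addr), size);
    }
};

struct Rasterizer {
    TextureCache texture_cache;
    // New signature: the address-space id travels with the modified range.
    void ModifyGPUMemory(std::size_t as_id, GPUVAddr addr, std::uint64_t size) {
        texture_cache.UnmapGPUMemory(as_id, addr, static_cast<std::size_t>(size));
    }
};

struct MemoryManager {
    std::size_t unique_identifier{}; // identifies this GPU address space
    Rasterizer* rasterizer = nullptr;

    void BindRasterizer(Rasterizer* r) { rasterizer = r; } // what GPU::InitAddressSpace forwards to

    void Remap(GPUVAddr gpu_addr, std::uint64_t size) {
        // A page-table entry changed: report it against this address space.
        rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, size);
    }
};

int main() {
    Rasterizer rasterizer;
    rasterizer.texture_cache.as_to_storage[7] = 0;

    MemoryManager gmmu{7};
    gmmu.BindRasterizer(&rasterizer); // the diff calls system.GPU().InitAddressSpace(*gmmu)
    gmmu.Remap(0x100000, 0x1000);     // prints: unmap as=7 slot=0 addr=0x100000 size=4096
}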
@@ -16,6 +16,7 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "video_core/control/channel_state.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
 
@@ -24,6 +25,7 @@ namespace Service::Nvidia::Devices {
 nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
     : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()}, vm{},
       gmmu{} {}
+
 nvhost_as_gpu::~nvhost_as_gpu() = default;
 
 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -132,6 +134,7 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);
 
     gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;
 
     return NvResult::Success;
@@ -3,6 +3,7 @@
 #include <deque>
 #include <limits>
 #include <mutex>
+#include <optional>
 #include <unordered_map>
 
 #include "common/common_types.h"
@@ -59,6 +60,15 @@ public:
         return ref->second.gpu_memory;
     }
 
+    std::optional<size_t> getStorageID(size_t id) const {
+        std::unique_lock<std::mutex> lk(config_mutex);
+        const auto ref = address_spaces.find(id);
+        if (ref == address_spaces.end()) {
+            return std::nullopt;
+        }
+        return ref->second.storage_id;
+    }
+
 protected:
     static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
 
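The new getStorageID accessor follows the pattern of the surrounding getters: a lookup in address_spaces guarded by config_mutex that reports an unknown address space as std::nullopt rather than a sentinel value. A self-contained sketch of the same shape; Registry, AddressSpaceInfo and Register are invented for this example.

#include <cstddef>
#include <iostream>
#include <mutex>
#include <optional>
#include <unordered_map>

struct AddressSpaceInfo {
    std::size_t storage_id;
};

class Registry {
public:
    void Register(std::size_t as_id, std::size_t storage_id) {
        std::unique_lock lk(mutex);
        spaces[as_id] = AddressSpaceInfo{storage_id};
    }

    // Returns std::nullopt when the address space was never registered,
    // letting callers bail out instead of indexing with a bogus id.
    std::optional<std::size_t> getStorageID(std::size_t as_id) const {
        std::unique_lock lk(mutex);
        const auto it = spaces.find(as_id);
        if (it == spaces.end()) {
            return std::nullopt;
        }
        return it->second.storage_id;
    }

private:
    mutable std::mutex mutex;
    std::unordered_map<std::size_t, AddressSpaceInfo> spaces;
};

int main() {
    Registry registry;
    registry.Register(3, 0);
    std::cout << *registry.getStorageID(3) << '\n';            // 0
    std::cout << registry.getStorageID(9).has_value() << '\n'; // 0 (not registered)
}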
@@ -73,6 +73,10 @@ struct GPU::Impl {
         rasterizer->InitializeChannel(to_init);
     }
 
+    void InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+        memory_manager.BindRasterizer(rasterizer);
+    }
+
     void ReleaseChannel(Control::ChannelState& to_release) {
         UNIMPLEMENTED();
     }
@@ -452,6 +456,10 @@ void GPU::ReleaseChannel(Control::ChannelState& to_release) {
     impl->ReleaseChannel(to_release);
 }
 
+void GPU::InitAddressSpace(Tegra::MemoryManager& memory_manager) {
+    impl->InitAddressSpace(memory_manager);
+}
+
 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
     impl->BindRenderer(std::move(renderer));
 }
@@ -118,6 +118,8 @@ public:
 
     void ReleaseChannel(Control::ChannelState& to_release);
 
+    void InitAddressSpace(Tegra::MemoryManager& memory_manager);
+
     /// Request a host GPU memory flush from the CPU.
     [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
 
@@ -59,10 +59,19 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
     }
     for (u64 offset{}; offset < size; offset += page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
         SetEntry(current_gpu_addr, entry_type);
+        if (current_entry_type != entry_type) {
+            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
+        }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
             const auto index = PageEntryIndex(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL);
+            if (current_entry_type == entry_type && sub_value != page_table[index]) {
+                rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
+                                             page_size);
+            }
             page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
         }
         remaining_size -= page_size;
@@ -168,7 +177,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t s
     const size_t page_last{(addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
         const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr && *page_addr != 0) {
+        if (page_addr) {
             return page_addr;
         }
         ++page_index;
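In PageTableOp, each page now records its previous entry type before being overwritten. A transition between entry types is reported to the rasterizer via ModifyGPUMemory (tagged with unique_identifier, the address-space id), and a page that stays mapped but is re-pointed at a different CPU page first invalidates its old backing region. A reduced model of that decision for the mapping path, assuming a fixed-size page table and stub notification functions:

// Reduced model of the remap detection in PageTableOp (mapping path only).
#include <array>
#include <cstdint>
#include <cstdio>

enum class EntryType : std::uint8_t { Free, Mapped };

constexpr std::uint64_t PAGE_BITS = 12;

std::array<EntryType, 16> entry_types{};    // per-page state (all Free initially)
std::array<std::uint32_t, 16> page_table{}; // CPU page number backing each GPU page

void NotifyModifyGPUMemory(std::uint64_t gpu_addr) {
    std::printf("ModifyGPUMemory at 0x%llx\n", static_cast<unsigned long long>(gpu_addr));
}
void InvalidateRegion(std::uint64_t cpu_addr) {
    std::printf("InvalidateRegion at 0x%llx\n", static_cast<unsigned long long>(cpu_addr));
}

void MapPage(std::uint64_t gpu_addr, std::uint64_t cpu_addr) {
    const std::size_t index = gpu_addr >> PAGE_BITS;
    const EntryType previous = entry_types[index];
    entry_types[index] = EntryType::Mapped;

    // Entry type changed (Free -> Mapped here): tell the rasterizer the range was remapped.
    if (previous != EntryType::Mapped) {
        NotifyModifyGPUMemory(gpu_addr);
    }

    const auto new_backing = static_cast<std::uint32_t>(cpu_addr >> PAGE_BITS);
    // Entry stayed Mapped but now points at different CPU memory: flush the stale backing first.
    if (previous == EntryType::Mapped && new_backing != page_table[index]) {
        InvalidateRegion(static_cast<std::uint64_t>(page_table[index]) << PAGE_BITS);
    }
    page_table[index] = new_backing;
}

int main() {
    MapPage(0x2000, 0x10000); // fresh mapping: ModifyGPUMemory fires
    MapPage(0x2000, 0x20000); // remap to a new backing page: InvalidateRegion(0x10000) fires
}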
@@ -95,7 +95,7 @@ public:
     virtual void UnmapMemory(VAddr addr, u64 size) = 0;
 
     /// Remap GPU memory range. This means underneath backing memory changed
-    virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
+    virtual void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) = 0;
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
     /// and invalidated
@@ -379,10 +379,10 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
     shader_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }
 
@@ -82,7 +82,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
     void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
@@ -441,10 +441,10 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
     pipeline_cache.OnCPUWrite(addr, size);
 }
 
-void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
+void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
     {
         std::scoped_lock lock{texture_cache.mutex};
-        texture_cache.UnmapGPUMemory(addr, size);
+        texture_cache.UnmapGPUMemory(as_id, addr, size);
     }
 }
 
@@ -78,7 +78,7 @@ public:
     void OnCPUWrite(VAddr addr, u64 size) override;
    void SyncGuestHost() override;
     void UnmapMemory(VAddr addr, u64 size) override;
-    void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
+    void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
     void SignalSemaphore(GPUVAddr addr, u32 value) override;
     void SignalSyncPoint(u32 value) override;
     void SignalReference() override;
@@ -480,12 +480,20 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
 }
 
 template <class P>
-void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
+void TextureCache<P>::UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size) {
     std::vector<ImageId> deleted_images;
-    ForEachImageInRegionGPU(gpu_addr, size,
+    ForEachImageInRegionGPU(as_id, gpu_addr, size,
                             [&](ImageId id, Image&) { deleted_images.push_back(id); });
     for (const ImageId id : deleted_images) {
         Image& image = slot_images[id];
+        if (True(image.flags & ImageFlagBits::CpuModified)) {
+            return;
+        }
+        image.flags |= ImageFlagBits::CpuModified;
+        if (True(image.flags & ImageFlagBits::Tracked)) {
+            UntrackImage(image, id);
+        }
+        /*
         if (True(image.flags & ImageFlagBits::Remapped)) {
             continue;
         }
@@ -493,6 +501,7 @@ void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
         if (True(image.flags & ImageFlagBits::Tracked)) {
             UntrackImage(image, id);
         }
+        */
     }
 }
 
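UnmapGPUMemory no longer walks the Remapped/deletion path (now commented out): an image hit by a GPU-side unmap is flagged CpuModified and, if it was tracked, untracked, so the cache afterwards treats guest memory as the authoritative copy for it. Note the diff exits the loop with return when an image is already CpuModified; the sketch below uses continue per image. A standalone sketch of that flag handling with a minimal flags enum; Image, True and UntrackImage are simplified stand-ins:

#include <cstdint>
#include <cstdio>
#include <vector>

enum class ImageFlagBits : std::uint32_t {
    CpuModified = 1u << 0, // contents must be refetched from guest memory before reuse
    Tracked = 1u << 1,     // CPU write-tracking is currently installed for this image
};

inline ImageFlagBits operator|(ImageFlagBits a, ImageFlagBits b) {
    return static_cast<ImageFlagBits>(static_cast<std::uint32_t>(a) | static_cast<std::uint32_t>(b));
}
inline ImageFlagBits operator&(ImageFlagBits a, ImageFlagBits b) {
    return static_cast<ImageFlagBits>(static_cast<std::uint32_t>(a) & static_cast<std::uint32_t>(b));
}
inline ImageFlagBits& operator|=(ImageFlagBits& a, ImageFlagBits b) { return a = a | b; }
inline bool True(ImageFlagBits f) { return static_cast<std::uint32_t>(f) != 0; }

struct Image {
    ImageFlagBits flags{};
};

void UntrackImage(Image&, std::size_t id) { std::printf("untrack image %zu\n", id); }

void OnGpuUnmap(std::vector<Image>& images) {
    for (std::size_t id = 0; id < images.size(); ++id) {
        Image& image = images[id];
        if (True(image.flags & ImageFlagBits::CpuModified)) {
            continue; // already pending a refetch, nothing more to do
        }
        image.flags |= ImageFlagBits::CpuModified;
        if (True(image.flags & ImageFlagBits::Tracked)) {
            UntrackImage(image, id);
        }
    }
}

int main() {
    std::vector<Image> images(2);
    images[1].flags |= ImageFlagBits::Tracked;
    OnGpuUnmap(images); // untracks image 1; both images end up CpuModified
}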
@@ -1322,13 +1331,19 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
 
 template <class P>
 template <typename Func>
-void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
+void TextureCache<P>::ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size,
+                                              Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
-    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = channel_state->gpu_page_table->find(page);
-        if (it == channel_state->gpu_page_table->end()) {
+    auto storage_id = getStorageID(as_id);
+    if (!storage_id) {
+        return;
+    }
+    auto& gpu_page_table = gpu_page_table_storage[*storage_id];
+    ForEachGPUPage(gpu_addr, size, [this, gpu_page_table, &images, gpu_addr, size, func](u64 page) {
+        const auto it = gpu_page_table.find(page);
+        if (it == gpu_page_table.end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
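ForEachImageInRegionGPU now resolves the caller's as_id to a storage slot first, then walks that address space's own page table rather than the table of the currently bound channel. A condensed sketch of that lookup-then-iterate shape; apart from getStorageID and gpu_page_table_storage, which are named in the diff, the containers and page granularity below are placeholders.

#include <cstdint>
#include <cstdio>
#include <optional>
#include <unordered_map>
#include <vector>

using GPUVAddr = std::uint64_t;
constexpr std::uint64_t PAGE_BITS = 20; // coarse "GPU page" granularity used for bucketing

using GpuPageTable = std::unordered_map<std::uint64_t, std::vector<int>>; // page -> image ids

std::vector<GpuPageTable> gpu_page_table_storage;         // one table per address space
std::unordered_map<std::size_t, std::size_t> storage_ids; // as_id -> index into the storage

std::optional<std::size_t> getStorageID(std::size_t as_id) {
    const auto it = storage_ids.find(as_id);
    return it == storage_ids.end() ? std::nullopt : std::optional<std::size_t>{it->second};
}

template <typename Func>
void ForEachImageInRegionGPU(std::size_t as_id, GPUVAddr gpu_addr, std::size_t size, Func&& func) {
    const auto storage_id = getStorageID(as_id);
    if (!storage_id) {
        return; // unknown address space: nothing cached for it
    }
    const GpuPageTable& table = gpu_page_table_storage[*storage_id];
    for (std::uint64_t page = gpu_addr >> PAGE_BITS; page <= (gpu_addr + size - 1) >> PAGE_BITS;
         ++page) {
        const auto it = table.find(page);
        if (it == table.end()) {
            continue;
        }
        for (const int image_id : it->second) {
            func(image_id);
        }
    }
}

int main() {
    gpu_page_table_storage.push_back({{0x1, {42}}});
    storage_ids[5] = 0;
    ForEachImageInRegionGPU(5, 0x100000, 0x1000, [](int id) { std::printf("image %d\n", id); });
}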
@@ -173,7 +173,7 @@ public:
     void UnmapMemory(VAddr cpu_addr, size_t size);
 
     /// Remove images in a region
-    void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
+    void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
 
     /// Blit an image with the given parameters
     void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
@@ -309,7 +309,7 @@ private:
     void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
 
     template <typename Func>
-    void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
+    void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
 
     template <typename Func>
     void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);