MemoryManager: initial multi paging system implementation.
@@ -19,6 +19,9 @@ MultiLevelPageTable<BaseAddr>::MultiLevelPageTable(std::size_t address_space_bit
                                                     std::size_t page_bits_)
     : address_space_bits{address_space_bits_},
       first_level_bits{first_level_bits_}, page_bits{page_bits_} {
+    if (page_bits == 0) {
+        return;
+    }
     first_level_shift = address_space_bits - first_level_bits;
     first_level_chunk_size = (1ULL << (first_level_shift - page_bits)) * sizeof(BaseAddr);
     alloc_size = (1ULL << (address_space_bits - page_bits)) * sizeof(BaseAddr);

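Note on the new early return: the GPU MemoryManager below constructs its small-page MultiLevelPageTable with `page_bits != big_page_bits ? page_bits : 0`, so a page size of zero is used as a "this table is unused" marker and the constructor has to bail out before it sizes its allocation. The arithmetic it skips is replayed below as a standalone, illustrative calculation (40 address-space bits and u32 entries assumed, matching the defaults used later in this commit):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative only: what alloc_size from the hunk above would come out to with
// page_bits = 0 versus a regular 4 KiB page table (u32 entries, 40-bit address space).
int main() {
    constexpr std::uint64_t zero_pages = (1ULL << (40 - 0)) * sizeof(std::uint32_t); // ~4 TiB
    constexpr std::uint64_t small_4k = (1ULL << (40 - 12)) * sizeof(std::uint32_t);  // 1 GiB
    std::printf("disabled table would reserve %llu bytes, 4 KiB table reserves %llu bytes\n",
                static_cast<unsigned long long>(zero_pages),
                static_cast<unsigned long long>(small_4k));
}
```
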
@@ -133,7 +133,8 @@ NvResult nvhost_as_gpu::AllocAsEx(const std::vector<u8>& input, std::vector<u8>&
     const u64 end_big_pages{(vm.va_range_end - vm.va_range_split) >> vm.big_page_size_bits};
     vm.big_page_allocator = std::make_unique<VM::Allocator>(start_big_pages, end_big_pages);

-    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, VM::PAGE_SIZE_BITS);
+    gmmu = std::make_shared<Tegra::MemoryManager>(system, 40, vm.big_page_size_bits,
+                                                  VM::PAGE_SIZE_BITS);
     system.GPU().InitAddressSpace(*gmmu);
     vm.initialised = true;

@@ -189,6 +190,7 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
         .size = size,
         .page_size = params.page_size,
         .sparse = (params.flags & MappingFlags::Sparse) != MappingFlags::None,
+        .big_pages = params.page_size != VM::YUZU_PAGESIZE,
     };

     std::memcpy(output.data(), &params, output.size());
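The new `big_pages` field records whether the allocation was requested with anything other than the small page size; Remap and the fixed path of MapBufferEx read it further down. A minimal sketch of the predicate (the constants are assumptions based on the defaults: 4 KiB small pages, 64 KiB big pages):

```cpp
#include <cstdint>

// Assumed constants: VM::YUZU_PAGESIZE = 0x1000 (4 KiB); big pages default to 0x10000.
constexpr std::uint32_t kSmallPage = 0x1000;

constexpr bool big_pages_for(std::uint32_t requested_page_size) {
    // Any request that is not the 4 KiB small page is tracked as a big-page allocation
    // and is later mapped through the big page table.
    return requested_page_size != kSmallPage;
}
static_assert(big_pages_for(0x10000));
static_assert(!big_pages_for(0x1000));
```
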
@@ -209,7 +211,7 @@ void nvhost_as_gpu::FreeMappingLocked(u64 offset) {
     // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
     // Only FreeSpace can unmap them fully
     if (mapping->sparse_alloc)
-        gmmu->MapSparse(offset, mapping->size);
+        gmmu->MapSparse(offset, mapping->size, mapping->big_page);
     else
         gmmu->Unmap(offset, mapping->size);

@@ -294,8 +296,9 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
             return NvResult::BadValue;
         }

+        const bool use_big_pages = alloc->second.big_pages;
         if (!entry.handle) {
-            gmmu->MapSparse(virtual_address, size);
+            gmmu->MapSparse(virtual_address, size, use_big_pages);
         } else {
             auto handle{nvmap.GetHandle(entry.handle)};
             if (!handle) {
@@ -306,7 +309,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
                 handle->address +
                 (static_cast<u64>(entry.handle_offset_big_pages) << vm.big_page_size_bits))};

-            gmmu->Map(virtual_address, cpu_address, size);
+            gmmu->Map(virtual_address, cpu_address, size, use_big_pages);
         }
     }

@@ -345,7 +348,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             u64 gpu_address{static_cast<u64>(params.offset + params.buffer_offset)};
             VAddr cpu_address{mapping->ptr + params.buffer_offset};

-            gmmu->Map(gpu_address, cpu_address, params.mapping_size);
+            gmmu->Map(gpu_address, cpu_address, params.mapping_size, mapping->big_page);

             return NvResult::Success;
         } catch ([[maybe_unused]] const std::out_of_range& e) {
@@ -363,22 +366,6 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
     VAddr cpu_address{static_cast<VAddr>(handle->address + params.buffer_offset)};
     u64 size{params.mapping_size ? params.mapping_size : handle->orig_size};

-    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
-        auto alloc{allocation_map.upper_bound(params.offset)};
-
-        if (alloc-- == allocation_map.begin() ||
-            (params.offset - alloc->first) + size > alloc->second.size) {
-            UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
-            return NvResult::BadValue;
-        }
-
-        gmmu->Map(params.offset, cpu_address, size);
-
-        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true, false,
-                                               alloc->second.sparse)};
-        alloc->second.mappings.push_back(mapping);
-        mapping_map[params.offset] = mapping;
-    } else {
     bool big_page{[&]() {
         if (Common::IsAligned(handle->align, vm.big_page_size))
             return true;
@@ -390,6 +377,24 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
         }
     }()};

+    if ((params.flags & MappingFlags::Fixed) != MappingFlags::None) {
+        auto alloc{allocation_map.upper_bound(params.offset)};
+
+        if (alloc-- == allocation_map.begin() ||
+            (params.offset - alloc->first) + size > alloc->second.size) {
+            UNREACHABLE_MSG("Cannot perform a fixed mapping into an unallocated region!");
+            return NvResult::BadValue;
+        }
+
+        const bool use_big_pages = alloc->second.big_pages && big_page;
+        gmmu->Map(params.offset, cpu_address, size, use_big_pages);
+
+        auto mapping{std::make_shared<Mapping>(cpu_address, params.offset, size, true,
+                                               use_big_pages, alloc->second.sparse)};
+        alloc->second.mappings.push_back(mapping);
+        mapping_map[params.offset] = mapping;
+    } else {
+
         auto& allocator{big_page ? *vm.big_page_allocator : *vm.small_page_allocator};
         u32 page_size{big_page ? vm.big_page_size : VM::YUZU_PAGESIZE};
         u32 page_size_bits{big_page ? vm.big_page_size_bits : VM::PAGE_SIZE_BITS};
@@ -402,7 +407,7 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
             return NvResult::InsufficientMemory;
         }

-        gmmu->Map(params.offset, cpu_address, size);
+        gmmu->Map(params.offset, cpu_address, Common::AlignUp(size, page_size), big_page);

         auto mapping{
             std::make_shared<Mapping>(cpu_address, params.offset, size, false, big_page, false)};
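Two things change in MapBufferEx here: the fixed path is moved below the `big_page` alignment check so it can combine `alloc->second.big_pages && big_page`, and the non-fixed path now maps the length rounded up to the chosen page size so the last GPU page is never left partially covered. A standalone illustration of the rounding (mirroring the effect of Common::AlignUp as used above; values are illustrative):

```cpp
#include <cstdint>

// Mirrors Common::AlignUp(size, page_size) from the hunk above, for a 64 KiB big page.
constexpr std::uint64_t align_up(std::uint64_t value, std::uint64_t page) {
    return (value + page - 1) / page * page;
}
static_assert(align_up(0x1800, 0x10000) == 0x10000);  // a 6 KiB buffer still maps one whole big page
static_assert(align_up(0x10000, 0x10000) == 0x10000); // exact multiples are unchanged
```
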
@@ -439,7 +444,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
         // Sparse mappings shouldn't be fully unmapped, just returned to their sparse state
         // Only FreeSpace can unmap them fully
         if (mapping->sparse_alloc) {
-            gmmu->MapSparse(params.offset, mapping->size);
+            gmmu->MapSparse(params.offset, mapping->size, mapping->big_page);
         } else {
             gmmu->Unmap(params.offset, mapping->size);
         }

@@ -177,6 +177,7 @@ private:
         std::list<std::shared_ptr<Mapping>> mappings;
         u32 page_size;
         bool sparse;
+        bool big_pages;
     };

     std::map<u64, std::shared_ptr<Mapping>>

@@ -9,6 +9,8 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/hle/kernel/k_page_table.h"
+#include "core/hle/kernel/k_process.h"
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvmap.h"
@@ -136,6 +138,10 @@ NvResult nvmap::IocAlloc(const std::vector<u8>& input, std::vector<u8>& output)
         LOG_CRITICAL(Service_NVDRV, "Object failed to allocate, handle={:08X}", params.handle);
         return result;
     }
+    ASSERT(system.CurrentProcess()
+               ->PageTable()
+               .LockForDeviceAddressSpace(handle_description->address, handle_description->size)
+               .IsSuccess());
     std::memcpy(output.data(), &params, sizeof(params));
     return result;
 }
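The lock taken at allocation time is paired with the unlock in IocFree below; pinning the handle's guest range keeps its host (and physical) backing stable while the GPU page tables in video_core cache translations for it. A hedged sketch of the pairing, using only the calls visible in these hunks:

```cpp
// Sketch only: allocation pins the range, free unpins it. Leaving either side out would
// leak a pinned region or unpin memory the GPU may still reference.
auto& page_table = system.CurrentProcess()->PageTable();
ASSERT(page_table.LockForDeviceAddressSpace(address, size).IsSuccess());   // IocAlloc path
// ... handle is mapped into the GPU address space and used ...
ASSERT(page_table.UnlockForDeviceAddressSpace(address, size).IsSuccess()); // IocFree path
```
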
@@ -256,6 +262,10 @@ NvResult nvmap::IocFree(const std::vector<u8>& input, std::vector<u8>& output) {
     }

     if (auto freeInfo{file.FreeHandle(params.handle, false)}) {
+        ASSERT(system.CurrentProcess()
+                   ->PageTable()
+                   .UnlockForDeviceAddressSpace(freeInfo->address, freeInfo->size)
+                   .IsSuccess());
         params.address = freeInfo->address;
         params.size = static_cast<u32>(freeInfo->size);
         params.flags.raw = 0;

@@ -7,6 +7,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
+#include "core/device_memory.h"
 #include "core/hle/kernel/k_page_table.h"
 #include "core/hle/kernel/k_process.h"
 #include "core/memory.h"
@@ -14,41 +15,70 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_base.h"

+#pragma optimize("", off)
+
 namespace Tegra {

 std::atomic<size_t> MemoryManager::unique_identifier_generator{};

-MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 page_bits_)
-    : system{system_}, address_space_bits{address_space_bits_}, page_bits{page_bits_}, entries{},
-      page_table{address_space_bits, address_space_bits + page_bits - 38, page_bits},
+MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64 big_page_bits_,
+                             u64 page_bits_)
+    : system{system_}, memory{system.Memory()}, device_memory{system.DeviceMemory()},
+      address_space_bits{address_space_bits_}, page_bits{page_bits_}, big_page_bits{big_page_bits_},
+      entries{}, big_entries{}, page_table{address_space_bits, address_space_bits + page_bits - 38,
+                                           page_bits != big_page_bits ? page_bits : 0},
       unique_identifier{unique_identifier_generator.fetch_add(1, std::memory_order_acq_rel)} {
     address_space_size = 1ULL << address_space_bits;
-    allocate_start = address_space_bits > 32 ? 1ULL << 32 : 0;
     page_size = 1ULL << page_bits;
     page_mask = page_size - 1ULL;
-    const u64 page_table_bits = address_space_bits - cpu_page_bits;
+    big_page_size = 1ULL << big_page_bits;
+    big_page_mask = big_page_size - 1ULL;
+    const u64 page_table_bits = address_space_bits - page_bits;
+    const u64 big_page_table_bits = address_space_bits - big_page_bits;
     const u64 page_table_size = 1ULL << page_table_bits;
+    const u64 big_page_table_size = 1ULL << big_page_table_bits;
     page_table_mask = page_table_size - 1;
+    big_page_table_mask = big_page_table_size - 1;
+
+    big_entries.resize(big_page_table_size / 32, 0);
+    big_page_table_cpu.resize(big_page_table_size);
+    big_page_table_physical.resize(big_page_table_size);
     entries.resize(page_table_size / 32, 0);
 }
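With the defaults nvhost_as_gpu now passes (40 address-space bits, 12-bit small pages, 16-bit big pages), the tables sized above come out as follows; this is just the constructor's arithmetic replayed as a standalone worked example:

```cpp
#include <cstdint>
#include <cstdio>

// Worked example of the constructor math for address_space_bits = 40, page_bits = 12,
// big_page_bits = 16. Nothing here is new behaviour, only the arithmetic from above.
int main() {
    constexpr std::uint64_t as_bits = 40, page_bits = 12, big_bits = 16;
    constexpr std::uint64_t page_table_size = 1ULL << (as_bits - page_bits); // 268,435,456 entries
    constexpr std::uint64_t big_table_size = 1ULL << (as_bits - big_bits);   //  16,777,216 entries
    constexpr std::uint64_t entry_words = page_table_size / 32;   // 2-bit state per page, 32 per u64
    constexpr std::uint64_t big_entry_words = big_table_size / 32;
    std::printf("small entries=%llu big entries=%llu state words=%llu/%llu\n",
                static_cast<unsigned long long>(page_table_size),
                static_cast<unsigned long long>(big_table_size),
                static_cast<unsigned long long>(entry_words),
                static_cast<unsigned long long>(big_entry_words));
}
```
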

 MemoryManager::~MemoryManager() = default;

+template <bool is_big_page>
 MemoryManager::EntryType MemoryManager::GetEntry(size_t position) const {
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
+    } else {
         position = position >> page_bits;
         const u64 entry_mask = entries[position / 32];
         const size_t sub_index = position % 32;
         return static_cast<EntryType>((entry_mask >> (2 * sub_index)) & 0x03ULL);
     }
+}

+template <bool is_big_page>
 void MemoryManager::SetEntry(size_t position, MemoryManager::EntryType entry) {
+    if constexpr (is_big_page) {
+        position = position >> big_page_bits;
+        const u64 entry_mask = big_entries[position / 32];
+        const size_t sub_index = position % 32;
+        big_entries[position / 32] =
+            (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
+    } else {
         position = position >> page_bits;
         const u64 entry_mask = entries[position / 32];
         const size_t sub_index = position % 32;
         entries[position / 32] =
             (~(3ULL << sub_index * 2) & entry_mask) | (static_cast<u64>(entry) << sub_index * 2);
     }
+}
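Both entry bitmaps pack 32 two-bit page states into each u64 word, which is why the constructor sizes them as `table_size / 32`. A standalone mirror of the shift/mask logic used by GetEntry/SetEntry (the Free/Reserved/Mapped numeric values are an assumption; only the enum names appear in this diff):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustration of the 2-bit packing above: page state lives at bits [2*sub, 2*sub+1]
// of word page/32, where `page` is the GPU address already divided by the page size.
enum class EntryType : std::uint64_t { Free = 0, Reserved = 1, Mapped = 2 }; // values assumed

EntryType get_entry(const std::vector<std::uint64_t>& words, std::size_t page) {
    const std::uint64_t word = words[page / 32];
    const std::size_t sub = page % 32;
    return static_cast<EntryType>((word >> (2 * sub)) & 0x03ULL);
}

void set_entry(std::vector<std::uint64_t>& words, std::size_t page, EntryType e) {
    const std::size_t sub = page % 32;
    const std::uint64_t word = words[page / 32];
    words[page / 32] =
        (~(3ULL << (sub * 2)) & word) | (static_cast<std::uint64_t>(e) << (sub * 2));
}
```
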

 template <MemoryManager::EntryType entry_type>
 GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
@@ -59,48 +89,66 @@ GPUVAddr MemoryManager::PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cp
     }
     for (u64 offset{}; offset < size; offset += page_size) {
         const GPUVAddr current_gpu_addr = gpu_addr + offset;
-        [[maybe_unused]] const auto current_entry_type = GetEntry(current_gpu_addr);
-        SetEntry(current_gpu_addr, entry_type);
+        [[maybe_unused]] const auto current_entry_type = GetEntry<false>(current_gpu_addr);
+        SetEntry<false>(current_gpu_addr, entry_type);
         if (current_entry_type != entry_type) {
             rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, page_size);
         }
         if constexpr (entry_type == EntryType::Mapped) {
             const VAddr current_cpu_addr = cpu_addr + offset;
-            const auto index = PageEntryIndex(current_gpu_addr);
-            const u32 sub_value = static_cast<u32>(current_cpu_addr >> 12ULL);
-            if (current_entry_type == entry_type && sub_value != page_table[index]) {
-                rasterizer->InvalidateRegion(static_cast<VAddr>(page_table[index]) << 12ULL,
-                                             page_size);
-            }
-            page_table[index] = static_cast<u32>(current_cpu_addr >> 12ULL);
+            const auto index = PageEntryIndex<false>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            page_table[index] = sub_value;
         }
         remaining_size -= page_size;
     }
     return gpu_addr;
 }

+template <MemoryManager::EntryType entry_type>
+GPUVAddr MemoryManager::BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr,
+                                       size_t size) {
+    u64 remaining_size{size};
+    for (u64 offset{}; offset < size; offset += big_page_size) {
+        const GPUVAddr current_gpu_addr = gpu_addr + offset;
+        [[maybe_unused]] const auto current_entry_type = GetEntry<true>(current_gpu_addr);
+        SetEntry<true>(current_gpu_addr, entry_type);
+        if (current_entry_type != entry_type) {
+            rasterizer->ModifyGPUMemory(unique_identifier, gpu_addr, big_page_size);
+        }
+        if constexpr (entry_type == EntryType::Mapped) {
+            const VAddr current_cpu_addr = cpu_addr + offset;
+            const auto index = PageEntryIndex<true>(current_gpu_addr);
+            const u32 sub_value = static_cast<u32>(current_cpu_addr >> cpu_page_bits);
+            big_page_table_cpu[index] = sub_value;
+            const PAddr phys_address =
+                device_memory.GetPhysicalAddr(memory.GetPointer(current_cpu_addr));
+            big_page_table_physical[index] = static_cast<u32>(phys_address);
+        }
+        remaining_size -= big_page_size;
+    }
+    return gpu_addr;
+}
+
 void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
     rasterizer = rasterizer_;
 }

-GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
+                            bool is_big_pages) {
+    if (is_big_pages) [[likely]] {
+        return BigPageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
+    }
     return PageTableOp<EntryType::Mapped>(gpu_addr, cpu_addr, size);
 }

-GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size) {
+GPUVAddr MemoryManager::MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages) {
+    if (is_big_pages) [[likely]] {
+        return BigPageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
+    }
     return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
 }
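Callers now choose the page table per mapping. A hypothetical caller sketch of the reworked public API above (the default for `is_big_pages` is `true`, per the header below):

```cpp
// Hypothetical usage, mirroring how nvhost_as_gpu drives the API in this commit.
gmmu->Map(gpu_addr, cpu_addr, size, /*is_big_pages=*/true);   // 64 KiB-granular mapping
gmmu->Map(gpu_addr, cpu_addr, size, /*is_big_pages=*/false);  // 4 KiB-granular mapping
gmmu->MapSparse(gpu_addr, size);                              // reserve only, no CPU backing
gmmu->Unmap(gpu_addr, size);                                  // clears both tables (see Unmap)
```
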

-GPUVAddr MemoryManager::MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align) {
-    return Map(*FindFreeRange(size, align), cpu_addr, size);
-}
-
-GPUVAddr MemoryManager::MapAllocate32(VAddr cpu_addr, std::size_t size) {
-    const std::optional<GPUVAddr> gpu_addr = FindFreeRange(size, 1, true);
-    ASSERT(gpu_addr);
-    return Map(*gpu_addr, cpu_addr, size);
-}
-
 void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
     if (size == 0) {
         return;
@@ -115,63 +163,26 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
         rasterizer->UnmapMemory(*cpu_addr, map_size);
     }

+    BigPageTableOp<EntryType::Free>(gpu_addr, 0, size);
     PageTableOp<EntryType::Free>(gpu_addr, 0, size);
 }

-std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
-    for (u64 offset{}; offset < size; offset += page_size) {
-        if (GetEntry(gpu_addr + offset) != EntryType::Free) {
-            return std::nullopt;
-        }
-    }
-
-    return PageTableOp<EntryType::Reserved>(gpu_addr, 0, size);
-}
-
-GPUVAddr MemoryManager::Allocate(std::size_t size, std::size_t align) {
-    return *AllocateFixed(*FindFreeRange(size, align), size);
-}
-
-std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
-                                                     bool start_32bit_address) const {
-    if (!align) {
-        align = page_size;
-    } else {
-        align = Common::AlignUp(align, page_size);
-    }
-
-    u64 available_size{};
-    GPUVAddr gpu_addr{start_32bit_address ? 0 : allocate_start};
-    while (gpu_addr + available_size < address_space_size) {
-        if (GetEntry(gpu_addr + available_size) == EntryType::Free) {
-            available_size += page_size;
-
-            if (available_size >= size) {
-                return gpu_addr;
-            }
-        } else {
-            gpu_addr += available_size + page_size;
-            available_size = 0;
-
-            const auto remainder{gpu_addr % align};
-            if (remainder) {
-                gpu_addr = (gpu_addr - remainder) + align;
-            }
-        }
-    }
-
-    return std::nullopt;
-}
-
 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
-    if (GetEntry(gpu_addr) != EntryType::Mapped) {
-        return std::nullopt;
-    }
-
-    const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex(gpu_addr)]) << 12ULL;
-    return cpu_addr_base + (gpu_addr & page_mask);
-}
+    if (GetEntry<true>(gpu_addr) != EntryType::Mapped) [[unlikely]] {
+        if (GetEntry<false>(gpu_addr) != EntryType::Mapped) {
+            return std::nullopt;
+        }
+
+        const VAddr cpu_addr_base = static_cast<VAddr>(page_table[PageEntryIndex<false>(gpu_addr)])
+                                    << cpu_page_bits;
+        return cpu_addr_base + (gpu_addr & page_mask);
+    }
+
+    const VAddr cpu_addr_base =
+        static_cast<VAddr>(big_page_table_cpu[PageEntryIndex<true>(gpu_addr)]) << cpu_page_bits;
+    return cpu_addr_base + (gpu_addr & big_page_mask);
+}

 std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
     size_t page_index{addr >> page_bits};
     const size_t page_last{(addr + size + page_size - 1) >> page_bits};
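Both tables store the CPU address of the page start shifted right by `cpu_page_bits` (12), so translation is "entry << 12 plus the offset inside the GPU page", with only the mask width differing (16 bits for 64 KiB big pages, 12 bits for 4 KiB pages). An illustrative replay of that math:

```cpp
#include <cstdint>

// Illustration of the lookup used by GpuToCpuAddress above; the table entry and masks
// are passed in directly since this is not the class member code.
constexpr std::uint64_t translate(std::uint32_t table_entry, std::uint64_t gpu_addr,
                                  std::uint64_t page_mask) {
    return (std::uint64_t{table_entry} << 12) + (gpu_addr & page_mask);
}
static_assert(translate(0x12345, 0x12345, 0xFFFF) == (0x12345ULL << 12) + 0x2345); // big page
static_assert(translate(0x12345, 0x12345, 0x0FFF) == (0x12345ULL << 12) + 0x0345); // small page
```
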
@@ -225,7 +236,7 @@ u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) {
         return {};
     }

-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }

 const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
@@ -234,98 +245,161 @@ const u8* MemoryManager::GetPointer(GPUVAddr gpu_addr) const {
         return {};
     }

-    return system.Memory().GetPointer(*address);
+    return memory.GetPointer(*address);
 }

-void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                                  bool is_safe) const {
-    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_src_addr >> page_bits};
-    std::size_t page_offset{gpu_src_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto src_addr{*page_addr + page_offset};
-            if (is_safe) {
-                // Flush must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is read (even when in asynchronous GPU mode).
-                // Fixes Dead Cells title menu.
-                rasterizer->FlushRegion(src_addr, copy_amount);
-            }
-            system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
-        } else {
-            std::memset(dest_buffer, 0, copy_amount);
-        }
-
-        page_index++;
-        page_offset = 0;
-        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
-        remaining_size -= copy_amount;
-    }
-}
+#pragma inline_recursion(on)
+
+template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
+                                           FuncMapped&& func_mapped, FuncReserved&& func_reserved,
+                                           FuncUnmapped&& func_unmapped) const {
+    u64 used_page_size;
+    u64 used_page_mask;
+    u64 used_page_bits;
+    if constexpr (is_big_pages) {
+        used_page_size = big_page_size;
+        used_page_mask = big_page_mask;
+        used_page_bits = big_page_bits;
+    } else {
+        used_page_size = page_size;
+        used_page_mask = page_mask;
+        used_page_bits = page_bits;
+    }
+    std::size_t remaining_size{size};
+    std::size_t page_index{gpu_src_addr >> used_page_bits};
+    std::size_t page_offset{gpu_src_addr & used_page_mask};
+    GPUVAddr current_address = gpu_src_addr;
+
+    while (remaining_size > 0) {
+        const std::size_t copy_amount{
+            std::min(static_cast<std::size_t>(used_page_size) - page_offset, remaining_size)};
+        auto entry = GetEntry<is_big_pages>(current_address);
+        if (entry == EntryType::Mapped) [[likely]] {
+            func_mapped(page_index, page_offset, copy_amount);
+        } else if (entry == EntryType::Reserved) {
+            func_reserved(page_index, page_offset, copy_amount);
+        } else [[unlikely]] {
+            func_unmapped(page_index, page_offset, copy_amount);
+        }
+
+        page_index++;
+        page_offset = 0;
+        remaining_size -= copy_amount;
+        current_address += copy_amount;
+    }
+}
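MemoryOperation is the new traversal core: it walks a GPU range one (big or small) page at a time and dispatches each piece to one of three callbacks according to the entry state. The read/write/flush helpers below, and GetSubmappedRange further down, are all thin callback sets over it, with the big-page walker recursing into a small-page walk for ranges the big table does not cover. A hypothetical callback set, written as if it were another MemoryManager member, that merely measures a range:

```cpp
// Hypothetical example (CountRange is not part of this commit): classify how many bytes
// of a GPU range are mapped, sparse-reserved or unmapped, using the same callback shape
// as ReadBlockImpl/WriteBlockImpl/FlushRegion below. Requires <array>.
std::array<std::size_t, 3> MemoryManager::CountRange(GPUVAddr gpu_addr, std::size_t size) const {
    std::size_t mapped{}, reserved{}, unmapped{};
    auto on_mapped = [&](std::size_t, std::size_t, std::size_t amount) { mapped += amount; };
    auto on_reserved = [&](std::size_t, std::size_t, std::size_t amount) { reserved += amount; };
    auto on_unmapped = [&](std::size_t, std::size_t, std::size_t amount) { unmapped += amount; };
    MemoryOperation<true>(gpu_addr, size, on_mapped, on_reserved, on_unmapped);
    return {mapped, reserved, unmapped};
}
```
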

+template <bool is_safe>
+void MemoryManager::ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer,
+                                  std::size_t size) const {
+    auto set_to_zero = [&]([[maybe_unused]] std::size_t page_index,
+                           [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        std::memset(dest_buffer, 0, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+        }
+        memory.ReadBlockUnsafe(cpu_addr_base, dest_buffer, copy_amount);
+        // u8* physical = device_memory.GetPointer(big_page_table_physical[page_index] + offset);
+        // std::memcpy(dest_buffer, physical, copy_amount);
+        dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
+    };
+    auto read_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, set_to_zero, set_to_zero);
+    };
+    MemoryOperation<true>(gpu_src_addr, size, mapped_big, set_to_zero, read_short_pages);
+}
+
 void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, true);
+    ReadBlockImpl<true>(gpu_src_addr, dest_buffer, size);
 }

 void MemoryManager::ReadBlockUnsafe(GPUVAddr gpu_src_addr, void* dest_buffer,
                                     const std::size_t size) const {
-    ReadBlockImpl(gpu_src_addr, dest_buffer, size, false);
+    ReadBlockImpl<false>(gpu_src_addr, dest_buffer, size);
 }

-void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                                   bool is_safe) {
-    std::size_t remaining_size{size};
-    std::size_t page_index{gpu_dest_addr >> page_bits};
-    std::size_t page_offset{gpu_dest_addr & page_mask};
-
-    while (remaining_size > 0) {
-        const std::size_t copy_amount{
-            std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (page_addr) {
-            const auto dest_addr{*page_addr + page_offset};
-
-            if (is_safe) {
-                // Invalidate must happen on the rasterizer interface, such that memory is always
-                // synchronous when it is written (even when in asynchronous GPU mode).
-                rasterizer->InvalidateRegion(dest_addr, copy_amount);
-            }
-            system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
-        }
-
-        page_index++;
-        page_offset = 0;
-        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
-        remaining_size -= copy_amount;
-    }
-}
+template <bool is_safe>
+void MemoryManager::WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer,
+                                   std::size_t size) {
+    auto just_advance = [&]([[maybe_unused]] std::size_t page_index,
+                            [[maybe_unused]] std::size_t offset, std::size_t copy_amount) {
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        }
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if constexpr (is_safe) {
+            rasterizer->InvalidateRegion(cpu_addr_base, copy_amount);
+        }
+        memory.WriteBlockUnsafe(cpu_addr_base, src_buffer, copy_amount);
+        /*u8* physical =
+            device_memory.GetPointer(big_page_table_physical[page_index] << cpu_page_bits) + offset;
+        std::memcpy(physical, src_buffer, copy_amount);*/
+        src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
+    };
+    auto write_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, just_advance, just_advance);
+    };
+    MemoryOperation<true>(gpu_dest_addr, size, mapped_big, just_advance, write_short_pages);
+}

 void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, true);
+    WriteBlockImpl<true>(gpu_dest_addr, src_buffer, size);
 }

 void MemoryManager::WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer,
                                      std::size_t size) {
-    WriteBlockImpl(gpu_dest_addr, src_buffer, size, false);
+    WriteBlockImpl<false>(gpu_dest_addr, src_buffer, size);
 }

 void MemoryManager::FlushRegion(GPUVAddr gpu_addr, size_t size) const {
-    size_t remaining_size{size};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t page_offset{gpu_addr & page_mask};
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        if (const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; page_addr) {
-            rasterizer->FlushRegion(*page_addr + page_offset, num_bytes);
-        }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-    }
+    auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
+                          [[maybe_unused]] std::size_t offset,
+                          [[maybe_unused]] std::size_t copy_amount) {};
+
+    auto mapped_normal = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto mapped_big = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        rasterizer->FlushRegion(cpu_addr_base, copy_amount);
+    };
+    auto flush_short_pages = [&](std::size_t page_index, std::size_t offset,
+                                 std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, mapped_normal, do_nothing, do_nothing);
+    };
+    MemoryOperation<true>(gpu_addr, size, mapped_big, do_nothing, flush_short_pages);
 }

 void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size) {
@@ -348,7 +422,7 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
 }

 bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
-    size_t page_index{gpu_addr >> page_bits};
+    size_t page_index{gpu_addr >> big_page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     std::optional<VAddr> old_page_addr{};
     while (page_index != page_last) {
@@ -371,7 +445,7 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     size_t page_index{gpu_addr >> page_bits};
     const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
     while (page_index < page_last) {
-        if (GetEntry(page_index << page_bits) == EntryType::Free) {
+        if (GetEntry<false>(page_index << page_bits) == EntryType::Free) {
             return false;
         }
         ++page_index;
@@ -379,47 +453,63 @@ bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) cons
     return true;
 }

+#pragma inline_recursion(on)
+
 std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
     GPUVAddr gpu_addr, std::size_t size) const {
     std::vector<std::pair<GPUVAddr, std::size_t>> result{};
-    size_t page_index{gpu_addr >> page_bits};
-    size_t remaining_size{size};
-    size_t page_offset{gpu_addr & page_mask};
     std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
     std::optional<VAddr> old_page_addr{};
-    const auto extend_size = [this, &last_segment, &page_index, &page_offset](std::size_t bytes) {
-        if (!last_segment) {
-            const GPUVAddr new_base_addr = (page_index << page_bits) + page_offset;
-            last_segment = {new_base_addr, bytes};
-        } else {
-            last_segment->second += bytes;
-        }
-    };
-    const auto split = [&last_segment, &result] {
+    const auto split = [&last_segment, &result]([[maybe_unused]] std::size_t page_index,
+                                                [[maybe_unused]] std::size_t offset,
+                                                [[maybe_unused]] std::size_t copy_amount) {
         if (last_segment) {
             result.push_back(*last_segment);
             last_segment = std::nullopt;
         }
     };
-    while (remaining_size > 0) {
-        const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
-        const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
-        if (!page_addr || *page_addr == 0) {
-            split();
-        } else if (old_page_addr) {
-            if (*old_page_addr + page_size != *page_addr) {
-                split();
-            }
-            extend_size(num_bytes);
-        } else {
-            extend_size(num_bytes);
-        }
-        ++page_index;
-        page_offset = 0;
-        remaining_size -= num_bytes;
-        old_page_addr = page_addr;
-    }
-    split();
+    const auto extend_size_big = [this, &split, &old_page_addr,
+                                  &last_segment](std::size_t page_index, std::size_t offset,
+                                                 std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(big_page_table_cpu[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
+            }
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << big_page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
+        } else {
+            last_segment->second += copy_amount;
+        }
+    };
+    const auto extend_size_short = [this, &split, &old_page_addr,
+                                    &last_segment](std::size_t page_index, std::size_t offset,
+                                                   std::size_t copy_amount) {
+        const VAddr cpu_addr_base =
+            (static_cast<VAddr>(page_table[page_index]) << cpu_page_bits) + offset;
+        if (old_page_addr) {
+            if (*old_page_addr != cpu_addr_base) {
+                split(0, 0, 0);
+            }
+        }
+        old_page_addr = {cpu_addr_base + copy_amount};
+        if (!last_segment) {
+            const GPUVAddr new_base_addr = (page_index << page_bits) + offset;
+            last_segment = {new_base_addr, copy_amount};
+        } else {
+            last_segment->second += copy_amount;
+        }
+    };
+    auto do_short_pages = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
+        GPUVAddr base = (page_index << big_page_bits) + offset;
+        MemoryOperation<false>(base, copy_amount, extend_size_short, split, split);
+    };
+    MemoryOperation<true>(gpu_addr, size, extend_size_big, split, do_short_pages);
+    split(0, 0, 0);
     return result;
 }

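GetSubmappedRange now coalesces GPU pages into a segment only while their CPU backing continues exactly where the previous page ended (the `*old_page_addr != cpu_addr_base` check forces a split otherwise). A hypothetical caller sketch of the unchanged public signature:

```cpp
// Hypothetical caller: iterate only the CPU-backed pieces of a GPU range. Each pair is
// a GPU-contiguous run whose CPU backing is also contiguous, per the split rule above.
for (const auto& [gpu_base, length] : gmmu.GetSubmappedRange(gpu_addr, size)) {
    // e.g. hand (gpu_base, length) to the buffer cache one contiguous chunk at a time
}
```
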
@@ -10,21 +10,26 @@

 #include "common/common_types.h"
 #include "common/multi_level_page_table.h"
+#include "common/virtual_buffer.h"

 namespace VideoCore {
 class RasterizerInterface;
 }

 namespace Core {
+class DeviceMemory;
+namespace Memory {
+class Memory;
+} // namespace Memory
 class System;
-}
+} // namespace Core

 namespace Tegra {

 class MemoryManager final {
 public:
     explicit MemoryManager(Core::System& system_, u64 address_space_bits_ = 40,
-                           u64 page_bits_ = 16);
+                           u64 big_page_bits_ = 16, u64 page_bits_ = 12);
     ~MemoryManager();

     size_t GetID() const {
@@ -93,12 +98,8 @@ public:
     std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
                                                                     std::size_t size) const;

-    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size);
-    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
-    [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
-    [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
-    [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
+    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size, bool is_big_pages = true);
+    GPUVAddr MapSparse(GPUVAddr gpu_addr, std::size_t size, bool is_big_pages = true);
     void Unmap(GPUVAddr gpu_addr, std::size_t size);

     void FlushRegion(GPUVAddr gpu_addr, size_t size) const;
@@ -107,26 +108,42 @@ private:
     [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align,
                                                         bool start_32bit_address = false) const;

-    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size,
-                       bool is_safe) const;
-    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size,
-                        bool is_safe);
+    template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
+    inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,
+                                FuncReserved&& func_reserved, FuncUnmapped&& func_unmapped) const;

+    template <bool is_safe>
+    void ReadBlockImpl(GPUVAddr gpu_src_addr, void* dest_buffer, std::size_t size) const;
+
+    template <bool is_safe>
+    void WriteBlockImpl(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
+
+    template <bool is_big_page>
     [[nodiscard]] inline std::size_t PageEntryIndex(GPUVAddr gpu_addr) const {
+        if constexpr (is_big_page) {
+            return (gpu_addr >> big_page_bits) & big_page_table_mask;
+        } else {
             return (gpu_addr >> page_bits) & page_table_mask;
         }
+    }

     Core::System& system;
+    Core::Memory::Memory& memory;
+    Core::DeviceMemory& device_memory;
+
     const u64 address_space_bits;
     const u64 page_bits;
     u64 address_space_size;
-    u64 allocate_start;
     u64 page_size;
     u64 page_mask;
     u64 page_table_mask;
     static constexpr u64 cpu_page_bits{12};

+    const u64 big_page_bits;
+    u64 big_page_size;
+    u64 big_page_mask;
+    u64 big_page_table_mask;
+
     VideoCore::RasterizerInterface* rasterizer = nullptr;

     enum class EntryType : u64 {
@@ -136,15 +153,23 @@ private:
     };

     std::vector<u64> entries;
+    std::vector<u64> big_entries;

     template <EntryType entry_type>
     GPUVAddr PageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);

-    EntryType GetEntry(size_t position) const;
+    template <EntryType entry_type>
+    GPUVAddr BigPageTableOp(GPUVAddr gpu_addr, [[maybe_unused]] VAddr cpu_addr, size_t size);

-    void SetEntry(size_t position, EntryType entry);
+    template <bool is_big_page>
+    inline EntryType GetEntry(size_t position) const;
+
+    template <bool is_big_page>
+    inline void SetEntry(size_t position, EntryType entry);

     Common::MultiLevelPageTable<u32> page_table;
+    Common::VirtualBuffer<u32> big_page_table_cpu;
+    Common::VirtualBuffer<u32> big_page_table_physical;

     const size_t unique_identifier;