Implement async downloads in Normal GPU accuracy and fix a few issues.
This commit is contained in:
		| @@ -22,6 +22,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||||||
|     void(slot_buffers.insert(runtime, NullBufferParams{})); |     void(slot_buffers.insert(runtime, NullBufferParams{})); | ||||||
|     common_ranges.clear(); |     common_ranges.clear(); | ||||||
|  |  | ||||||
|  |     active_async_buffers = IMPLEMENTS_ASYNC_DOWNLOADS && !Settings::IsGPULevelHigh(); | ||||||
|  |  | ||||||
|     if (!runtime.CanReportMemoryUsage()) { |     if (!runtime.CanReportMemoryUsage()) { | ||||||
|         minimum_memory = DEFAULT_EXPECTED_MEMORY; |         minimum_memory = DEFAULT_EXPECTED_MEMORY; | ||||||
|         critical_memory = DEFAULT_CRITICAL_MEMORY; |         critical_memory = DEFAULT_CRITICAL_MEMORY; | ||||||
| @@ -72,6 +74,8 @@ void BufferCache<P>::TickFrame() { | |||||||
|     uniform_cache_hits[0] = 0; |     uniform_cache_hits[0] = 0; | ||||||
|     uniform_cache_shots[0] = 0; |     uniform_cache_shots[0] = 0; | ||||||
|  |  | ||||||
|  |     active_async_buffers = IMPLEMENTS_ASYNC_DOWNLOADS && !Settings::IsGPULevelHigh(); | ||||||
|  |  | ||||||
|     const bool skip_preferred = hits * 256 < shots * 251; |     const bool skip_preferred = hits * 256 < shots * 251; | ||||||
|     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; |     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; | ||||||
|  |  | ||||||
| @@ -130,7 +134,7 @@ void BufferCache<P>::WaitOnAsyncFlushes(VAddr cpu_addr, u64 size) { | |||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | void BufferCache<P>::ClearDownload(IntervalType subtract_interval) { | ||||||
|     async_downloads -= std::make_pair(subtract_interval, std::numeric_limits<int>::max()); |     RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1024); | ||||||
|     uncommitted_ranges.subtract(subtract_interval); |     uncommitted_ranges.subtract(subtract_interval); | ||||||
|     pending_ranges.subtract(subtract_interval); |     pending_ranges.subtract(subtract_interval); | ||||||
|     for (auto& interval_set : committed_ranges) { |     for (auto& interval_set : committed_ranges) { | ||||||
| @@ -173,18 +177,14 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am | |||||||
|     }}; |     }}; | ||||||
|  |  | ||||||
|     boost::container::small_vector<IntervalType, 4> tmp_intervals; |     boost::container::small_vector<IntervalType, 4> tmp_intervals; | ||||||
|     const bool is_high_accuracy = |  | ||||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High; |  | ||||||
|     auto mirror = [&](VAddr base_address, VAddr base_address_end) { |     auto mirror = [&](VAddr base_address, VAddr base_address_end) { | ||||||
|         const u64 size = base_address_end - base_address; |         const u64 size = base_address_end - base_address; | ||||||
|         const VAddr diff = base_address - *cpu_src_address; |         const VAddr diff = base_address - *cpu_src_address; | ||||||
|         const VAddr new_base_address = *cpu_dest_address + diff; |         const VAddr new_base_address = *cpu_dest_address + diff; | ||||||
|         const IntervalType add_interval{new_base_address, new_base_address + size}; |         const IntervalType add_interval{new_base_address, new_base_address + size}; | ||||||
|         tmp_intervals.push_back(add_interval); |         tmp_intervals.push_back(add_interval); | ||||||
|         if (is_high_accuracy) { |  | ||||||
|         uncommitted_ranges.add(add_interval); |         uncommitted_ranges.add(add_interval); | ||||||
|         pending_ranges.add(add_interval); |         pending_ranges.add(add_interval); | ||||||
|         } |  | ||||||
|     }; |     }; | ||||||
|     ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); |     ForEachInRangeSet(common_ranges, *cpu_src_address, amount, mirror); | ||||||
|     // This subtraction in this order is important for overlapping copies. |     // This subtraction in this order is important for overlapping copies. | ||||||
| @@ -468,7 +468,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||||||
|     AccumulateFlushes(); |     AccumulateFlushes(); | ||||||
|  |  | ||||||
|     if (committed_ranges.empty()) { |     if (committed_ranges.empty()) { | ||||||
|         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |         if (active_async_buffers) { | ||||||
|             async_buffers.emplace_back(std::optional<Async_Buffer>{}); |             async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||||||
|         } |         } | ||||||
|         return; |         return; | ||||||
| @@ -529,11 +529,12 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||||||
|     } |     } | ||||||
|     committed_ranges.clear(); |     committed_ranges.clear(); | ||||||
|     if (downloads.empty()) { |     if (downloads.empty()) { | ||||||
|         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |         if (active_async_buffers) { | ||||||
|             async_buffers.emplace_back(std::optional<Async_Buffer>{}); |             async_buffers.emplace_back(std::optional<Async_Buffer>{}); | ||||||
|         } |         } | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
|  |     if (active_async_buffers) { | ||||||
|         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { |         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { | ||||||
|             auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); |             auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); | ||||||
|             boost::container::small_vector<BufferCopy, 4> normalized_copies; |             boost::container::small_vector<BufferCopy, 4> normalized_copies; | ||||||
| @@ -554,6 +555,7 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||||||
|             runtime.PostCopyBarrier(); |             runtime.PostCopyBarrier(); | ||||||
|             pending_downloads.emplace_back(std::move(normalized_copies)); |             pending_downloads.emplace_back(std::move(normalized_copies)); | ||||||
|             async_buffers.emplace_back(download_staging); |             async_buffers.emplace_back(download_staging); | ||||||
|  |         } | ||||||
|     } else { |     } else { | ||||||
|         if constexpr (USE_MEMORY_MAPS) { |         if constexpr (USE_MEMORY_MAPS) { | ||||||
|             auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); |             auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); | ||||||
| @@ -624,7 +626,8 @@ void BufferCache<P>::PopAsyncBuffers() { | |||||||
|                         common_ranges.subtract(base_interval); |                         common_ranges.subtract(base_interval); | ||||||
|                     } |                     } | ||||||
|                 }); |                 }); | ||||||
|             async_downloads -= std::make_pair(IntervalType(cpu_addr, cpu_addr + copy.size), 1); |             const IntervalType subtract_interval{cpu_addr, cpu_addr + copy.size}; | ||||||
|  |             RemoveEachInOverlapCounter(async_downloads, subtract_interval, -1); | ||||||
|         } |         } | ||||||
|         runtime.FreeDeferredStagingBuffer(*async_buffer); |         runtime.FreeDeferredStagingBuffer(*async_buffer); | ||||||
|         async_buffers.pop_front(); |         async_buffers.pop_front(); | ||||||
| @@ -1198,11 +1201,9 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s | |||||||
|  |  | ||||||
|     const IntervalType base_interval{cpu_addr, cpu_addr + size}; |     const IntervalType base_interval{cpu_addr, cpu_addr + size}; | ||||||
|     common_ranges.add(base_interval); |     common_ranges.add(base_interval); | ||||||
|     if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) { |  | ||||||
|     uncommitted_ranges.add(base_interval); |     uncommitted_ranges.add(base_interval); | ||||||
|     pending_ranges.add(base_interval); |     pending_ranges.add(base_interval); | ||||||
| } | } | ||||||
| } |  | ||||||
|  |  | ||||||
| template <class P> | template <class P> | ||||||
| BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | BufferId BufferCache<P>::FindBuffer(VAddr cpu_addr, u32 size) { | ||||||
| @@ -1542,7 +1543,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si | |||||||
|                     .size = new_size, |                     .size = new_size, | ||||||
|                 }); |                 }); | ||||||
|                 // Align up to avoid cache conflicts |                 // Align up to avoid cache conflicts | ||||||
|                 constexpr u64 align = 8ULL; |                 constexpr u64 align = 64ULL; | ||||||
|                 constexpr u64 mask = ~(align - 1ULL); |                 constexpr u64 mask = ~(align - 1ULL); | ||||||
|                 total_size_bytes += (new_size + align - 1) & mask; |                 total_size_bytes += (new_size + align - 1) & mask; | ||||||
|                 largest_copy = std::max(largest_copy, new_size); |                 largest_copy = std::max(largest_copy, new_size); | ||||||
|   | |||||||
| @@ -345,13 +345,30 @@ private: | |||||||
|             if (inter_addr < start_address) { |             if (inter_addr < start_address) { | ||||||
|                 inter_addr = start_address; |                 inter_addr = start_address; | ||||||
|             } |             } | ||||||
|             if (it->second <= 0) { |  | ||||||
|                 __debugbreak(); |  | ||||||
|             } |  | ||||||
|             func(inter_addr, inter_addr_end, it->second); |             func(inter_addr, inter_addr_end, it->second); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     void RemoveEachInOverlapCounter(OverlapCounter& current_range, const IntervalType search_interval, int subtract_value) { | ||||||
|  |         bool any_removals = false; | ||||||
|  |         current_range.add(std::make_pair(search_interval, subtract_value)); | ||||||
|  |         do { | ||||||
|  |             any_removals = false; | ||||||
|  |             auto it = current_range.lower_bound(search_interval); | ||||||
|  |             if (it == current_range.end()) { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |             auto end_it = current_range.upper_bound(search_interval); | ||||||
|  |             for (; it != end_it; it++) { | ||||||
|  |                 if (it->second <= 0) { | ||||||
|  |                     any_removals = true; | ||||||
|  |                     current_range.erase(it); | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } while (any_removals); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { |     static bool IsRangeGranular(VAddr cpu_addr, size_t size) { | ||||||
|         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == |         return (cpu_addr & ~Core::Memory::YUZU_PAGEMASK) == | ||||||
|                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); |                ((cpu_addr + size) & ~Core::Memory::YUZU_PAGEMASK); | ||||||
| @@ -554,6 +571,8 @@ private: | |||||||
|     u64 minimum_memory = 0; |     u64 minimum_memory = 0; | ||||||
|     u64 critical_memory = 0; |     u64 critical_memory = 0; | ||||||
|  |  | ||||||
|  |     bool active_async_buffers = false; | ||||||
|  |  | ||||||
|     std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; |     std::array<BufferId, ((1ULL << 39) >> PAGE_BITS)> page_table; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -273,7 +273,7 @@ public: | |||||||
|                 untracked_words[word_index] &= ~bits; |                 untracked_words[word_index] &= ~bits; | ||||||
|                 NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); |                 NotifyRasterizer<true>(word_index, current_bits, ~u64{0}); | ||||||
|             } |             } | ||||||
|             const u64 word = current_word; |             const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0); | ||||||
|             u64 page = page_begin; |             u64 page = page_begin; | ||||||
|             page_begin = 0; |             page_begin = 0; | ||||||
|  |  | ||||||
| @@ -321,6 +321,7 @@ public: | |||||||
|     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { |     [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { | ||||||
|         static_assert(type != Type::Untracked); |         static_assert(type != Type::Untracked); | ||||||
|  |  | ||||||
|  |         const u64* const untracked_words = Array<Type::Untracked>(); | ||||||
|         const u64* const state_words = Array<type>(); |         const u64* const state_words = Array<type>(); | ||||||
|         const u64 num_query_words = size / BYTES_PER_WORD + 1; |         const u64 num_query_words = size / BYTES_PER_WORD + 1; | ||||||
|         const u64 word_begin = offset / BYTES_PER_WORD; |         const u64 word_begin = offset / BYTES_PER_WORD; | ||||||
| @@ -328,7 +329,8 @@ public: | |||||||
|         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); |         const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); | ||||||
|         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; |         u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; | ||||||
|         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { |         for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { | ||||||
|             const u64 word = state_words[word_index]; |             const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0; | ||||||
|  |             const u64 word = state_words[word_index] & ~off_word; | ||||||
|             if (word == 0) { |             if (word == 0) { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user