Buffer cache: always use async buffer downloads and fix regression.
@@ -535,12 +535,12 @@ TEST_CASE("MemoryTracker: Cached write downloads") {
     memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
     int num = 0;
     memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 1);
+    REQUIRE(num == 0);
     num = 0;
     memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
     REQUIRE(num == 0);
     REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
-    REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
+    REQUIRE(memory_track->IsRegionGpuModified(c + PAGE, PAGE));
     memory_track->FlushCachedWrites();
     REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
     REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));

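Note on the updated expectations (my reading of the change, not wording from the commit): a page with a pending cached CPU write is no longer picked up by the download sweep, and its GPU-modified state survives the sweep instead of being cleared; only FlushCachedWrites promotes the cached write to a real CPU modification. A toy model of just those observable transitions, with made-up names (PageState, TakeDownload) that are not the project's API:

    // Toy model (not yuzu code) of the behavior the updated test pins down.
    #include <cassert>

    struct PageState {
        bool cached_cpu_write = false; // CPU wrote, but the write is still "cached"
        bool cpu_modified = false;     // needs upload to the GPU
        bool gpu_modified = false;     // needs download to the CPU
    };

    // Download sweep: pages with a pending cached write are skipped, and
    // their GPU-modified state is preserved instead of being cleared.
    bool TakeDownload(PageState& p) {
        if (p.cached_cpu_write) {
            return false; // masked out, nothing downloaded
        }
        const bool had = p.gpu_modified;
        p.gpu_modified = false;
        return had;
    }

    // FlushCachedWrites: promote the cached write to a real CPU modification.
    void FlushCachedWrites(PageState& p) {
        if (p.cached_cpu_write) {
            p.cached_cpu_write = false;
            p.cpu_modified = true;
            p.gpu_modified = false; // the CPU write supersedes stale GPU data
        }
    }

    int main() {
        PageState page{};
        page.cached_cpu_write = true; // CachedCpuWrite
        page.gpu_modified = true;     // MarkRegionAsGpuModified
        assert(!TakeDownload(page));  // no download generated
        assert(page.gpu_modified);    // GPU state survives the sweep
        FlushCachedWrites(page);
        assert(page.cpu_modified && !page.gpu_modified);
    }
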
@@ -23,8 +23,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
     common_ranges.clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     if (!runtime.CanReportMemoryUsage()) {
         minimum_memory = DEFAULT_EXPECTED_MEMORY;
         critical_memory = DEFAULT_CRITICAL_MEMORY;
@@ -75,8 +73,6 @@ void BufferCache<P>::TickFrame() {
     uniform_cache_hits[0] = 0;
     uniform_cache_shots[0] = 0;
 
-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     const bool skip_preferred = hits * 256 < shots * 251;
     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
 
@@ -491,9 +487,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
 
     if (committed_ranges.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
-                async_buffers.emplace_back(std::optional<Async_Buffer>{});
-            }
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
     }
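The unconditional emplace_back of an empty std::optional keeps one queue entry per commit even when there is nothing to download; my understanding is that the consumer side pops one entry per tick, so producer and consumer must stay in lockstep. A minimal sketch of that placeholder pattern, using hypothetical names (Commit, PopAsyncBuffer) rather than the cache's real entry points:

    #include <deque>
    #include <iostream>
    #include <optional>
    #include <string>

    using Staging = std::string; // stand-in for the real staging-buffer handle

    std::deque<std::optional<Staging>> async_buffers;

    // Producer: one entry per commit, even when the commit had no downloads.
    void Commit(std::optional<Staging> staging) {
        async_buffers.emplace_back(std::move(staging));
    }

    // Consumer: pops exactly one entry per tick; std::nullopt means "nothing
    // to write back this tick" but keeps producer and consumer aligned.
    void PopAsyncBuffer() {
        if (async_buffers.empty()) {
            return;
        }
        if (const auto& staging = async_buffers.front()) {
            std::cout << "write back from " << *staging << '\n';
        }
        async_buffers.pop_front();
    }

    int main() {
        Commit(std::nullopt);         // empty commit still enqueues a placeholder
        Commit(Staging{"staging#1"});
        PopAsyncBuffer();             // skips the placeholder
        PopAsyncBuffer();             // performs the write-back
    }
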
@@ -554,64 +549,65 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     committed_ranges.clear();
     if (downloads.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
-                async_buffers.emplace_back(std::optional<Async_Buffer>{});
-            }
+            async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
         return;
     }
-    if (active_async_buffers) {
-        if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
-            boost::container::small_vector<BufferCopy, 4> normalized_copies;
-            IntervalSet new_async_range{};
-            runtime.PreCopyBarrier();
-            for (auto& [copy, buffer_id] : downloads) {
-                copy.dst_offset += download_staging.offset;
-                const std::array copies{copy};
-                BufferCopy second_copy{copy};
-                Buffer& buffer = slot_buffers[buffer_id];
-                second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
-                VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
-                const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
-                async_downloads += std::make_pair(base_interval, 1);
-                runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
-                normalized_copies.push_back(second_copy);
-            }
-            runtime.PostCopyBarrier();
-            pending_downloads.emplace_back(std::move(normalized_copies));
-            async_buffers.emplace_back(download_staging);
-        } else {
-            committed_ranges.clear();
-            uncommitted_ranges.clear();
-        }
-    } else {
-        if constexpr (USE_MEMORY_MAPS) {
-            auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
-            runtime.PreCopyBarrier();
-            for (auto& [copy, buffer_id] : downloads) {
-                // Have in mind the staging buffer offset for the copy
-                copy.dst_offset += download_staging.offset;
-                const std::array copies{copy};
-                runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
-            }
-            runtime.PostCopyBarrier();
-            runtime.Finish();
-            for (const auto& [copy, buffer_id] : downloads) {
-                const Buffer& buffer = slot_buffers[buffer_id];
-                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-                // Undo the modified offset
-                const u64 dst_offset = copy.dst_offset - download_staging.offset;
-                const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
-                cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
-            }
-        } else {
-            const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
-            for (const auto& [copy, buffer_id] : downloads) {
-                Buffer& buffer = slot_buffers[buffer_id];
-                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
-                const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
-                cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+    if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
+        auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
+        boost::container::small_vector<BufferCopy, 4> normalized_copies;
+        IntervalSet new_async_range{};
+        runtime.PreCopyBarrier();
+        for (auto& [copy, buffer_id] : downloads) {
+            copy.dst_offset += download_staging.offset;
+            const std::array copies{copy};
+            BufferCopy second_copy{copy};
+            Buffer& buffer = slot_buffers[buffer_id];
+            second_copy.src_offset = static_cast<size_t>(buffer.CpuAddr()) + copy.src_offset;
+            VAddr orig_cpu_addr = static_cast<VAddr>(second_copy.src_offset);
+            const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size};
+            async_downloads += std::make_pair(base_interval, 1);
+            runtime.CopyBuffer(download_staging.buffer, buffer, copies, false);
+            normalized_copies.push_back(second_copy);
+        }
+        runtime.PostCopyBarrier();
+        pending_downloads.emplace_back(std::move(normalized_copies));
+        async_buffers.emplace_back(download_staging);
+    } else {
+        if (!Settings::IsGPULevelHigh()) {
+            committed_ranges.clear();
+            uncommitted_ranges.clear();
+        } else {
+            if constexpr (USE_MEMORY_MAPS) {
+                auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
+                runtime.PreCopyBarrier();
+                for (auto& [copy, buffer_id] : downloads) {
+                    // Have in mind the staging buffer offset for the copy
+                    copy.dst_offset += download_staging.offset;
+                    const std::array copies{copy};
+                    runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
+                                       false);
+                }
+                runtime.PostCopyBarrier();
+                runtime.Finish();
+                for (const auto& [copy, buffer_id] : downloads) {
+                    const Buffer& buffer = slot_buffers[buffer_id];
+                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                    // Undo the modified offset
+                    const u64 dst_offset = copy.dst_offset - download_staging.offset;
+                    const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset;
+                    cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size);
+                }
+            } else {
+                const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
+                for (const auto& [copy, buffer_id] : downloads) {
+                    Buffer& buffer = slot_buffers[buffer_id];
+                    buffer.ImmediateDownload(copy.src_offset,
+                                             immediate_buffer.subspan(0, copy.size));
+                    const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
+                    cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
+                }
             }
         }
     }

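How I read the always-async path that replaces the old active_async_buffers gate: each download is recorded twice, once as a device copy into the staging buffer (dst_offset rebased by the staging offset) and once as a "normalized" copy whose src_offset is reinterpreted as the guest CPU address, queued in pending_downloads; when the staging buffer is later popped, the normalized copies say where in guest memory each staged span belongs. A simplified, self-contained sketch under those assumptions (hypothetical types; the real code also reference-counts each range in async_downloads):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    using VAddr = std::uint64_t;
    using u8 = std::uint8_t;
    using u64 = std::uint64_t;

    struct BufferCopy {
        u64 src_offset; // normalized: guest CPU address of the data
        u64 dst_offset; // offset of the data inside the staging buffer
        u64 size;
    };

    std::vector<u8> guest_ram(1 << 16); // stand-in for cpu_memory

    // Stand-in for cpu_memory.WriteBlockUnsafe.
    void WriteBlockUnsafe(VAddr addr, const u8* data, u64 size) {
        std::memcpy(guest_ram.data() + addr, data, size);
    }

    // Runs once the staging buffer's fence has signalled: every normalized
    // copy maps one span of the mapped staging memory back to a guest address.
    void CompleteDownload(const std::vector<BufferCopy>& normalized_copies,
                          const std::vector<u8>& mapped_staging) {
        for (const BufferCopy& copy : normalized_copies) {
            const u8* src = mapped_staging.data() + copy.dst_offset;
            WriteBlockUnsafe(static_cast<VAddr>(copy.src_offset), src, copy.size);
        }
    }

    int main() {
        const std::vector<u8> staging{1, 2, 3, 4};
        // One download: 4 bytes staged at offset 0, destined for guest 0x100.
        CompleteDownload({{0x100, 0, 4}}, staging);
    }
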
@@ -572,8 +572,6 @@ private:
     u64 critical_memory = 0;
     BufferId inline_buffer_id;
 
-    bool active_async_buffers = false;
-
     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
     std::vector<u8> tmp_buffer;
 };

@@ -302,6 +302,9 @@ public:
                  (pending_pointer - pending_offset) * BYTES_PER_PAGE);
         };
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if constexpr (clear) {
                 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
@@ -350,8 +353,13 @@ public:
         static_assert(type != Type::Untracked);
 
         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         bool result = false;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word != 0) {
                 result = true;
@@ -372,9 +380,14 @@ public:
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word == 0) {
                 return;

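The regression fix itself is the same mask &= ~untracked_words[index] applied in all three GPU queries: each u64 word is a bitmap covering 64 pages, and clearing the bits that are set in the Untracked bitmap stops untracked pages from being reported (or cleared) as GPU-modified. A standalone illustration of the bit arithmetic on toy bitmaps (CountGpuModified is a made-up helper, not the Words API):

    #include <bit>
    #include <cstdint>
    #include <iostream>
    #include <span>

    // Count pages reported as GPU-modified, skipping untracked pages.
    // Each u64 word covers 64 pages; bit i of word w is page w * 64 + i.
    int CountGpuModified(std::span<const std::uint64_t> gpu_words,
                         std::span<const std::uint64_t> untracked_words) {
        int pages = 0;
        for (std::size_t index = 0; index < gpu_words.size(); ++index) {
            std::uint64_t mask = ~std::uint64_t{0};
            mask &= ~untracked_words[index]; // the fix: drop untracked pages
            pages += std::popcount(gpu_words[index] & mask);
        }
        return pages;
    }

    int main() {
        const std::uint64_t gpu[]{0b1111};       // pages 0-3 GPU-modified
        const std::uint64_t untracked[]{0b0101}; // pages 0 and 2 untracked
        std::cout << CountGpuModified(gpu, untracked) << '\n'; // prints 2
    }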