Buffer cache: always use async buffer downloads and fix regression.
@@ -535,12 +535,12 @@ TEST_CASE("MemoryTracker: Cached write downloads") {
     memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE);
     int num = 0;
     memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 1);
+    REQUIRE(num == 0);
     num = 0;
     memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
     REQUIRE(num == 0);
     REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE));
-    REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
+    REQUIRE(memory_track->IsRegionGpuModified(c + PAGE, PAGE));
     memory_track->FlushCachedWrites();
     REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE));
     REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
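In plain terms, the updated assertions state that a page carrying a pending cached CPU write is no longer enumerated by ForEachDownloadRangeAndClear, keeps reading as GPU-modified until the cached writes are flushed, and only after FlushCachedWrites reads as CPU-modified and not GPU-modified. The memory-tracker hunks at the bottom of this commit implement that by masking GPU state words with the Untracked words.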
@@ -23,8 +23,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
     common_ranges.clear();
     inline_buffer_id = NULL_BUFFER_ID;
 
-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     if (!runtime.CanReportMemoryUsage()) {
         minimum_memory = DEFAULT_EXPECTED_MEMORY;
         critical_memory = DEFAULT_CRITICAL_MEMORY;
@@ -75,8 +73,6 @@ void BufferCache<P>::TickFrame() {
     uniform_cache_hits[0] = 0;
     uniform_cache_shots[0] = 0;
 
-    active_async_buffers = !Settings::IsGPULevelHigh();
-
     const bool skip_preferred = hits * 256 < shots * 251;
     uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
 
@@ -491,10 +487,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
 
     if (committed_ranges.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
             async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
-        }
         return;
     }
     MICROPROFILE_SCOPE(GPU_DownloadMemory);
@@ -554,13 +549,11 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
     committed_ranges.clear();
     if (downloads.empty()) {
         if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
-            if (active_async_buffers) {
             async_buffers.emplace_back(std::optional<Async_Buffer>{});
         }
-        }
         return;
     }
-    if (active_async_buffers) {
     if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
         auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true);
         boost::container::small_vector<BufferCopy, 4> normalized_copies;
@@ -582,9 +575,9 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
         pending_downloads.emplace_back(std::move(normalized_copies));
         async_buffers.emplace_back(download_staging);
     } else {
+        if (!Settings::IsGPULevelHigh()) {
             committed_ranges.clear();
             uncommitted_ranges.clear();
-        }
         } else {
             if constexpr (USE_MEMORY_MAPS) {
                 auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
@@ -593,7 +586,8 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                     // Have in mind the staging buffer offset for the copy
                     copy.dst_offset += download_staging.offset;
                     const std::array copies{copy};
-                runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false);
+                    runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies,
+                                       false);
                 }
                 runtime.PostCopyBarrier();
                 runtime.Finish();
@@ -609,12 +603,14 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
                 const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
                 for (const auto& [copy, buffer_id] : downloads) {
                     Buffer& buffer = slot_buffers[buffer_id];
-                buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
+                    buffer.ImmediateDownload(copy.src_offset,
+                                             immediate_buffer.subspan(0, copy.size));
                     const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset;
                     cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size);
                 }
             }
         }
+    }
 }
 
 template <class P>
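One way to read the empty emplace_back calls that survive the cleanup above: when IMPLEMENTS_ASYNC_DOWNLOADS holds, async_buffers appears to receive exactly one entry per commit, an empty optional when there is nothing to download, so the consuming side can pop in lockstep with the flush requests. A small, self-contained sketch of that queue discipline (StagingBuffer and the strings are illustrative stand-ins, not yuzu's types):

    #include <cstdio>
    #include <deque>
    #include <optional>
    #include <string>

    // Illustrative stand-in for the backend's staging buffer type.
    struct StagingBuffer {
        std::string name;
    };

    int main() {
        // One slot per commit; an empty optional mirrors
        // async_buffers.emplace_back(std::optional<Async_Buffer>{}) in the hunks above.
        std::deque<std::optional<StagingBuffer>> async_buffers;

        async_buffers.emplace_back(std::nullopt);                        // commit with no downloads
        async_buffers.emplace_back(StagingBuffer{"staging for copies"}); // commit with real downloads

        // Consumer pops exactly one slot per commit and simply skips empty ones.
        while (!async_buffers.empty()) {
            const std::optional<StagingBuffer>& slot = async_buffers.front();
            std::printf("commit -> %s\n", slot ? slot->name.c_str() : "(nothing to download)");
            async_buffers.pop_front();
        }
        return 0;
    }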
@@ -572,8 +572,6 @@ private:
     u64 critical_memory = 0;
     BufferId inline_buffer_id;
 
-    bool active_async_buffers = false;
-
     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
     std::vector<u8> tmp_buffer;
 };
@@ -302,6 +302,9 @@ public:
                  (pending_pointer - pending_offset) * BYTES_PER_PAGE);
         };
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if constexpr (clear) {
                 if constexpr (type == Type::CPU || type == Type::CachedCPU) {
@@ -350,8 +353,13 @@ public:
         static_assert(type != Type::Untracked);
 
         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         bool result = false;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word != 0) {
                 result = true;
@@ -372,9 +380,14 @@ public:
     [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
         static_assert(type != Type::Untracked);
         const std::span<const u64> state_words = words.template Span<type>();
+        [[maybe_unused]] const std::span<const u64> untracked_words =
+            words.template Span<Type::Untracked>();
         u64 begin = std::numeric_limits<u64>::max();
         u64 end = 0;
         IterateWords(offset, size, [&](size_t index, u64 mask) {
+            if constexpr (type == Type::GPU) {
+                mask &= ~untracked_words[index];
+            }
             const u64 word = state_words[index] & mask;
             if (word == 0) {
                 return;
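All three memory-tracker hunks add the same guard: before a GPU state word is consulted, the iteration mask is ANDed with the complement of the matching Untracked word, so pages currently flagged as untracked are invisible to GPU-modified queries. A minimal, self-contained sketch of that bit trick (names are illustrative; in the real code this happens inside IterateWords over the word_manager state):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // One 64-bit word tracks 64 pages, one bit per page (as in word_manager).
    constexpr std::uint64_t PageBit(std::size_t page) {
        return std::uint64_t{1} << page;
    }

    // GPU-modified pages that should actually be visible to GPU queries:
    // mirrors the added  mask &= ~untracked_words[index];  guard.
    constexpr std::uint64_t VisibleGpuPages(std::uint64_t gpu_word, std::uint64_t untracked_word) {
        return gpu_word & ~untracked_word;
    }

    int main() {
        const std::uint64_t gpu_word = PageBit(3) | PageBit(7); // GPU wrote pages 3 and 7
        const std::uint64_t untracked_word = PageBit(3);        // page 3 is currently untracked
        // Only page 7 remains visible; page 3 is skipped, which matches the updated
        // test expecting it to stay GPU-modified until the cached writes are flushed.
        std::printf("visible GPU pages: %#llx\n",
                    static_cast<unsigned long long>(VisibleGpuPages(gpu_word, untracked_word)));
        return 0;
    }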