From 20496e8ca42115ea7ab50e43c6f19e9180309d51 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 28 Oct 2022 22:25:09 +0300 Subject: [PATCH] renderer_vulkan: Rewrite stream buffer, again... * The previous implementation was fine, but it wasted space. Buckets are now just ticks attached to a particular buffer region, which means we can flush/map arbitrary regions * A bug in the texture runtime that committed to the same buffer twice is also fixed --- .../renderer_vulkan/vk_stream_buffer.cpp | 75 ++++++++----------- .../renderer_vulkan/vk_stream_buffer.h | 15 +--- .../renderer_vulkan/vk_texture_runtime.cpp | 9 +-- 3 files changed, 38 insertions(+), 61 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 912422917..209ce18c3 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -120,28 +120,36 @@ StreamBuffer::~StreamBuffer() { std::tuple StreamBuffer::Map(u32 size, u32 alignment) { ASSERT(size <= total_size && alignment <= total_size); - Bucket& bucket = buckets[bucket_index]; if (alignment > 0) { - bucket.cursor = Common::AlignUp(bucket.cursor, alignment); + buffer_offset = Common::AlignUp(buffer_offset, alignment); } - // If we reach bucket boundaries move over to the next one - if (bucket.cursor + size > bucket_size) { - bucket.gpu_tick = scheduler.CurrentTick(); - MoveNextBucket(); - return Map(size, alignment); + bool invalidate = false; + const u32 new_offset = buffer_offset + size; + if (u32 new_index = new_offset / bucket_size; new_index != bucket_index) { + if (new_index >= BUCKET_COUNT) { + if (readback) { + Invalidate(); + } else { + Flush(); + } + buffer_offset = 0; + flush_offset = 0; + new_index = 0; + invalidate = true; + } + ticks[bucket_index] = scheduler.CurrentTick(); + scheduler.Wait(ticks[new_index]); + bucket_index = new_index; } - const bool invalidate = std::exchange(bucket.invalid, false); - 
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor; u8* mapped = reinterpret_cast(staging.mapped.data() + buffer_offset); - return std::make_tuple(mapped, buffer_offset, invalidate); } void StreamBuffer::Commit(u32 size) { - buckets[bucket_index].cursor += size; + buffer_offset += size; } void StreamBuffer::Flush() { @@ -150,20 +158,18 @@ void StreamBuffer::Flush() { return; } - Bucket& bucket = buckets[bucket_index]; - const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; - const u32 flush_size = bucket.cursor - bucket.flush_cursor; - ASSERT(flush_size <= bucket_size); - ASSERT(flush_start + flush_size <= total_size); + const u32 flush_size = buffer_offset - flush_offset; + ASSERT(flush_size <= total_size); + ASSERT(flush_offset + flush_size <= total_size); if (flush_size > 0) [[likely]] { // Ensure all staging writes are visible to the host memory domain VmaAllocator allocator = instance.GetAllocator(); - vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size); + vmaFlushAllocation(allocator, staging.allocation, flush_offset, flush_size); if (gpu_buffer) { - scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { + scheduler.Record([this, flush_offset = flush_offset, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) { const vk::BufferCopy copy_region = { - .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size}; + .srcOffset = flush_offset, .dstOffset = flush_offset, .size = flush_size}; upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region); @@ -174,7 +180,7 @@ void StreamBuffer::Flush() { .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .buffer = gpu_buffer, - .offset = flush_start, + .offset = flush_offset, .size = flush_size}; upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask, @@ -182,7 +188,7 @@ void StreamBuffer::Flush() { {}); }); } - 
bucket.flush_cursor += flush_size; + flush_offset = buffer_offset; } } @@ -191,33 +197,16 @@ void StreamBuffer::Invalidate() { return; } - Bucket& bucket = buckets[bucket_index]; - const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; - const u32 flush_size = bucket.cursor - bucket.flush_cursor; - ASSERT(flush_size <= bucket_size); + const u32 flush_size = buffer_offset - flush_offset; + ASSERT(flush_size <= total_size); + ASSERT(flush_offset + flush_size <= total_size); if (flush_size > 0) [[likely]] { // Ensure the staging memory can be read by the host VmaAllocator allocator = instance.GetAllocator(); - vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size); - bucket.flush_cursor += flush_size; + vmaInvalidateAllocation(allocator, staging.allocation, flush_offset, flush_size); + flush_offset = buffer_offset; } } -void StreamBuffer::MoveNextBucket() { - // Flush and Invalidate are bucket local operations for simplicity so perform them here - if (readback) { - Invalidate(); - } else { - Flush(); - } - - bucket_index = (bucket_index + 1) % BUCKET_COUNT; - Bucket& next_bucket = buckets[bucket_index]; - scheduler.Wait(next_bucket.gpu_tick); - next_bucket.cursor = 0; - next_bucket.flush_cursor = 0; - next_bucket.invalid = true; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index d573c2f0e..2ed1b7498 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -71,17 +71,6 @@ public: return views[index]; } -private: - /// Moves to the next bucket - void MoveNextBucket(); - - struct Bucket { - bool invalid = false; - u32 gpu_tick = 0; - u32 cursor = 0; - u32 flush_cursor = 0; - }; - private: const Instance& instance; Scheduler& scheduler; @@ -90,12 +79,14 @@ private: VmaAllocation allocation{}; vk::BufferUsageFlagBits usage; std::array views{}; - std::array buckets; 
std::size_t view_count = 0; u32 total_size = 0; u32 bucket_size = 0; + u32 buffer_offset = 0; + u32 flush_offset = 0; u32 bucket_index = 0; bool readback = false; + std::array ticks{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 7c8ff4376..95f9656b9 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -708,12 +708,11 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal, region_count, copy_regions.data()); }); + + runtime.upload_buffer.Commit(staging.size); } InvalidateAllWatcher(); - - // Lock this data until the next scheduler switch - runtime.upload_buffer.Commit(staging.size); } MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64)); @@ -751,10 +750,8 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, staging.buffer, copy_region); }); + runtime.download_buffer.Commit(staging.size); } - - // Lock this data until the next scheduler switch - runtime.download_buffer.Commit(staging.size); } u32 Surface::GetInternalBytesPerPixel() const {