renderer_vulkan: Rewrite stream buffer, again...

* The previous implemention was fine, but it wasted space. Buckets now are just ticks attached to a particular buffer region, which means we can flush/map arbitrary regions * A bug in the texture runtime is also fixed which commited to the same buffer twice
2022-10-28 22:25:09 +03:00
parent b4184a3c2b
commit 20496e8ca4
3 changed files with 38 additions and 61 deletions
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@ -120,28 +120,36 @@ StreamBuffer::~StreamBuffer() {
 std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
    ASSERT(size <= total_size && alignment <= total_size);
    Bucket& bucket = buckets[bucket_index];
    if (alignment > 0) {
-        bucket.cursor = Common::AlignUp(bucket.cursor, alignment);
+        buffer_offset = Common::AlignUp(buffer_offset, alignment);
    }
-    // If we reach bucket boundaries move over to the next one
+    bool invalidate = false;
-    if (bucket.cursor + size > bucket_size) {
+    const u32 new_offset = buffer_offset + size;
-        bucket.gpu_tick = scheduler.CurrentTick();
+    if (u32 new_index = new_offset / bucket_size; new_index != bucket_index) {
-        MoveNextBucket();
+        if (new_index >= BUCKET_COUNT) {
-        return Map(size, alignment);
+            if (readback) {
                Invalidate();
            } else {
                Flush();
            }
            buffer_offset = 0;
            flush_offset = 0;
            new_index = 0;
            invalidate = true;
        }
        ticks[bucket_index] = scheduler.CurrentTick();
        scheduler.Wait(ticks[new_index]);
        bucket_index = new_index;
    }
    const bool invalidate = std::exchange(bucket.invalid, false);
    const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
    return std::make_tuple(mapped, buffer_offset, invalidate);
 }
 void StreamBuffer::Commit(u32 size) {
-    buckets[bucket_index].cursor += size;
+    buffer_offset += size;
 }
 void StreamBuffer::Flush() {
@ -150,20 +158,18 @@ void StreamBuffer::Flush() {
        return;
    }
-    Bucket& bucket = buckets[bucket_index];
+    const u32 flush_size = buffer_offset - flush_offset;
-    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
+    ASSERT(flush_size <= total_size);
-    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
+    ASSERT(flush_offset + flush_size <= total_size);
    ASSERT(flush_size <= bucket_size);
    ASSERT(flush_start + flush_size <= total_size);
    if (flush_size > 0) [[likely]] {
        // Ensure all staging writes are visible to the host memory domain
        VmaAllocator allocator = instance.GetAllocator();
-        vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
+        vmaFlushAllocation(allocator, staging.allocation, flush_offset, flush_size);
        if (gpu_buffer) {
-            scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
+            scheduler.Record([this, flush_offset = flush_offset, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
                const vk::BufferCopy copy_region = {
-                    .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
+                    .srcOffset = flush_offset, .dstOffset = flush_offset, .size = flush_size};
                upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
@ -174,7 +180,7 @@ void StreamBuffer::Flush() {
                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                    .buffer = gpu_buffer,
-                    .offset = flush_start,
+                    .offset = flush_offset,
                    .size = flush_size};
                upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
@ -182,7 +188,7 @@ void StreamBuffer::Flush() {
                                              {});
            });
        }
-        bucket.flush_cursor += flush_size;
+        flush_offset = buffer_offset;
    }
 }
@ -191,33 +197,16 @@ void StreamBuffer::Invalidate() {
        return;
    }
-    Bucket& bucket = buckets[bucket_index];
+    const u32 flush_size = buffer_offset - flush_offset;
-    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
+    ASSERT(flush_size <= total_size);
-    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
+    ASSERT(flush_offset + flush_size <= total_size);
    ASSERT(flush_size <= bucket_size);
    if (flush_size > 0) [[likely]] {
        // Ensure the staging memory can be read by the host
        VmaAllocator allocator = instance.GetAllocator();
-        vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
+        vmaInvalidateAllocation(allocator, staging.allocation, flush_offset, flush_size);
-        bucket.flush_cursor += flush_size;
+        flush_offset = buffer_offset;
    }
 }
 void StreamBuffer::MoveNextBucket() {
    // Flush and Invalidate are bucket local operations for simplicity so perform them here
    if (readback) {
        Invalidate();
    } else {
        Flush();
    }
    bucket_index = (bucket_index + 1) % BUCKET_COUNT;
    Bucket& next_bucket = buckets[bucket_index];
    scheduler.Wait(next_bucket.gpu_tick);
    next_bucket.cursor = 0;
    next_bucket.flush_cursor = 0;
    next_bucket.invalid = true;
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@ -71,17 +71,6 @@ public:
        return views[index];
    }
 private:
    /// Moves to the next bucket
    void MoveNextBucket();
    struct Bucket {
        bool invalid = false;
        u32 gpu_tick = 0;
        u32 cursor = 0;
        u32 flush_cursor = 0;
    };
 private:
    const Instance& instance;
    Scheduler& scheduler;
@ -90,12 +79,14 @@ private:
    VmaAllocation allocation{};
    vk::BufferUsageFlagBits usage;
    std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
    std::array<Bucket, BUCKET_COUNT> buckets;
    std::size_t view_count = 0;
    u32 total_size = 0;
    u32 bucket_size = 0;
    u32 buffer_offset = 0;
    u32 flush_offset = 0;
    u32 bucket_index = 0;
    bool readback = false;
    std::array<u64, BUCKET_COUNT> ticks{};
 };
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
@ -708,12 +708,11 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
            render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal,
                                            region_count, copy_regions.data());
        });
        runtime.upload_buffer.Commit(staging.size);
    }
    InvalidateAllWatcher();
    // Lock this data until the next scheduler switch
    runtime.upload_buffer.Commit(staging.size);
 }
 MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
@ -751,11 +750,9 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
            render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
                                             staging.buffer, copy_region);
        });
    }
    // Lock this data until the next scheduler switch
        runtime.download_buffer.Commit(staging.size);
    }
 }
 u32 Surface::GetInternalBytesPerPixel() const {
    // Request 5 bytes for D24S8 as well because we can use the