renderer_vulkan: Improve StreamBuffer API and use it in TextureRuntime

* Also use separate upload and download buffers, optimized for write and readback respectively. This gives a huge 20+ FPS boost in most games that were bottlenecked by slow readbacks.
emufan4568
2022-10-18 22:17:30 +03:00
committed by GPUCode
parent a4dc6a55b7
commit 0a40a513a6
7 changed files with 142 additions and 109 deletions
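The heart of the change: instead of one shared staging pool, uploads and downloads now get dedicated staging buffers. Upload staging wants write-combined memory that the CPU can fill quickly, while download staging wants cached memory, because CPU reads from uncached, write-combined allocations are extremely slow; that is where the readback win comes from. A minimal sketch of how a readback flag can map to VMA allocation flags, mirroring the StagingBuffer change below (the helper name is invented for illustration):

```cpp
#include <vk_mem_alloc.h>

// Sketch: pick host-access flags per transfer direction. Under
// VMA_MEMORY_USAGE_AUTO_PREFER_HOST, SEQUENTIAL_WRITE steers VMA toward
// write-combined memory, while RANDOM permits cached memory that the CPU
// can read back at full speed.
VmaAllocationCreateInfo MakeStagingAllocInfo(bool readback) {
    const VmaAllocationCreateFlags access =
        readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
                 : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
    return VmaAllocationCreateInfo{
        .flags = access | VMA_ALLOCATION_CREATE_MAPPED_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
    };
}
```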

View File

@@ -23,7 +23,7 @@ inline T MakeInt(const std::byte* bytes) {
 }
 
 template <PixelFormat format, bool converted>
-inline void DecodePixel(const std::byte* source, std::byte* dest) {
+constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
 
     if constexpr (format == PixelFormat::D24S8) {
@@ -69,7 +69,7 @@ inline void DecodePixel(const std::byte* source, std::byte* dest) {
 }
 
 template <PixelFormat format>
-inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
+constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
     const u32 morton_offset = VideoCore::MortonInterleave(x, y);
     const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
     const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
@@ -84,7 +84,7 @@ inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte*
 }
 
 template <PixelFormat format>
-inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
+constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
     constexpr u32 subtile_width = 4;
     constexpr u32 subtile_height = 4;
     constexpr bool has_alpha = format == PixelFormat::ETC1A4;
@@ -114,7 +114,7 @@ inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byt
 }
 
 template <PixelFormat format, bool converted>
-inline void EncodePixel(const std::byte* source, std::byte* dest) {
+constexpr void EncodePixel(const std::byte* source, std::byte* dest) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
 
     if constexpr (format == PixelFormat::D24S8) {
@@ -146,8 +146,8 @@ inline void EncodePixel(const std::byte* source, std::byte* dest) {
 }
 
 template <bool morton_to_linear, PixelFormat format, bool converted>
-inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
-                           std::span<std::byte> linear_buffer) {
+constexpr void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
+                              std::span<std::byte> linear_buffer) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
     constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
     constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
@@ -200,8 +200,9 @@ inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
  * in the linear_buffer.
  */
 template <bool morton_to_linear, PixelFormat format, bool converted = false>
-static void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
-                       std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer) {
+static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
+                                 std::span<std::byte> linear_buffer,
+                                 std::span<std::byte> tiled_buffer) {
     constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
     constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
     constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;

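A note on the inline-to-constexpr swap above: for functions, constexpr already implies inline, so linkage does not change; the benefit is that these helpers become eligible for compile-time evaluation, and every per-pixel branch on the format template parameter folds away under if constexpr. A reduced, self-contained sketch of the pattern (the enum and GetFormatBpp here are stand-ins, not the real definitions):

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

enum class PixelFormat { RGBA8, RGB8 };

// Stand-in for the real GetFormatBpp: bits per pixel of each format.
constexpr std::uint32_t GetFormatBpp(PixelFormat format) {
    return format == PixelFormat::RGBA8 ? 32 : 24;
}

// Because format is a template parameter, bytes_per_pixel is a constant
// and the if constexpr branch is selected entirely at compile time.
template <PixelFormat format>
constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
    constexpr std::uint32_t bytes_per_pixel = GetFormatBpp(format) / 8;
    std::memcpy(dest, source, bytes_per_pixel);
    if constexpr (format == PixelFormat::RGB8) {
        dest[3] = std::byte{0xFF}; // expand to opaque RGBA8
    }
}
```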
View File

@@ -948,22 +948,22 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
     surface->Download(download, staging);
 
-    download_queue.push_back(
-        [this, surface, flush_start, flush_end, flush_info, mapped = staging.mapped]() {
-            MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
-            if (!dest_ptr) [[unlikely]] {
-                return;
-            }
+    MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
+    if (!dest_ptr) [[unlikely]] {
+        return;
+    }
 
-            const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
+    const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
 
-            if (surface->is_tiled) {
-                SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
-                               runtime.NeedsConvertion(surface->pixel_format));
-            } else {
-                runtime.FormatConvert(*surface, false, mapped, download_dest);
-            }
-        });
+    download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
+                              mapped = staging.mapped, download_dest]() {
+        if (surface->is_tiled) {
+            SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
+                           runtime.NeedsConvertion(surface->pixel_format));
+        } else {
+            runtime.FormatConvert(*surface, false, mapped, download_dest);
+        }
+    });
 }
 
 template <class T>

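The RasterizerCache change above hoists the physical-memory lookup out of the deferred lambda: an unmapped address now bails out before anything is queued, and the callback captures the precomputed download_dest span by value instead of redoing the lookup when the queue is drained. The shape of the pattern, reduced to a standalone sketch (QueueDownload and its copy body are illustrative, not the actual cache code):

```cpp
#include <algorithm>
#include <cstddef>
#include <functional>
#include <span>
#include <vector>

std::vector<std::function<void()>> download_queue;

// Resolve the destination eagerly; only the work that must wait for the
// GPU readback stays deferred, with both spans captured by value.
void QueueDownload(std::span<const std::byte> mapped, std::span<std::byte> dest) {
    if (dest.empty()) { // stand-in for the !dest_ptr early-out
        return;
    }
    download_queue.push_back(
        [mapped, dest] { std::copy(mapped.begin(), mapped.end(), dest.begin()); });
}
```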
View File

@@ -962,11 +962,10 @@ void RendererVulkan::SwapBuffers() {
 void RendererVulkan::FlushBuffers() {
     vertex_buffer.Flush();
     rasterizer->FlushBuffers();
-    renderpass_cache.ExitRenderpass();
+    runtime.FlushBuffers();
 }
 
 void RendererVulkan::OnSlotSwitch() {
-    runtime.OnSlotSwitch(scheduler.GetCurrentSlotIndex());
     renderpass_cache.OnSlotSwitch();
     rasterizer->pipeline_cache.MarkDirty();
 }

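With TextureRuntime owning the staging stream buffers, RendererVulkan::FlushBuffers only forwards to it, and the renderpass exit moves into TextureRuntime::Finish (visible further down). The per-submit ordering this implies, sketched as a hypothetical driver function (the real calls are made from the scheduler plumbing):

```cpp
// Hypothetical driver: publish CPU-side staging writes before the submit,
// then rotate per-slot state once the scheduler switches slots.
void SubmitFrame(RendererVulkan& renderer, TaskScheduler& scheduler) {
    renderer.FlushBuffers();             // vertex + texture staging -> GPU copies
    scheduler.Submit(SubmitMode::Flush); // submit the recorded command buffers
    renderer.OnSlotSwitch();             // renderpass cache reset, pipelines marked dirty
}
```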
View File

@@ -40,14 +40,18 @@ inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
     return result;
 }
 
-StagingBuffer::StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage)
+StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
     : instance{instance} {
+    const vk::BufferUsageFlags usage =
+        readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
     const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
-    const VmaAllocationCreateInfo alloc_create_info = {
-        .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
-                 VMA_ALLOCATION_CREATE_MAPPED_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
+    const VmaAllocationCreateFlags flags =
+        readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
+                 : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+    const VmaAllocationCreateInfo alloc_create_info = {.flags =
+                                                           flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
+                                                       .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST};
 
     VkBuffer unsafe_buffer = VK_NULL_HANDLE;
     VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
@@ -66,9 +70,15 @@ StagingBuffer::~StagingBuffer() {
 }
 
 StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
-                           vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats)
+                           bool readback)
     : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
-      staging{instance, total_size, vk::BufferUsageFlagBits::eTransferSrc}, usage{usage} {
+      staging{instance, total_size, readback}, bucket_size{size} {}
+
+StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
+                           vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
+                           bool readback)
+    : instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
+      staging{instance, total_size, readback}, usage{usage}, bucket_size{size} {
     const vk::BufferCreateInfo buffer_info = {
         .size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
@@ -97,7 +107,6 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u
     }
 
     view_count = view_formats.size();
-    bucket_size = size;
 }
 
 StreamBuffer::~StreamBuffer() {
@@ -141,36 +150,60 @@ void StreamBuffer::Commit(u32 size) {
 void StreamBuffer::Flush() {
     const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 flush_start = current_bucket * bucket_size;
     const u32 flush_size = buckets[current_bucket].offset;
     ASSERT(flush_size <= bucket_size);
 
-    if (flush_size > 0) {
-        vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
+    if (flush_size > 0) [[likely]] {
+        // Ensure all staging writes are visible to the host memory domain
+        VmaAllocator allocator = instance.GetAllocator();
+        vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
 
-        const u32 flush_start = current_bucket * bucket_size;
-        const vk::BufferCopy copy_region = {
-            .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
+        // Make the data available to the GPU if possible
+        if (buffer) {
+            const vk::BufferCopy copy_region = {
+                .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
 
-        vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
-        command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
+            vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
+            command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
 
-        // Add pipeline barrier for the flushed region
-        auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
-        const vk::BufferMemoryBarrier buffer_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-            .dstAccessMask = access_mask,
-            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-            .buffer = buffer,
-            .offset = flush_start,
-            .size = flush_size};
+            // Add pipeline barrier for the flushed region
+            auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
+            const vk::BufferMemoryBarrier buffer_barrier = {
+                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
+                .dstAccessMask = access_mask,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = buffer,
+                .offset = flush_start,
+                .size = flush_size};
 
-        command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
-                                       vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
+            command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
+                                           vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
+                                           {});
+        }
     }
 
+    // Reset the offset of the next bucket
+    SwitchBucket();
 }
+
+void StreamBuffer::Invalidate() {
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 flush_start = current_bucket * bucket_size;
+    const u32 flush_size = buckets[current_bucket].offset;
+    ASSERT(flush_size <= bucket_size);
+
+    if (flush_size > 0) [[likely]] {
+        // Ensure the staging memory can be read by the host
+        VmaAllocator allocator = instance.GetAllocator();
+        vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
+    }
+
+    SwitchBucket();
+}
+
+void StreamBuffer::SwitchBucket() {
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
+    buckets[next_bucket].offset = 0;
+    buckets[next_bucket].invalid = true;

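Flush and Invalidate are the two halves of non-coherent memory maintenance: vmaFlushAllocation publishes CPU writes to the device domain before the GPU copy, while vmaInvalidateAllocation pulls GPU writes back into the host caches before the CPU reads (both are no-ops on HOST_COHERENT memory, so calling them unconditionally is cheap insurance). A round-trip usage sketch against the new API; the helper is illustrative and the scheduler submit/wait between the two halves is elided:

```cpp
#include <cstring>

// Illustrative round trip through the two staging stream buffers.
void UploadThenReadBack(StreamBuffer& upload, StreamBuffer& download,
                        const std::byte* src, std::byte* dst, u32 size) {
    // Upload: write into write-combined staging memory, then Flush() at
    // submit time does the host flush, staging->GPU copy and barrier.
    const auto [in_ptr, in_offset, in_invalidated] = upload.Map(size, 4);
    std::memcpy(in_ptr, src, size);
    upload.Commit(size);
    upload.Flush();

    // Download: reserve readback space at out_offset, record the GPU copy
    // into GetStagingHandle(), submit and wait (elided), then Invalidate()
    // before touching the data from the CPU.
    const auto [out_ptr, out_offset, out_invalidated] = download.Map(size, 4);
    download.Commit(size);
    // ... vkCmdCopyImageToBuffer into the staging buffer at out_offset ...
    download.Invalidate();
    std::memcpy(dst, out_ptr, size);
}
```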
View File

@@ -19,13 +19,8 @@ class TaskScheduler;
 
 constexpr u32 MAX_BUFFER_VIEWS = 3;
 
-struct LockedRegion {
-    u32 size = 0;
-    u64 fence_counter = 0;
-};
-
 struct StagingBuffer {
-    StagingBuffer(const Instance& instance, u32 size, vk::BufferUsageFlags usage);
+    StagingBuffer(const Instance& instance, u32 size, bool readback);
     ~StagingBuffer();
 
     const Instance& instance;
@@ -36,10 +31,19 @@ struct StagingBuffer {
 
 class StreamBuffer {
 public:
+    /// Staging only constructor
     StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
-                 vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
+                 bool readback = false);
+
+    /// Staging + GPU streaming constructor
+    StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
+                 vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
+                 bool readback = false);
     ~StreamBuffer();
 
+    StreamBuffer(const StreamBuffer&) = delete;
+    StreamBuffer& operator=(const StreamBuffer&) = delete;
+
     /// Maps aligned staging memory of size bytes
    std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
 
     /// Commits size bytes from the currently mapped staging memory
@@ -48,24 +52,28 @@
     /// Flushes staging memory to the GPU buffer
     void Flush();
 
-    /// Returns the Vulkan buffer handle
+    /// Invalidates staging memory for reading
+    void Invalidate();
+
+    /// Switches to the next available bucket
+    void SwitchBucket();
+
+    /// Returns the GPU buffer handle
     vk::Buffer GetHandle() const {
         return buffer;
     }
 
+    /// Returns the staging buffer handle
+    vk::Buffer GetStagingHandle() const {
+        return staging.buffer;
+    }
+
     /// Returns an immutable reference to the requested buffer view
     const vk::BufferView& GetView(u32 index = 0) const {
         ASSERT(index < view_count);
         return views[index];
     }
 
-private:
-    /// Invalidates the buffer offsets
-    void Invalidate();
-
-    /// Removes the lock on regions whose fence counter has been reached by the GPU
-    bool UnlockFreeRegions(u32 target_size);
-
+private:
     struct Bucket {
         bool invalid;

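The constructor split makes the staging-only case explicit: TextureRuntime's upload and download buffers never need a device-local buffer or texel views, while vertex/uniform/texel streaming does. Construction sketches, assuming the Instance and TaskScheduler objects used elsewhere in the diff (sizes and the view format are placeholders):

```cpp
// Staging-only: host-visible memory, no GPU buffer is created.
StreamBuffer upload{instance, scheduler, 32 * 1024 * 1024};
StreamBuffer download{instance, scheduler, 32 * 1024 * 1024, /*readback=*/true};

// Staging + GPU streaming: also creates the device-local buffer that
// Flush() copies into, plus one buffer view per requested format.
const std::array view_formats{vk::Format::eR32Uint};
StreamBuffer texel{instance, scheduler, 4 * 1024 * 1024,
                   vk::BufferUsageFlagBits::eUniformTexelBuffer, view_formats};
```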
View File

@@ -32,19 +32,14 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
     return vk::ImageAspectFlagBits::eColor;
 }
 
-constexpr u32 STAGING_BUFFER_SIZE = 64 * 1024 * 1024;
+constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
 
 TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
                                RenderpassCache& renderpass_cache)
-    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, blit_helper{
-                                                                                        instance,
-                                                                                        scheduler} {
-    for (auto& buffer : staging_buffers) {
-        buffer = std::make_unique<StagingBuffer>(instance, STAGING_BUFFER_SIZE,
-                                                 vk::BufferUsageFlagBits::eTransferSrc |
-                                                     vk::BufferUsageFlagBits::eTransferDst);
-    }
+    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
+      blit_helper{instance, scheduler}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE},
+      download_buffer{instance, scheduler, DOWNLOAD_BUFFER_SIZE, true} {
 
     auto Register = [this](VideoCore::PixelFormat dest,
                            std::unique_ptr<FormatReinterpreterBase>&& obj) {
@@ -64,7 +59,9 @@ TextureRuntime::~TextureRuntime() {
     for (const auto& [key, alloc] : texture_recycler) {
         vmaDestroyImage(allocator, alloc.image, alloc.allocation);
         device.destroyImageView(alloc.image_view);
-        device.destroyImageView(alloc.base_view);
+        if (alloc.base_view) {
+            device.destroyImageView(alloc.base_view);
+        }
         if (alloc.depth_view) {
             device.destroyImageView(alloc.depth_view);
             device.destroyImageView(alloc.stencil_view);
@@ -82,29 +79,24 @@ TextureRuntime::~TextureRuntime() {
 }
 
 StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    u32& offset = staging_offsets[current_slot];
-
-    // Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
-    offset = Common::AlignUp(offset, 4);
-
-    if (offset + size > STAGING_BUFFER_SIZE) {
-        LOG_CRITICAL(Render_Vulkan, "Staging buffer size exceeded!");
-        UNREACHABLE();
-    }
-
-    const auto& buffer = staging_buffers[current_slot];
-    return StagingData{.buffer = buffer->buffer,
+    auto& buffer = upload ? upload_buffer : download_buffer;
+    auto [data, offset, invalidate] = buffer.Map(size, 4);
+
+    return StagingData{.buffer = buffer.GetStagingHandle(),
                        .size = size,
-                       .mapped = buffer->mapped.subspan(offset, size),
+                       .mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
                        .buffer_offset = offset};
 }
 
-void TextureRuntime::Finish() {
-    scheduler.Submit(SubmitMode::Flush);
+void TextureRuntime::FlushBuffers() {
+    upload_buffer.Flush();
 }
 
-void TextureRuntime::OnSlotSwitch(u32 new_slot) {
-    staging_offsets[new_slot] = 0;
+void TextureRuntime::Finish() {
+    renderpass_cache.ExitRenderpass();
+    scheduler.Submit(SubmitMode::Flush);
+    download_buffer.Invalidate();
 }
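FindStaging is now a thin wrapper over StreamBuffer::Map, so the per-slot offset bookkeeping and the manual overflow check disappear from the runtime. A caller-side sketch of the upload path (the call-site names are illustrative; the matching Commit happens inside Surface::Upload, shown further down):

```cpp
// Illustrative upload call site: acquire staging memory, fill it with
// decoded texels on the CPU, then let the surface record the GPU copy.
StagingData staging = runtime.FindStaging(upload_size, /*upload=*/true);
std::memcpy(staging.mapped.data(), decoded_pixels, staging.size);
surface.Upload(buffer_copy, staging); // commits staging.size internally
```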
@@ -112,6 +104,7 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
     const FormatTraits traits = instance.GetTraits(format);
     const vk::ImageAspectFlags aspect = ToVkAspect(VideoCore::GetFormatType(format));
 
+    // Depth buffers are not supposed to support blit by the spec so don't require it.
     const bool is_suitable = traits.transfer_support && traits.attachment_support &&
                              (traits.blit_support || aspect & vk::ImageAspectFlagBits::eDepth);
     const vk::Format vk_format = is_suitable ? traits.native : traits.fallback;
@@ -416,12 +409,14 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
                                                      .baseArrayLayer = blit.dst_layer,
                                                      .layerCount = 1},
                                   .dstOffsets = dest_offsets};
 
     // Don't use linear filtering on depth attachments
-    const vk::Filter filtering =
-        blit_area.srcSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth ||
-                blit_area.dstSubresource.aspectMask & vk::ImageAspectFlagBits::eDepth
-            ? vk::Filter::eNearest
-            : vk::Filter::eLinear;
+    const VideoCore::PixelFormat format = source.pixel_format;
+    const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 ||
+                                         format == VideoCore::PixelFormat::D24 ||
+                                         format == VideoCore::PixelFormat::D16
+                                     ? vk::Filter::eNearest
+                                     : vk::Filter::eLinear;
 
     vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
     command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
@@ -682,8 +677,7 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
     InvalidateAllWatcher();
 
     // Lock this data until the next scheduler switch
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    runtime.staging_offsets[current_slot] += staging.size;
+    runtime.upload_buffer.Commit(staging.size);
 }
 
 MICROPROFILE_DEFINE(Vulkan_Download, "VulkanSurface", "Texture Download", MP_RGB(128, 192, 64));
@@ -724,8 +718,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
     }
 
     // Lock this data until the next scheduler switch
-    const u32 current_slot = scheduler.GetCurrentSlotIndex();
-    runtime.staging_offsets[current_slot] += staging.size;
+    runtime.download_buffer.Commit(staging.size);
 }
 
 u32 Surface::GetInternalBytesPerPixel() const {
@@ -811,8 +804,7 @@ void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
     // For depth downloads create an R32UI surface and use a compute shader for convert.
     // Then we blit and download that surface.
-    // NOTE: We don't need to set pixel format here since R32Uint automatically gives us
-    // a storage view. Also the D24S8 creates a unique cache key for it
+    // NOTE: We keep the pixel format to D24S8 to avoid linear filtering during scale
     SurfaceParams r32_params = *this;
     r32_params.width = scaled_rect.GetWidth();
     r32_params.stride = scaled_rect.GetWidth();

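On the blit filtering change: the Vulkan spec requires vkCmdBlitImage to use VK_FILTER_NEAREST whenever the images involved have a depth/stencil format, so deriving the filter from the pixel format (D16, D24, D24S8) is equivalent to the old aspect-mask test, and it also covers the depth-download path above, which now deliberately keeps its D24S8 format so the scale blit stays on nearest filtering. The rule as a small helper (the function name is illustrative):

```cpp
// Nearest filtering is mandatory for depth/stencil blits per the Vulkan spec.
constexpr vk::Filter ChooseBlitFilter(VideoCore::PixelFormat format) {
    switch (format) {
    case VideoCore::PixelFormat::D16:
    case VideoCore::PixelFormat::D24:
    case VideoCore::PixelFormat::D24S8:
        return vk::Filter::eNearest;
    default:
        return vk::Filter::eLinear;
    }
}
```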
View File

@@ -104,6 +104,9 @@ public:
                      VideoCore::TextureType type, vk::Format format,
                      vk::ImageUsageFlags usage);
 
+    /// Flushes staging buffers
+    void FlushBuffers();
+
     /// Causes a GPU command flush
     void Finish();
@@ -138,9 +141,6 @@ public:
     /// Returns true if the provided pixel format needs convertion
     [[nodiscard]] bool NeedsConvertion(VideoCore::PixelFormat format) const;
 
-    /// Performs operations that need to be done on every scheduler slot switch
-    void OnSlotSwitch(u32 new_slot);
-
 private:
     /// Returns the current Vulkan instance
     const Instance& GetInstance() const {
@@ -157,9 +157,9 @@ private:
     TaskScheduler& scheduler;
     RenderpassCache& renderpass_cache;
     BlitHelper blit_helper;
+    StreamBuffer upload_buffer;
+    StreamBuffer download_buffer;
     std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
-    std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
-    std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
     std::unordered_multimap<HostTextureTag, ImageAlloc> texture_recycler;
     std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
 };