renderer_vulkan: Emulate 3-component vertex formats when unsupported

* This fixes the crashes on AMD
2022-11-02 21:46:51 +02:00
parent 6a4ff8fa24
commit 6057b18172
14 changed files with 145 additions and 68 deletions
--- a/src/android/build.gradle
+++ b/src/android/build.gradle
@@ -7,7 +7,7 @@ buildscript {
        jcenter()
    }
    dependencies {
-        classpath 'com.android.tools.build:gradle:7.2.0'
+        classpath 'com.android.tools.build:gradle:7.3.1'

        // NOTE: Do not place your application dependencies here; they belong
        // in the individual module build.gradle files
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -181,7 +181,8 @@ static std::array<float, 3 * 2> MakeOrthographicMatrix(float width, float height
 }

 RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
-    : RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
+    : RendererBase{window}, instance{window, Settings::values.physical_device},
+      scheduler{instance, renderpass_cache, *this},
      renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
      runtime{instance, scheduler, renderpass_cache, desc_manager},
      swapchain{instance, scheduler, renderpass_cache},
@@ -919,7 +920,6 @@ void RendererVulkan::SwapBuffers() {
    PrepareRendertarget();

    const auto RecreateSwapchain = [&] {
-        renderpass_cache.ExitRenderpass();
        scheduler.Finish();
        const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
        swapchain.Create(layout.width, layout.height);
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -17,21 +17,21 @@

 namespace Vulkan {

-u32 AttribBytes(VertexAttribute attrib) {
-    switch (attrib.type) {
+u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
+    switch (format) {
    case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
-        return sizeof(float) * attrib.size;
+        return sizeof(float) * size;
    case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
-        return sizeof(u16) * attrib.size;
+        return sizeof(u16) * size;
    case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
    case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
-        return sizeof(u8) * attrib.size;
+        return sizeof(u8) * size;
    }

    return 0;
 }

-vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
+vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
    constexpr std::array attribute_formats = {
        std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
                   vk::Format::eR8G8B8A8Sint},
@@ -42,8 +42,8 @@ vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
        std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
                   vk::Format::eR32G32B32A32Sfloat}};

-    ASSERT(attrib.size <= 4);
-    return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
+    ASSERT(size <= 4);
+    return attribute_formats[static_cast<u32>(format)][size - 1];
 }

 vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
@@ -62,6 +62,13 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
    return vk::ShaderStageFlagBits::eVertex;
 }

+/// Returns true when the device reports vertex buffer support for the attribute's Vulkan format
+[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, const Instance& instance) {
+    const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
+    const vk::FormatFeatureFlags features = instance.GetPhysicalDevice().getFormatProperties(format).bufferFeatures;
+    return (features & vk::FormatFeatureFlagBits::eVertexBuffer) == vk::FormatFeatureFlagBits::eVertexBuffer;
+}
+
 PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
                             RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
    : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
@@ -179,21 +186,26 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
    desc_manager.BindDescriptorSets();
 }

-MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
 bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
                                                Pica::Shader::ShaderSetup& setup,
                                                const VertexLayout& layout) {
-    MICROPROFILE_SCOPE(Vulkan_VS);
-
    PicaVSConfig config{regs.vs, setup};
+
+    u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
    for (u32 i = 0; i < layout.attribute_count; i++) {
        const auto& attrib = layout.attributes[i];
-        config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
+        const u32 location = attrib.location.Value();
+        const bool is_supported = IsAttribFormatSupported(attrib, instance);
+        ASSERT(is_supported || attrib.size == 3);
+
+        config.state.attrib_types[location] = attrib.type.Value();
+        config.state.emulated_attrib_locations[location] =
+                is_supported ? 0 : emulated_attrib_loc++;
    }

    auto [handle, result] =
        programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
-                                        instance.GetDevice(), ShaderOptimization::Debug);
+                                        instance.GetDevice(), ShaderOptimization::High);
    if (!handle) {
        LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
        return false;
@@ -410,20 +422,36 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
                                               : vk::VertexInputRate::eVertex};
    }

-    // Populate vertex attribute structures
-    std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
+    u32 emulated_attrib_count = 0;
+    std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES * 2> attributes;
    for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
-        const auto& attr = info.vertex_layout.attributes[i];
-        attributes[i] = vk::VertexInputAttributeDescription{.location = attr.location,
-                                                            .binding = attr.binding,
-                                                            .format = ToVkAttributeFormat(attr),
-                                                            .offset = attr.offset};
+        const VertexAttribute& attrib = info.vertex_layout.attributes[i];
+        const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
+        const bool is_supported = IsAttribFormatSupported(attrib, instance);
+        ASSERT_MSG(is_supported || attrib.size == 3, "Only 3-component formats can be emulated");
+
+        attributes[i] = vk::VertexInputAttributeDescription{.location = attrib.location,
+                                                            .binding = attrib.binding,
+                                                            .format = is_supported ? format
+                                                                                   : ToVkAttributeFormat(attrib.type, 2),
+                                                            .offset = attrib.offset};
+
+        // When the requested 3-component vertex format is unsupported by the hardware it is
+        // emulated by splitting it into a vec2 + scalar that the vertex shader recombines.
+        // The extra description is appended after the regular ones so that it lies inside the
+        // [0, attribute_count + emulated_attrib_count) range handed to Vulkan below.
+        if (!is_supported) {
+            const u32 index = info.vertex_layout.attribute_count + emulated_attrib_count;
+            const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++;
+            attributes[index] = vk::VertexInputAttributeDescription{.location = location, .binding = attrib.binding,
+                .format = ToVkAttributeFormat(attrib.type, 1), .offset = attrib.offset + AttribBytes(attrib.type, 2)};
+        }
    }

    const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
        .vertexBindingDescriptionCount = info.vertex_layout.binding_count,
        .pVertexBindingDescriptions = bindings.data(),
-        .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
+        .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count + emulated_attrib_count,
        .pVertexAttributeDescriptions = attributes.data()};

    const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -126,7 +126,6 @@ class DescriptorManager;

 /**
 * Stores a collection of rasterizer pipelines used during rendering.
- * In addition handles descriptor set management.
 */
 class PipelineCache {
 public:
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -109,7 +109,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
 }

 RasterizerVulkan::~RasterizerVulkan() {
-    renderpass_cache.ExitRenderpass();
    scheduler.Finish();

    vk::Device device = instance.GetDevice();
@@ -178,21 +177,24 @@ void RasterizerVulkan::SyncFixedState() {

 void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
                                        u32 vs_input_index_max) {
-    const u32 vertex_size = vs_input_size + sizeof(Common::Vec4f) * 16;
-    auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vertex_size, 4);
+    auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);

-    // The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
-    // how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base
-    // address containing the vertex array data. The data for each attribute loader (i) can be found
-    // by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
-    // as something analogous to Vulkan bindings. The user can store attributes in separate loaders
-    // or interleave them in the same loader.
+    /**
+    * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
+    * how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
+    * address containing the vertex array data. The data for each attribute loader (i) can be found
+    * by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
+    * of as something analogous to Vulkan bindings. The user can store attributes in separate
+    * loaders or interleave them in the same loader.
+    **/
    const auto& regs = Pica::g_state.regs;
    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
    PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE

-    std::array<bool, 16> enable_attributes{};
-    VertexLayout layout{};
+    VertexLayout& layout = pipeline_info.vertex_layout;
+    layout.attribute_count = 0;
+    layout.binding_count = 0;
+    enable_attributes.fill(false);

    u32 buffer_offset = 0;
    for (const auto& loader : vertex_attributes.attribute_loaders) {
@@ -250,12 +252,33 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
        buffer_offset += Common::AlignUp(data_size, 16);
    }

-    array_ptr += buffer_offset;
+    binding_offsets[layout.binding_count] = array_offset + buffer_offset;
+    vertex_buffer.Commit(buffer_offset);
+
+    // Assign the rest of the attributes to the last binding
+    SetupFixedAttribs();
+
+    // Bind the generated bindings. Only the binding count is captured; copying the whole
+    // vertex layout into the recorded command for every draw is unnecessary.
+    scheduler.Record([this, binding_count = pipeline_info.vertex_layout.binding_count,
+                     offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
+        std::array<vk::Buffer, 16> buffers;
+        buffers.fill(vertex_buffer.GetHandle());
+        render_cmdbuf.bindVertexBuffers(0, binding_count, buffers.data(), offsets.data());
+    });
+}
+
+void RasterizerVulkan::SetupFixedAttribs() {
+    const auto& regs = Pica::g_state.regs;
+    const auto& vertex_attributes = regs.pipeline.vertex_attributes;
+    VertexLayout& layout = pipeline_info.vertex_layout;
+
+    auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
+    binding_offsets[layout.binding_count] = fixed_offset; // Bind the region that was actually mapped
    // Reserve the last binding for fixed and default attributes
    // Place the default attrib at offset zero for easy access
-    const Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
-    std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
+    static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
+    std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));

    // Find all fixed attributes and assign them to the last binding
    u32 offset = sizeof(Common::Vec4f);
@@ -268,7 +291,7 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
                                         attr.w.ToFloat32()};

                const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
-                std::memcpy(array_ptr + offset, data.data(), data_size);
+                std::memcpy(fixed_ptr + offset, data.data(), data_size);

                VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
                attribute.binding.Assign(layout.binding_count);
@@ -299,26 +322,16 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi

    // Define the fixed+default binding
    VertexBinding& binding = layout.bindings[layout.binding_count];
-    binding.binding.Assign(layout.binding_count);
+    binding.binding.Assign(layout.binding_count++);
    binding.fixed.Assign(1);
    binding.stride.Assign(offset);

-    binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
-    ASSERT(buffer_offset + offset <= vertex_size);
-    vertex_buffer.Commit(buffer_offset + offset);
-
-    // Update the pipeline vertex layout
-    pipeline_info.vertex_layout = layout;
-
-    scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-        std::array<vk::Buffer, 16> buffers;
-        buffers.fill(vertex_buffer.GetHandle());
-        render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
-                                         offsets.data());
-    });
+    vertex_buffer.Commit(offset);
 }

+MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
 bool RasterizerVulkan::SetupVertexShader() {
+    MICROPROFILE_SCOPE(Vulkan_VS);
    return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs,
                                                      pipeline_info.vertex_layout);
 }
@@ -354,7 +367,7 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
 bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
    const auto& regs = Pica::g_state.regs;

-    auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
+    const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);

    if (vs_input_size > VERTEX_BUFFER_SIZE) {
        LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size);
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -158,12 +158,12 @@ private:
    /// Internal implementation for AccelerateDrawBatch
    bool AccelerateDrawBatchInternal(bool is_indexed);

-    /// Copies vertex data performing needed convertions and casts
-    void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
-
    /// Setup vertex array for AccelerateDrawBatch
    void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);

+    /// Setup the fixed attribute emulation in vulkan
+    void SetupFixedAttribs();
+
    /// Setup vertex shader for AccelerateDrawBatch
    bool SetupVertexShader();

@@ -190,6 +190,7 @@ private:

    VertexLayout software_layout;
    std::array<u64, 16> binding_offsets{};
+    std::array<bool, 16> enable_attributes{};
    vk::Sampler default_sampler;
    Surface null_surface;
    Surface null_storage_surface;
--- a/src/video_core/renderer_vulkan/vk_resource_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -121,11 +121,12 @@ void DescriptorPool::RefreshTick() {
 }

 void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
+    LOG_INFO(Render_Vulkan, "Allocating new descriptor pool");
    vk::DescriptorPool& pool = pools.emplace_back();

    // Choose a sane pool size good for most games
    static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
-        {vk::DescriptorType::eUniformBuffer, 2048},
+        {vk::DescriptorType::eUniformBuffer, 4096},
        {vk::DescriptorType::eSampledImage, 4096},
        {vk::DescriptorType::eSampler, 4096},
        {vk::DescriptorType::eUniformTexelBuffer, 2048},
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -25,9 +25,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
    last = nullptr;
 }

-Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
-    : instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore},
-      use_worker_thread{Settings::values.async_command_recording} {
+Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, RendererVulkan& renderer)
+    : instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, master_semaphore{instance},
+      command_pool{instance, master_semaphore}, use_worker_thread{Settings::values.async_command_recording} {
    AllocateWorkerCommandBuffers();
    if (use_worker_thread) {
        AcquireNewChunk();
@@ -120,6 +120,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
    const u64 signal_value = master_semaphore.NextTick();
    state = StateFlags::AllDirty;

+    renderpass_cache.ExitRenderpass();
    Record([signal_semaphore, wait_semaphore, signal_value, this]
           (vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
        MICROPROFILE_SCOPE(Vulkan_Submit);
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -27,13 +27,15 @@ enum class StateFlags {
 DECLARE_ENUM_FLAG_OPERATORS(StateFlags)

 class Instance;
+class RenderpassCache;
 class RendererVulkan;

 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class Scheduler {
 public:
-    explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
+    explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
+                       RendererVulkan& renderer);
    ~Scheduler();

    /// Sends the current execution context to the GPU.
@@ -193,6 +195,7 @@ private:

 private:
    const Instance& instance;
+    RenderpassCache& renderpass_cache;
    RendererVulkan& renderer;
    MasterSemaphore master_semaphore;
    CommandPool command_pool;
--- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp
@@ -1667,7 +1667,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
                prefix = "u";
                break;
            default:
-                LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
+                LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
                UNREACHABLE();
            }

@@ -1675,12 +1675,42 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
                fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
        }
    }
+
+    // Some 3-component attributes might be emulated by breaking them to vec2 + scalar.
+    // Define them here and combine them below
+    for (std::size_t i = 0; i < used_regs.size(); ++i) {
+        if (const u32 location = config.state.emulated_attrib_locations[i]; location != 0 && used_regs[i]) {
+            std::string_view type;
+            switch (config.state.attrib_types[i]) {
+            case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
+                type = "float";
+                break;
+            case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
+            case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
+                type = "int";
+                break;
+            case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
+                type = "uint";
+                break;
+            default:
+                LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
+                UNREACHABLE();
+            }
+
+            out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, type, i);
+        }
+    }
+
    out += '\n';

    // cast input registers to float to avoid computational errors
    for (std::size_t i = 0; i < used_regs.size(); ++i) {
        if (used_regs[i]) {
-            out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
+            // Emulated vec3 attributes get w = 1.0 so they match the implicit (x, y, z, 1)
+            // expansion applied to natively supported 3-component formats.
+            out += config.state.emulated_attrib_locations[i] != 0
+                ? fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), float(vs_in_typed_reg{0}_part2), 1.f);\n", i)
+                : fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
        }
    }
    out += '\n';
--- a/src/video_core/renderer_vulkan/vk_shader_gen.h
+++ b/src/video_core/renderer_vulkan/vk_shader_gen.h
@@ -157,6 +157,7 @@ struct PicaShaderConfigCommon {
    u32 main_offset;
    bool sanitize_mul;
    std::array<Pica::PipelineRegs::VertexAttributeFormat, 16> attrib_types;
+    std::array<u8, 16> emulated_attrib_locations;

    u32 num_outputs;

--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -5,6 +5,7 @@
 #include <algorithm>
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/microprofile.h"
 #include "common/logging/log.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -114,7 +114,7 @@ void Swapchain::Present() {
                                                 .swapchainCount = 1,
                                                 .pSwapchains = &swapchain,
                                                 .pImageIndices = &index};
-
+        MICROPROFILE_SCOPE(Vulkan_Present);
        vk::Queue present_queue = instance.GetPresentQueue();
        try {
            [[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info);
--- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
@@ -60,7 +60,7 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
    return depth_offset;
 }

-constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
 constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;

 TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
@@ -124,7 +124,6 @@ void TextureRuntime::FlushBuffers() {
 MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
 void TextureRuntime::Finish() {
    MICROPROFILE_SCOPE(Vulkan_Finish);
-    renderpass_cache.ExitRenderpass();
    scheduler.Finish();
    download_buffer.Invalidate();
 }