From 0e987959a6a2e7d87a5c0704a51796fe95da4174 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Fri, 30 Dec 2022 11:06:15 +0200 Subject: [PATCH] renderer_vulkan: Rewrite data streaming * Most GPUs nowadays provide a device local/host visible memory heap which is useful for avoiding copies between staging and local memory and especially beneficial for mobile and APUs that are mostly the target of this backend. * This commit ports the old yuzu stream buffer with some changes to suit our needs and gets rid of the buffer flush methods --- src/video_core/rasterizer_accelerated.cpp | 3 +- .../renderer_vulkan/renderer_vulkan.cpp | 29 +- .../renderer_vulkan/renderer_vulkan.h | 1 - .../renderer_vulkan/vk_instance.cpp | 1 + src/video_core/renderer_vulkan/vk_instance.h | 8 +- .../renderer_vulkan/vk_rasterizer.cpp | 133 ++++---- .../renderer_vulkan/vk_rasterizer.h | 23 +- .../renderer_vulkan/vk_scheduler.cpp | 10 +- src/video_core/renderer_vulkan/vk_scheduler.h | 5 +- .../renderer_vulkan/vk_stream_buffer.cpp | 322 +++++++----------- .../renderer_vulkan/vk_stream_buffer.h | 122 +++---- .../renderer_vulkan/vk_texture_runtime.cpp | 85 ++--- .../renderer_vulkan/vk_texture_runtime.h | 7 +- 13 files changed, 319 insertions(+), 430 deletions(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 07e279fbb..a5be75431 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. 
#include +#include "common/alignment.h" #include "core/memory.h" #include "video_core/pica_state.h" #include "video_core/rasterizer_accelerated.h" @@ -210,7 +211,7 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray u32 vs_input_size = 0; for (const auto& loader : vertex_attributes.attribute_loaders) { if (loader.component_count != 0) { - vs_input_size += loader.byte_count * vertex_num; + vs_input_size += Common::AlignUp(loader.byte_count * vertex_num, 4); } } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 937d78654..2a9475dfb 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -98,13 +98,12 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window, Frontend::EmuWindow* : RendererBase{window, secondary_window}, telemetry_session{Core::System::GetInstance().TelemetrySession()}, instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance, - renderpass_cache, - *this}, + renderpass_cache}, renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler}, runtime{instance, scheduler, renderpass_cache, desc_manager}, swapchain{instance, scheduler, renderpass_cache}, - vertex_buffer{ - instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, + vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer, + VERTEX_BUFFER_SIZE}, rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} { Report(); window.mailbox = nullptr; @@ -601,7 +600,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl const auto& texcoords = screen_info.display_texcoords; u32 size = sizeof(ScreenRectVertex) * 4; - auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16); const std::array vertices = { ScreenRectVertex{x, y, 
texcoords.bottom, texcoords.left}, @@ -633,7 +632,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl vk::ShaderStageFlagBits::eVertex, 0, sizeof(info), &info); - render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); }); } @@ -643,7 +642,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, const auto& texcoords = screen_info.display_texcoords; u32 size = sizeof(ScreenRectVertex) * 4; - auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16); const std::array vertices = { ScreenRectVertex{x, y, texcoords.bottom, texcoords.right}, @@ -672,7 +671,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, vk::ShaderStageFlagBits::eVertex, 0, sizeof(info), &info); - render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); }); } @@ -683,7 +682,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i const auto& texcoords = screen_info_l.display_texcoords; u32 size = sizeof(ScreenRectVertex) * 4; - auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16); const std::array vertices = {ScreenRectVertex{x, y, texcoords.bottom, texcoords.left}, ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right}, @@ -712,7 +711,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i vk::ShaderStageFlagBits::eVertex, 0, sizeof(info), &info); - render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); render_cmdbuf.draw(4, 1, offset / 
sizeof(ScreenRectVertex), 0); }); } @@ -723,7 +722,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl const auto& texcoords = screen_info_l.display_texcoords; u32 size = sizeof(ScreenRectVertex) * 4; - auto [ptr, offset, invalidate] = vertex_buffer.Map(size); + auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16); const std::array vertices = {{ ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), @@ -754,7 +753,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl vk::ShaderStageFlagBits::eVertex, 0, sizeof(info), &info); - render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0}); + render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0}); render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0); }); } @@ -967,12 +966,6 @@ void RendererVulkan::SwapBuffers() { } } -void RendererVulkan::FlushBuffers() { - vertex_buffer.Flush(); - rasterizer.FlushBuffers(); - runtime.FlushBuffers(); -} - void RendererVulkan::Report() const { const std::string vendor_name{instance.GetVendorName()}; const std::string model_name{instance.GetModelName()}; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 4a907a97e..695745a0e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -78,7 +78,6 @@ public: void PrepareVideoDumping() override {} void CleanupVideoDumping() override {} void Sync() override; - void FlushBuffers(); private: void ReloadSampler(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 72fdad941..07f639c3f 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -238,6 +238,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) physical_device = physical_devices[physical_device_index]; 
properties = physical_device.getProperties(); + limits = properties.limits; LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}", properties.deviceName); diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index c5c0c374d..fda6a32c1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -177,7 +177,12 @@ public: /// Returns the minimum required alignment for uniforms vk::DeviceSize UniformMinAlignment() const { - return properties.limits.minUniformBufferOffsetAlignment; + return limits.minUniformBufferOffsetAlignment; + } + + /// Returns the maximum supported elements in a texel buffer + u32 MaxTexelBufferElements() const { + return limits.maxTexelBufferElements; } private: @@ -204,6 +209,7 @@ private: vk::SurfaceKHR surface; vk::PhysicalDeviceProperties properties; vk::PhysicalDeviceFeatures features; + vk::PhysicalDeviceLimits limits; vk::DriverIdKHR driver_id; vk::DebugUtilsMessengerEXT debug_messenger; std::string vendor_name; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 53c20f46d..1e570c382 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -17,23 +17,16 @@ #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/video_core.h" -#include - namespace Vulkan { -constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; -constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; -constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024; +constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024; +constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024; -constexpr std::array TEXTURE_BUFFER_LF_FORMATS = { - vk::Format::eR32G32Sfloat, -}; +constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer | + 
vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer; -constexpr std::array TEXTURE_BUFFER_FORMATS = { - vk::Format::eR32G32Sfloat, - vk::Format::eR32G32B32A32Sfloat, -}; +constexpr vk::BufferUsageFlags TEX_BUFFER_USAGE = vk::BufferUsageFlagBits::eUniformTexelBuffer; constexpr VideoCore::SurfaceParams NULL_PARAMS = { .width = 1, @@ -55,6 +48,13 @@ struct DrawParams { bool is_indexed; }; +[[nodiscard]] u64 TextureBufferSize(const Instance& instance) { + // Use the smallest texel size from the texel views + // which corresponds to eR32G32Sfloat + const u64 max_size = instance.MaxTexelBufferElements() * 8; + return std::min(max_size, TEXTURE_BUFFER_SIZE); +} + RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager, TextureRuntime& runtime, RenderpassCache& renderpass_cache) @@ -63,24 +63,17 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan pipeline_cache{instance, scheduler, renderpass_cache, desc_manager}, null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime}, null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime}, - vertex_buffer{ - instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}}, - uniform_buffer{ - instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}}, - index_buffer{ - instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}}, - texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, - vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_FORMATS}, - texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, - vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { + stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE}, + texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)}, + 
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} { - vertex_buffers.fill(vertex_buffer.GetHandle()); + vertex_buffers.fill(stream_buffer.Handle()); uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_size_aligned_vs = - Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); + Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); uniform_size_aligned_fs = - Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); + Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); // Define vertex layout for software shaders MakeSoftwareVertexLayout(); @@ -96,15 +89,31 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan default_sampler = CreateSampler(default_sampler_info); + const vk::Device device = instance.GetDevice(); + texture_lf_view = device.createBufferView({ + .buffer = texture_lf_buffer.Handle(), + .format = vk::Format::eR32G32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + texture_rg_view = device.createBufferView({ + .buffer = texture_buffer.Handle(), + .format = vk::Format::eR32G32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + texture_rgba_view = device.createBufferView({ + .buffer = texture_buffer.Handle(), + .format = vk::Format::eR32G32B32A32Sfloat, + .offset = 0, + .range = VK_WHOLE_SIZE, + }); + // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize // all descriptor sets even the ones we don't use. 
Use default_texture for this - const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData); - const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData); - pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size); - pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size); - pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView()); - pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0)); - pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1)); + pipeline_cache.BindTexelBuffer(2, texture_lf_view); + pipeline_cache.BindTexelBuffer(3, texture_rg_view); + pipeline_cache.BindTexelBuffer(4, texture_rgba_view); for (u32 i = 0; i < 4; i++) { pipeline_cache.BindTexture(i, null_surface.GetImageView()); @@ -122,8 +131,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan RasterizerVulkan::~RasterizerVulkan() { scheduler.Finish(); - - vk::Device device = instance.GetDevice(); + const vk::Device device = instance.GetDevice(); for (auto& [key, sampler] : samplers) { device.destroySampler(sampler); @@ -134,6 +142,9 @@ RasterizerVulkan::~RasterizerVulkan() { } device.destroySampler(default_sampler); + device.destroyBufferView(texture_lf_view); + device.destroyBufferView(texture_rg_view); + device.destroyBufferView(texture_rgba_view); } void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading, @@ -189,7 +200,7 @@ void RasterizerVulkan::SyncFixedState() { void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max) { - auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size); + auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16); /** * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU @@ -262,11 +273,11 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi // Keep track of the binding offsets so we can 
bind the vertex buffer later binding_offsets[layout.binding_count++] = array_offset + buffer_offset; - buffer_offset += Common::AlignUp(data_size, 16); + buffer_offset += Common::AlignUp(data_size, 4); } binding_offsets[layout.binding_count] = array_offset + buffer_offset; - vertex_buffer.Commit(buffer_offset); + stream_buffer.Commit(buffer_offset); // Assign the rest of the attributes to the last binding SetupFixedAttribs(); @@ -283,7 +294,7 @@ void RasterizerVulkan::SetupFixedAttribs() { const auto& vertex_attributes = regs.pipeline.vertex_attributes; VertexLayout& layout = pipeline_info.vertex_layout; - auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f)); + auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0); // Reserve the last binding for fixed and default attributes // Place the default attrib at offset zero for easy access @@ -336,7 +347,7 @@ void RasterizerVulkan::SetupFixedAttribs() { binding.fixed.Assign(1); binding.stride.Assign(offset); - vertex_buffer.Commit(offset); + stream_buffer.Commit(offset); } MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128)); @@ -430,7 +441,7 @@ void RasterizerVulkan::SetupIndexArray() { regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + regs.pipeline.index_array.offset); - auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size); + auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2); if (index_u8 && !native_u8) { u16* index_ptr_u16 = reinterpret_cast(index_ptr); for (u32 i = 0; i < regs.pipeline.num_vertices; i++) { @@ -440,11 +451,11 @@ void RasterizerVulkan::SetupIndexArray() { std::memcpy(index_ptr, index_data, index_buffer_size); } - index_buffer.Commit(index_buffer_size); + stream_buffer.Commit(index_buffer_size); scheduler.Record([this, index_offset = index_offset, index_type = index_type](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - 
render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type); + render_cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type); }); } @@ -759,13 +770,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { const u32 vertex_size = vertices * sizeof(HardwareVertex); // Copy vertex data - auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size); + auto [array_ptr, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex)); std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size); - vertex_buffer.Commit(vertex_size); + stream_buffer.Commit(vertex_size); scheduler.Record([this, vertices, base_vertex, offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset); + render_cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset); render_cmdbuf.draw(vertices, 1, base_vertex, 0); }); } @@ -787,11 +798,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } - static int submit_threshold = 50; + static int submit_threshold = 20; submit_threshold--; if (!submit_threshold) { - submit_threshold = 50; - scheduler.DispatchWork(); + submit_threshold = 20; + scheduler.Flush(); } return succeeded; @@ -1152,14 +1163,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info) return device.createFramebuffer(framebuffer_info); } -void RasterizerVulkan::FlushBuffers() { - vertex_buffer.Flush(); - uniform_buffer.Flush(); - index_buffer.Flush(); - texture_buffer.Flush(); - texture_lf_buffer.Flush(); -} - void RasterizerVulkan::SyncClipEnabled() { uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0; } @@ -1294,7 +1297,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { } std::size_t bytes_used = 0; - auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size); + auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, 
sizeof(Common::Vec4f)); // Sync the lighting luts if (uniform_block_data.lighting_lut_dirty_any || invalidate) { @@ -1360,7 +1363,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() { } std::size_t bytes_used = 0; - auto [buffer, offset, invalidate] = texture_buffer.Map(max_size); + auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap auto SyncProcTexValueLUT = @@ -1460,16 +1463,16 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { return; } - u32 used_bytes = 0; - const u32 uniform_size = static_cast(uniform_size_aligned_vs + uniform_size_aligned_fs); - auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size); + const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + auto [uniforms, offset, invalidate] = stream_buffer.Map(uniform_size, uniform_buffer_alignment); + u32 used_bytes = 0; if (sync_vs) { Pica::Shader::VSUniformData vs_uniforms; vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); - pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes, + pipeline_cache.BindBuffer(0, stream_buffer.Handle(), offset + used_bytes, sizeof(vs_uniforms)); used_bytes += static_cast(uniform_size_aligned_vs); } @@ -1478,13 +1481,13 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { std::memcpy(uniforms + used_bytes, &uniform_block_data.data, sizeof(Pica::Shader::UniformData)); - pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes, + pipeline_cache.BindBuffer(1, stream_buffer.Handle(), offset + used_bytes, sizeof(uniform_block_data.data)); uniform_block_data.dirty = false; used_bytes += static_cast(uniform_size_aligned_fs); } - uniform_buffer.Commit(used_bytes); + stream_buffer.Commit(used_bytes); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h 
b/src/video_core/renderer_vulkan/vk_rasterizer.h index cdc806af5..71b0f50f4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -7,6 +7,7 @@ #include "core/hw/gpu.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" @@ -101,9 +102,6 @@ public: /// Sync fixed function pipeline state void SyncFixedState(); - /// Flushes all rasterizer owned buffers - void FlushBuffers(); - private: void NotifyFixedFunctionPicaRegisterChanged(u32 id) override; @@ -201,16 +199,17 @@ private: SamplerInfo texture_cube_sampler; std::unordered_map samplers; std::unordered_map framebuffers; - - StreamBuffer vertex_buffer; - StreamBuffer uniform_buffer; - StreamBuffer index_buffer; - StreamBuffer texture_buffer; - StreamBuffer texture_lf_buffer; PipelineInfo pipeline_info; - std::size_t uniform_buffer_alignment; - std::size_t uniform_size_aligned_vs; - std::size_t uniform_size_aligned_fs; + + StreamBuffer stream_buffer; ///< Vertex+Index+Uniform buffer + StreamBuffer texture_buffer; ///< Texture buffer + StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer + vk::BufferView texture_lf_view; + vk::BufferView texture_rg_view; + vk::BufferView texture_rgba_view; + u64 uniform_buffer_alignment; + u64 uniform_size_aligned_vs; + u64 uniform_size_aligned_fs; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 795da61fc..9e5ff76d1 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -26,10 +26,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, last = nullptr; } -Scheduler::Scheduler(const Instance& instance, 
RenderpassCache& renderpass_cache, - RendererVulkan& renderer) - : instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, - master_semaphore{instance}, command_pool{instance, master_semaphore}, stop_requested{false}, +Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache) + : instance{instance}, renderpass_cache{renderpass_cache}, master_semaphore{instance}, + command_pool{instance, master_semaphore}, stop_requested{false}, use_worker_thread{Settings::values.async_command_recording} { AllocateWorkerCommandBuffers(); if (use_worker_thread) { @@ -133,10 +132,9 @@ void Scheduler::AllocateWorkerCommandBuffers() { MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255)); void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { - const auto handle = master_semaphore.Handle(); + const vk::Semaphore handle = master_semaphore.Handle(); const u64 signal_value = master_semaphore.NextTick(); state = StateFlags::AllDirty; - renderer.FlushBuffers(); renderpass_cache.ExitRenderpass(); Record([signal_semaphore, wait_semaphore, handle, signal_value, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 13357ca34..aab35bedc 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -28,14 +28,12 @@ DECLARE_ENUM_FLAG_OPERATORS(StateFlags) class Instance; class RenderpassCache; -class RendererVulkan; /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class Scheduler { public: - explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, - RendererVulkan& renderer); + explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache); ~Scheduler(); /// Sends the current execution context to the GPU. 
@@ -198,7 +196,6 @@ private: private: const Instance& instance; RenderpassCache& renderpass_cache; - RendererVulkan& renderer; MasterSemaphore master_semaphore; CommandPool command_pool; std::unique_ptr chunk; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 84c9aeab9..a7772873c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -1,243 +1,155 @@ -// Copyright 2022 Citra Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #include +#include #include "common/alignment.h" #include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" -#include - namespace Vulkan { -[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) { - switch (usage) { - case vk::BufferUsageFlagBits::eVertexBuffer: - return vk::AccessFlagBits::eVertexAttributeRead; - case vk::BufferUsageFlagBits::eIndexBuffer: - return vk::AccessFlagBits::eIndexRead; - case vk::BufferUsageFlagBits::eUniformBuffer: - return vk::AccessFlagBits::eUniformRead; - case vk::BufferUsageFlagBits::eUniformTexelBuffer: - return vk::AccessFlagBits::eShaderRead; - default: - LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage); - UNREACHABLE(); +namespace { + +constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; +constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; + +/// Find a memory type with the passed requirements +std::optional FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, + vk::MemoryPropertyFlags wanted, + u32 filter = std::numeric_limits::max()) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = 
properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) { + return i; + } } - return vk::AccessFlagBits::eNone; + return std::nullopt; } -[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) { - switch (usage) { - case vk::BufferUsageFlagBits::eVertexBuffer: - return vk::PipelineStageFlagBits::eVertexInput; - case vk::BufferUsageFlagBits::eIndexBuffer: - return vk::PipelineStageFlagBits::eVertexInput; - case vk::BufferUsageFlagBits::eUniformBuffer: - return vk::PipelineStageFlagBits::eVertexShader | - vk::PipelineStageFlagBits::eFragmentShader; - case vk::BufferUsageFlagBits::eUniformTexelBuffer: - return vk::PipelineStageFlagBits::eFragmentShader; - default: - LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage); - UNREACHABLE(); +/// Get the preferred host visible memory type. +u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, bool readback, + u32 filter = std::numeric_limits::max()) { + // Prefer device local host visible allocations. Both AMD and Nvidia now provide one. + // Otherwise search for a host visible allocation. + const vk::MemoryPropertyFlags HOST_MEMORY = + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent; + const vk::MemoryPropertyFlags DYNAMIC_MEMORY = + HOST_MEMORY | (readback ? vk::MemoryPropertyFlagBits::eHostCached + : vk::MemoryPropertyFlagBits::eDeviceLocal); + + std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY); + if (!preferred_type) { + preferred_type = FindMemoryType(properties, HOST_MEMORY); + ASSERT_MSG(preferred_type, "No host visible and coherent memory type found"); } - return vk::PipelineStageFlagBits::eNone; + return preferred_type.value_or(0); } -StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback) - : instance{instance} { - const vk::BufferUsageFlags usage = - readback ? 
vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc; - const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage}; +} // Anonymous namespace - const VmaAllocationCreateFlags flags = - readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT - : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; - const VmaAllocationCreateInfo alloc_create_info = { - .flags = flags | VMA_ALLOCATION_CREATE_MAPPED_BIT, - .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, - }; - - VkBuffer unsafe_buffer = VK_NULL_HANDLE; - VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); - VmaAllocationInfo alloc_info; - VmaAllocator allocator = instance.GetAllocator(); - - vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation, - &alloc_info); - - buffer = vk::Buffer{unsafe_buffer}; - mapped = std::span{reinterpret_cast(alloc_info.pMappedData), size}; -} - -StagingBuffer::~StagingBuffer() { - vmaDestroyBuffer(instance.GetAllocator(), static_cast(buffer), allocation); -} - -StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback) - : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, total_size{size}, - bucket_size{size / BUCKET_COUNT}, readback{readback} {} - -StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, - vk::BufferUsageFlagBits usage, std::span view_formats, - bool readback) - : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, usage{usage}, - total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} { - - const vk::BufferCreateInfo buffer_info = { - .size = total_size, - .usage = usage | vk::BufferUsageFlagBits::eTransferDst, - }; - - const VmaAllocationCreateInfo alloc_create_info = { - .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, - }; - - VkBuffer unsafe_buffer = VK_NULL_HANDLE; - VkBufferCreateInfo unsafe_buffer_info = static_cast(buffer_info); - 
VmaAllocationInfo alloc_info; - VmaAllocator allocator = instance.GetAllocator(); - - vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation, - &alloc_info); - - gpu_buffer = vk::Buffer{unsafe_buffer}; - - ASSERT(view_formats.size() < MAX_BUFFER_VIEWS); - - vk::Device device = instance.GetDevice(); - for (std::size_t i = 0; i < view_formats.size(); i++) { - const vk::BufferViewCreateInfo view_info = { - .buffer = gpu_buffer, - .format = view_formats[i], - .offset = 0, - .range = total_size, - }; - - views[i] = device.createBufferView(view_info); - } - - view_count = view_formats.size(); +StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_, + vk::BufferUsageFlags usage_, u64 size, bool readback_) + : instance{instance_}, scheduler{scheduler_}, usage{usage_}, readback{readback_} { + CreateBuffers(size); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } StreamBuffer::~StreamBuffer() { - if (gpu_buffer) { - vk::Device device = instance.GetDevice(); - vmaDestroyBuffer(instance.GetAllocator(), static_cast(gpu_buffer), allocation); - for (std::size_t i = 0; i < view_count; i++) { - device.destroyBufferView(views[i]); - } - } + const vk::Device device = instance.GetDevice(); + device.unmapMemory(memory); + device.destroyBuffer(buffer); + device.freeMemory(memory); } -std::tuple StreamBuffer::Map(u32 size) { - ASSERT(size <= total_size); - size = Common::AlignUp(size, 16); +std::tuple StreamBuffer::Map(u64 size, u64 alignment) { + ASSERT(size <= stream_buffer_size); + mapped_size = size; - Bucket& bucket = buckets[bucket_index]; - - if (bucket.cursor + size > bucket_size) { - bucket.gpu_tick = scheduler.CurrentTick(); - MoveNextBucket(); - return Map(size); + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); } - const bool invalidate = std::exchange(bucket.invalid, false); - const u32 buffer_offset = bucket_index * 
bucket_size + bucket.cursor; - u8* mapped = reinterpret_cast(staging.mapped.data() + buffer_offset); + WaitPendingOperations(offset); - return std::make_tuple(mapped, buffer_offset, invalidate); + bool invalidate{false}; + if (offset + size > stream_buffer_size) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidate = true; + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; + offset = 0; + + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; + } + + return std::make_tuple(mapped + offset, offset, invalidate); } -void StreamBuffer::Commit(u32 size) { - size = Common::AlignUp(size, 16); - buckets[bucket_index].cursor += size; +void StreamBuffer::Commit(u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size, + size); + + offset += size; + + if (current_watch_cursor + 1 >= current_watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); + } + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.tick = scheduler.CurrentTick(); } -void StreamBuffer::Flush() { - if (readback) { - LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer"); +void StreamBuffer::CreateBuffers(u64 prefered_size) { + const vk::Device device = instance.GetDevice(); + const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties(); + const u32 preferred_type = GetMemoryType(memory_properties, readback); + const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; + + // Subtract from the preferred heap size some bytes to avoid getting out of memory.
+ const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; + // As per DXVK's example, using `heap_size / 2` + const VkDeviceSize allocable_size = heap_size / 2; + buffer = device.createBuffer({ + .size = std::min(prefered_size, allocable_size), + .usage = usage, + }); + + const auto requirements = device.getBufferMemoryRequirements(buffer); + const u32 required_flags = requirements.memoryTypeBits; + stream_buffer_size = static_cast(requirements.size); + + memory = device.allocateMemory({ + .allocationSize = requirements.size, + .memoryTypeIndex = GetMemoryType(memory_properties, required_flags), + }); + + device.bindBufferMemory(buffer, memory, 0); + mapped = reinterpret_cast(device.mapMemory(memory, 0, VK_WHOLE_SIZE)); +} + +void StreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { return; } - - Bucket& bucket = buckets[bucket_index]; - const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; - const u32 flush_size = bucket.cursor - bucket.flush_cursor; - ASSERT(flush_size <= bucket_size); - ASSERT(flush_start + flush_size <= total_size); - - // Ensure all staging writes are visible to the host memory domain - if (flush_size > 0) [[likely]] { - VmaAllocator allocator = instance.GetAllocator(); - vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size); - if (gpu_buffer) { - scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, - vk::CommandBuffer upload_cmdbuf) { - const vk::BufferCopy copy_region = { - .srcOffset = flush_start, - .dstOffset = flush_start, - .size = flush_size, - }; - - upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region); - - const vk::BufferMemoryBarrier buffer_barrier = { - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = MakeAccessFlags(usage), - 
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = gpu_buffer, - .offset = flush_start, - .size = flush_size, - }; - - upload_cmdbuf.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage), - vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {}); - }); - } - bucket.flush_cursor += flush_size; + while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + scheduler.Wait(watch.tick); + ++wait_cursor; } } -void StreamBuffer::Invalidate() { - if (!readback) { - return; - } - - Bucket& bucket = buckets[bucket_index]; - const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor; - const u32 flush_size = bucket.cursor - bucket.flush_cursor; - ASSERT(flush_size <= bucket_size); - - if (flush_size > 0) [[likely]] { - // Ensure the staging memory can be read by the host - VmaAllocator allocator = instance.GetAllocator(); - vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size); - bucket.flush_cursor += flush_size; - } -} - -void StreamBuffer::MoveNextBucket() { - // Flush and Invalidate are bucket local operations for simplicity so perform them here - if (readback) { - Invalidate(); - } else { - Flush(); - } - - bucket_index = (bucket_index + 1) % BUCKET_COUNT; - Bucket& next_bucket = buckets[bucket_index]; - scheduler.Wait(next_bucket.gpu_tick); - next_bucket.cursor = 0; - next_bucket.flush_cursor = 0; - next_bucket.invalid = true; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index eb329478d..f95797dc7 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -1,101 +1,81 @@ -// Copyright 2022 Citra Emulator Project +// Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any 
later version // Refer to the license.txt file included. #pragma once -#include -#include +#include #include -#include "common/assert.h" +#include +#include #include "video_core/renderer_vulkan/vk_common.h" -VK_DEFINE_HANDLE(VmaAllocation) - namespace Vulkan { class Instance; class Scheduler; -struct StagingBuffer { - StagingBuffer(const Instance& instance, u32 size, bool readback); - ~StagingBuffer(); - - const Instance& instance; - vk::Buffer buffer{}; - VmaAllocation allocation{}; - std::span mapped{}; -}; - -class StreamBuffer { - static constexpr u32 MAX_BUFFER_VIEWS = 3; - static constexpr u32 BUCKET_COUNT = 2; +class StreamBuffer final { + static constexpr std::size_t MAX_BUFFER_VIEWS = 3; public: - /// Staging only constructor - StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false); - /// Staging + GPU streaming constructor - StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, - vk::BufferUsageFlagBits usage, std::span views, - bool readback = false); + explicit StreamBuffer(const Instance& instance, Scheduler& scheduler, + vk::BufferUsageFlags usage, u64 size, bool readback = false); ~StreamBuffer(); - StreamBuffer(const StreamBuffer&) = delete; - StreamBuffer& operator=(const StreamBuffer&) = delete; + /** + * Reserves a region of memory from the stream buffer. + * @param size Size to reserve. + * @returns A tuple of a raw memory pointer (with offset added), the buffer offset, and an invalidation flag + */ + std::tuple Map(u64 size, u64 alignment); - /// Maps aligned staging memory of size bytes - std::tuple Map(u32 size); + /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+ void Commit(u64 size); - /// Commits size bytes from the currently mapped staging memory - void Commit(u32 size = 0); - - /// Flushes staging memory to the GPU buffer - void Flush(); - - /// Invalidates staging memory for reading - void Invalidate(); - - /// Returns the GPU buffer handle - [[nodiscard]] vk::Buffer GetHandle() const { - return gpu_buffer; + vk::Buffer Handle() const noexcept { + return buffer; } - /// Returns the staging buffer handle - [[nodiscard]] vk::Buffer GetStagingHandle() const { - return staging.buffer; - } - - /// Returns an immutable reference to the requested buffer view - [[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const { - ASSERT(index < view_count); - return views[index]; + u64 Address() const noexcept { + return 0; } private: - /// Moves to the next bucket - void MoveNextBucket(); - - struct Bucket { - bool invalid = false; - u32 gpu_tick = 0; - u32 cursor = 0; - u32 flush_cursor = 0; + struct Watch { + u64 tick{}; + u64 upper_bound{}; }; + /// Creates Vulkan buffer handles committing the required memory. + void CreateBuffers(u64 prefered_size); + + /// Increases the amount of watches available. + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); + private: - const Instance& instance; - Scheduler& scheduler; - StagingBuffer staging; - vk::Buffer gpu_buffer{}; - VmaAllocation allocation{}; - vk::BufferUsageFlagBits usage; - std::array views{}; - std::array buckets; - std::size_t view_count = 0; - u32 total_size = 0; - u32 bucket_size = 0; - u32 bucket_index = 0; - bool readback = false; + const Instance& instance; ///< Vulkan instance. + Scheduler& scheduler; ///< Command scheduler. + + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. + u8* mapped{}; ///< Pointer to the mapped memory + u64 stream_buffer_size{}; ///< Stream buffer size.
+ vk::BufferUsageFlags usage{}; + bool readback{}; ///< Flag indicating if the buffer should use cached memory + + u64 offset{}; ///< Buffer iterator. + u64 mapped_size{}; ///< Size reserved for the current copy. + + std::vector current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 7c4b53c6a..cae2ff03c 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -103,16 +103,16 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { return depth_offset; } -constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024; -constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr u64 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager}, - upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler, - DOWNLOAD_BUFFER_SIZE, - true} { + upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE}, + download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst, + DOWNLOAD_BUFFER_SIZE, true} { auto Register = 
[this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { @@ -153,25 +153,20 @@ TextureRuntime::~TextureRuntime() { StagingData TextureRuntime::FindStaging(u32 size, bool upload) { auto& buffer = upload ? upload_buffer : download_buffer; - auto [data, offset, invalidate] = buffer.Map(size); + auto [data, offset, invalidate] = buffer.Map(size, 4); return StagingData{ - .buffer = buffer.GetStagingHandle(), + .buffer = buffer.Handle(), .size = size, .mapped = std::span{reinterpret_cast(data), size}, .buffer_offset = offset, }; } -void TextureRuntime::FlushBuffers() { - upload_buffer.Flush(); -} - MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235)); void TextureRuntime::Finish() { MICROPROFILE_SCOPE(Vulkan_Finish); scheduler.Finish(); - download_buffer.Invalidate(); } ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, @@ -415,7 +410,8 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea }; if (clear.texture_rect == surface.GetScaledRect()) { - scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf, + vk::CommandBuffer) { const vk::ImageSubresourceRange range = { .aspectMask = params.aspect, .baseMipLevel = clear.texture_level, @@ -458,20 +454,25 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea }, }; - render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + render_cmdbuf.pipelineBarrier(params.pipeline_flags, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); - const bool is_color = static_cast(params.aspect & vk::ImageAspectFlagBits::eColor); + const bool is_color = + static_cast(params.aspect & vk::ImageAspectFlagBits::eColor); if (is_color) { - render_cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal, + 
render_cmdbuf.clearColorImage(params.src_image, + vk::ImageLayout::eTransferDstOptimal, MakeClearColorValue(value), range); } else { - render_cmdbuf.clearDepthStencilImage(params.src_image, vk::ImageLayout::eTransferDstOptimal, + render_cmdbuf.clearDepthStencilImage(params.src_image, + vk::ImageLayout::eTransferDstOptimal, MakeClearDepthStencilValue(value), range); } - render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags, - vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier); + render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, + params.pipeline_flags, vk::DependencyFlagBits::eByRegion, + {}, {}, post_barrier); }); return true; } @@ -528,34 +529,34 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface, .src_image = surface.alloc.image, }; - scheduler.Record( - [params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - const vk::ImageMemoryBarrier pre_barrier = { - .srcAccessMask = params.src_access, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - .oldLayout = vk::ImageLayout::eGeneral, - .newLayout = vk::ImageLayout::eGeneral, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = params.src_image, - .subresourceRange{ - .aspectMask = params.aspect, - .baseMipLevel = level, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; + scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, + vk::CommandBuffer) { + const vk::ImageMemoryBarrier pre_barrier = { + .srcAccessMask = params.src_access, + .dstAccessMask = vk::AccessFlagBits::eTransferWrite, + .oldLayout = vk::ImageLayout::eGeneral, + .newLayout = vk::ImageLayout::eGeneral, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = params.src_image, + .subresourceRange{ + .aspectMask = params.aspect, + .baseMipLevel = 
level, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; - render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, - vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); - }); + render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer, + vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier); + }); renderpass_cache.EnterRenderpass(clear_info); renderpass_cache.ExitRenderpass(); - scheduler.Record([params, level = clear.texture_level] - (vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, + vk::CommandBuffer) { const vk::ImageMemoryBarrier post_barrier = { .srcAccessMask = vk::AccessFlagBits::eTransferWrite, .dstAccessMask = params.src_access, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 93137f691..f241dfb82 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -15,13 +15,15 @@ #include "video_core/renderer_vulkan/vk_layout_tracker.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +VK_DEFINE_HANDLE(VmaAllocation) + namespace Vulkan { struct StagingData { vk::Buffer buffer; u32 size = 0; std::span mapped{}; - u32 buffer_offset = 0; + u64 buffer_offset = 0; }; struct ImageAlloc { @@ -127,9 +129,6 @@ public: /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(Surface& surface, u32 max_level); - /// Flushes staging buffers - void FlushBuffers(); - /// Returns all source formats that support reinterpretation to the dest format [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( VideoCore::PixelFormat dest_format) const;