From a657ef33e6dd51768ad7e6d5a92f2814ff1bd6f8 Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Mon, 13 Jun 2022 16:34:19 +0300 Subject: [PATCH] More fixes --- src/citra_qt/main.cpp | 16 -- .../renderer_vulkan/renderer_vulkan.cpp | 65 ++++--- .../renderer_vulkan/renderer_vulkan.h | 2 +- src/video_core/renderer_vulkan/vk_buffer.cpp | 74 ++++--- src/video_core/renderer_vulkan/vk_buffer.h | 10 +- .../renderer_vulkan/vk_instance.cpp | 15 +- src/video_core/renderer_vulkan/vk_instance.h | 2 +- .../renderer_vulkan/vk_pipeline_builder.cpp | 25 ++- .../renderer_vulkan/vk_pipeline_builder.h | 5 +- .../renderer_vulkan/vk_rasterizer.cpp | 108 ++++------- .../renderer_vulkan/vk_rasterizer.h | 5 +- .../renderer_vulkan/vk_rasterizer_cache.cpp | 55 +++--- .../renderer_vulkan/vk_shader_gen.cpp | 2 +- .../renderer_vulkan/vk_shader_state.h | 2 +- src/video_core/renderer_vulkan/vk_state.cpp | 183 +++++++++++++++--- src/video_core/renderer_vulkan/vk_state.h | 58 +++++- .../renderer_vulkan/vk_swapchain.cpp | 5 +- .../renderer_vulkan/vk_task_scheduler.cpp | 84 ++++++-- .../renderer_vulkan/vk_task_scheduler.h | 21 +- src/video_core/renderer_vulkan/vk_texture.cpp | 45 ++--- src/video_core/renderer_vulkan/vk_texture.h | 4 +- 21 files changed, 526 insertions(+), 260 deletions(-) diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index bb0bedf8d..314bb5f8e 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -917,15 +916,6 @@ bool GMainWindow::LoadROM(const QString& filename) { Frontend::ScopeAcquireContext scope(*render_window); - const QString below_gl33_title = tr("OpenGL 3.3 Unsupported"); - const QString below_gl33_message = tr("Your GPU may not support OpenGL 3.3, or you do not " - "have the latest graphics driver."); - - if (!QOpenGLContext::globalShareContext()->versionFunctions()) { - QMessageBox::critical(this, below_gl33_title, below_gl33_message); - return false; - } - Core::System& system{Core::System::GetInstance()}; const Core::System::ResultStatus result{system.Load(*render_window, filename.toStdString())}; @@ -992,10 +982,6 @@ bool GMainWindow::LoadROM(const QString& filename) { "proper drivers for your graphics card from the manufacturer's website.")); break; - case Core::System::ResultStatus::ErrorVideoCore_ErrorBelowGL33: - QMessageBox::critical(this, below_gl33_title, below_gl33_message); - break; - default: QMessageBox::critical( this, tr("Error while loading ROM!"), @@ -2448,8 +2434,6 @@ int main(int argc, char* argv[]) { std::string bin_path = FileUtil::GetBundleDirectory() + DIR_SEP + ".."; chdir(bin_path.c_str()); #endif - QCoreApplication::setAttribute(Qt::AA_DontCheckOpenGLContextThreadAffinity); - QCoreApplication::setAttribute(Qt::AA_ShareOpenGLContexts); QApplication app(argc, argv); // Qt changes the locale and causes issues in float conversion using std::to_string() when diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 605770915..ceaa6474d 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -294,7 +294,8 @@ void RendererVulkan::ConfigureFramebufferTexture(ScreenInfo& screen, const GPU:: .type = vk::ImageType::e2D, .view_type = vk::ImageViewType::e2D, .usage = vk::ImageUsageFlagBits::eColorAttachment | - vk::ImageUsageFlagBits::eTransferDst + vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eSampled }; switch (format) { @@ -327,7 +328,9 @@ void RendererVulkan::ConfigureFramebufferTexture(ScreenInfo& screen, const GPU:: auto& texture = screen.texture; texture.Destroy(); texture.Create(texture_info); - texture.Transition(vk::ImageLayout::eShaderReadOnlyOptimal); + + auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); + texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); } /** @@ -337,18 +340,16 @@ void RendererVulkan::ConfigureFramebufferTexture(ScreenInfo& screen, const GPU:: void RendererVulkan::DrawSingleScreenRotated(const ScreenInfo& screen_info, float x, float y, float w, float h) { const auto& texcoords = screen_info.display_texcoords; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - auto& state = VulkanState::Get(); - state.EndRendering(); - - const std::array vertices = {{ + const std::array vertices{ ScreenRectVertex(x, y, texcoords.bottom, texcoords.left), ScreenRectVertex(x + w, y, texcoords.bottom, texcoords.right), ScreenRectVertex(x, y + h, texcoords.top, texcoords.left), ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.right), - }}; - cmdbuffer.updateBuffer(vertex_buffer.GetBuffer(), 0, sizeof(vertices), vertices.data()); + }; + + auto data = std::as_bytes(std::span(vertices)); + vertex_buffer.Upload(data, 0); // As this is the "DrawSingleScreenRotated" function, the output resolution dimensions have been // swapped. If a non-rotated draw-screen function were to be added for book-mode games, those @@ -362,11 +363,14 @@ void RendererVulkan::DrawSingleScreenRotated(const ScreenInfo& screen_info, floa draw_info.o_resolution = glm::vec4{h, w, 1.0f / h, 1.0f / w}; auto& image = swapchain->GetCurrentImage(); - state.BeginRendering(image, std::nullopt, clear_color, vk::AttachmentLoadOp::eClear); + auto& state = VulkanState::Get(); + + state.BeginRendering(image, std::nullopt, false, clear_color, vk::AttachmentLoadOp::eClear); state.SetPresentData(draw_info); state.SetPresentTexture(*screen_info.display_texture); state.ApplyPresentState(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.bindVertexBuffers(0, vertex_buffer.GetBuffer(), {0}); cmdbuffer.draw(4, 1, 0, 0); } @@ -374,18 +378,16 @@ void RendererVulkan::DrawSingleScreenRotated(const ScreenInfo& screen_info, floa void RendererVulkan::DrawSingleScreen(const ScreenInfo& screen_info, float x, float y, float w, float h) { const auto& texcoords = screen_info.display_texcoords; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - auto& state = VulkanState::Get(); - state.EndRendering(); - - const std::array vertices = {{ + const std::array vertices{ ScreenRectVertex(x, y, texcoords.bottom, texcoords.right), ScreenRectVertex(x + w, y, texcoords.top, texcoords.right), ScreenRectVertex(x, y + h, texcoords.bottom, texcoords.left), ScreenRectVertex(x + w, y + h, texcoords.top, texcoords.left), - }}; - cmdbuffer.updateBuffer(vertex_buffer.GetBuffer(), 0, sizeof(vertices), vertices.data()); + }; + + auto data = std::as_bytes(std::span(vertices)); + vertex_buffer.Upload(data, 0); const u16 scale_factor = VideoCore::GetResolutionScaleFactor(); auto [width, height] = screen_info.texture.GetArea().extent; @@ -397,11 +399,14 @@ void RendererVulkan::DrawSingleScreen(const ScreenInfo& screen_info, float x, fl draw_info.o_resolution = glm::vec4{h, w, 1.0f / h, 1.0f / w}; auto& image = swapchain->GetCurrentImage(); - state.BeginRendering(image, std::nullopt, clear_color, vk::AttachmentLoadOp::eClear); + auto& state = VulkanState::Get(); + + state.BeginRendering(image, std::nullopt, false, clear_color, vk::AttachmentLoadOp::eClear); state.SetPresentData(draw_info); state.SetPresentTexture(*screen_info.display_texture); state.ApplyPresentState(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.bindVertexBuffers(0, vertex_buffer.GetBuffer(), {0}); cmdbuffer.draw(4, 1, 0, 0); } @@ -658,17 +663,18 @@ void RendererVulkan::SwapBuffers() { bool RendererVulkan::BeginPresent() { swapchain->AcquireNextImage(); - // Swap chain images start in undefined auto& image = swapchain->GetCurrentImage(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); + + // Swap chain images start in undefined image.OverrideImageLayout(vk::ImageLayout::eUndefined); - image.Transition(vk::ImageLayout::eColorAttachmentOptimal); + image.Transition(cmdbuffer, vk::ImageLayout::eColorAttachmentOptimal); // Update viewport and scissor const auto [width, height] = image.GetArea().extent; const vk::Viewport viewport{0.0f, 0.0f, static_cast(width), static_cast(height), 0.0f, 1.0f}; const vk::Rect2D scissor{{0, 0}, {width, height}}; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); cmdbuffer.setViewport(0, viewport); cmdbuffer.setScissor(0, scissor); @@ -680,7 +686,8 @@ void RendererVulkan::EndPresent() { state.EndRendering(); auto& image = swapchain->GetCurrentImage(); - image.Transition(vk::ImageLayout::ePresentSrcKHR); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); + image.Transition(cmdbuffer, vk::ImageLayout::ePresentSrcKHR); g_vk_task_scheduler->Submit(false, true, swapchain.get()); } @@ -688,7 +695,7 @@ void RendererVulkan::EndPresent() { /// Initialize the renderer VideoCore::ResultStatus RendererVulkan::Init() { // Create vulkan instance - vk::ApplicationInfo app_info{"Citra", VK_MAKE_VERSION(1, 0, 0), nullptr, 0, VK_API_VERSION_1_2}; + vk::ApplicationInfo app_info{"Citra", VK_MAKE_VERSION(1, 0, 0), nullptr, 0, VK_API_VERSION_1_3}; // Get required extensions auto extensions = RequiredExtensions(render_window.GetWindowInfo().type, true); @@ -706,17 +713,17 @@ VideoCore::ResultStatus RendererVulkan::Init() { auto surface = CreateSurface(instance, render_window); g_vk_instace = std::make_unique(); g_vk_task_scheduler = std::make_unique(); - g_vk_instace->Create(instance, physical_devices[1], surface, true); + g_vk_instace->Create(instance, physical_devices[0], surface, true); g_vk_task_scheduler->Create(); - // Create Vulkan state - VulkanState::Create(); - g_vk_task_scheduler->BeginTask(); - auto& layout = render_window.GetFramebufferLayout(); - swapchain = std::make_unique(surface); + swapchain = std::make_shared(surface); swapchain->Create(layout.width, layout.height, false); + // Create Vulkan state + VulkanState::Create(swapchain); + g_vk_task_scheduler->BeginTask(); + auto& telemetry_session = Core::System::GetInstance().TelemetrySession(); constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA"); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 745849150..56b788d64 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -75,7 +75,7 @@ private: /// Display information for top and bottom screens respectively std::array screen_infos; - std::unique_ptr swapchain; + std::shared_ptr swapchain; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_buffer.cpp b/src/video_core/renderer_vulkan/vk_buffer.cpp index a4818e762..ce7287aa2 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer.cpp @@ -59,7 +59,7 @@ void VKBuffer::Destroy() { device.destroyBuffer(buffer); device.freeMemory(buffer_memory); - for (int i = 0; i < view_count; i++) { + for (u32 i = 0; i < view_count; i++) { device.destroyBufferView(views[i]); } }; @@ -68,24 +68,6 @@ void VKBuffer::Destroy() { } } -void VKBuffer::CopyBuffer(const VKBuffer& src_buffer, const VKBuffer& dst_buffer, vk::BufferCopy region, vk::AccessFlags access_to_block) { - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - command_buffer.copyBuffer(src_buffer.buffer, dst_buffer.buffer, region); - - vk::BufferMemoryBarrier barrier{ - vk::AccessFlagBits::eTransferWrite, access_to_block, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - dst_buffer.buffer, region.dstOffset, region.size - }; - - // Add a pipeline barrier for the region modified - command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eVertexShader | - vk::PipelineStageFlagBits::eFragmentShader, - vk::DependencyFlagBits::eByRegion, - 0, nullptr, 1, &barrier, 0, nullptr); -} - u32 VKBuffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) { vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties(); @@ -100,6 +82,40 @@ u32 VKBuffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties UNREACHABLE(); } +void VKBuffer::Upload(std::span data, u32 offset, + vk::AccessFlags access_to_block, + vk::PipelineStageFlags stage_to_block) { + auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); + // For small data uploads use vkCmdUpdateBuffer + if (data.size_bytes() < 1024) { + cmdbuffer.updateBuffer(buffer, 0, data.size_bytes(), data.data()); + } + else { + auto [ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(data.size()); + if (!ptr) { + LOG_ERROR(Render_Vulkan, "Cannot upload data without staging buffer!"); + } + + // Copy pixels to staging buffer + std::memcpy(ptr, data.data(), data.size_bytes()); + + auto region = vk::BufferCopy{staging_offset, offset, data.size_bytes()}; + auto& staging = g_vk_task_scheduler->GetStaging(); + cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, region); + } + + vk::BufferMemoryBarrier barrier{ + vk::AccessFlagBits::eTransferWrite, access_to_block, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + buffer, offset, data.size_bytes() + }; + + // Add a pipeline barrier for the region modified + cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); +} + std::tuple StreamBuffer::Map(u32 size, u32 alignment) { ASSERT(size <= buffer_info.size); ASSERT(alignment <= buffer_info.size); @@ -120,11 +136,25 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { return std::make_tuple(staging_ptr + buffer_pos, buffer_pos, invalidate); } -void StreamBuffer::Commit(u32 size) { - auto& staging = g_vk_task_scheduler->GetStaging(); +void StreamBuffer::Commit(u32 size, vk::AccessFlags access_to_block, + vk::PipelineStageFlags stage_to_block) { mapped_chunk.size = size; - VKBuffer::CopyBuffer(staging, *this, mapped_chunk); + auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); + auto& staging = g_vk_task_scheduler->GetStaging(); + cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, mapped_chunk); + + vk::BufferMemoryBarrier barrier{ + vk::AccessFlagBits::eTransferWrite, access_to_block, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, + buffer, mapped_chunk.srcOffset, mapped_chunk.size + }; + + // Add a pipeline barrier for the region modified + cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block, + vk::DependencyFlagBits::eByRegion, + 0, nullptr, 1, &barrier, 0, nullptr); + buffer_pos += size; } diff --git a/src/video_core/renderer_vulkan/vk_buffer.h b/src/video_core/renderer_vulkan/vk_buffer.h index 5bc2b0d00..02cfbfa6a 100644 --- a/src/video_core/renderer_vulkan/vk_buffer.h +++ b/src/video_core/renderer_vulkan/vk_buffer.h @@ -40,8 +40,6 @@ public: /// Global utility functions used by other objects static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties); - static void CopyBuffer(const VKBuffer& src_buffer, const VKBuffer& dst_buffer, vk::BufferCopy region, - vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead); /// Return a pointer to the mapped memory if the buffer is host mapped u8* GetHostPointer() const { return reinterpret_cast(host_ptr); } @@ -49,6 +47,10 @@ public: vk::Buffer GetBuffer() const { return buffer; } u32 GetSize() const { return buffer_info.size; } + void Upload(std::span data, u32 offset, + vk::AccessFlags access_to_block = vk::AccessFlagBits::eVertexAttributeRead, + vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexInput); + protected: Info buffer_info; vk::Buffer buffer; @@ -69,7 +71,9 @@ public: * The actual used size must be specified on unmapping the chunk. */ std::tuple Map(u32 size, u32 alignment = 0); - void Commit(u32 size); + void Commit(u32 size, vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead, + vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexShader | + vk::PipelineStageFlagBits::eFragmentShader); private: u32 buffer_pos{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index d31a31605..e503991cb 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -73,10 +73,10 @@ bool VKInstance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) { static constexpr float queue_priorities[] = {1.0f}; - std::array layers{"VK_LAYER_KHRONOS_validation"}; - std::array queue_infos{ - vk::DeviceQueueCreateInfo({}, graphics_queue_family_index, 1, queue_priorities), - vk::DeviceQueueCreateInfo({}, present_queue_family_index, 1, queue_priorities) + const std::array layers{"VK_LAYER_KHRONOS_validation"}; + const std::array queue_infos{ + vk::DeviceQueueCreateInfo{{}, graphics_queue_family_index, 1, queue_priorities}, + vk::DeviceQueueCreateInfo{{}, present_queue_family_index, 1, queue_priorities} }; vk::DeviceCreateInfo device_info({}, 1, queue_infos.data(), 0, nullptr, @@ -127,15 +127,14 @@ bool VKInstance::FindFeatures() { // Enable newer Vulkan features vk12_features.timelineSemaphore = true; - dynamic_rendering_features.dynamicRendering = true; + vk13_features.dynamicRendering = true; dynamic_state_features.extendedDynamicState = true; dynamic_state2_features.extendedDynamicState2 = true; dynamic_state2_features.extendedDynamicState2LogicOp = true; - dynamic_state2_features.extendedDynamicState2PatchControlPoints = true; // Include features in device creation - vk12_features.pNext = &dynamic_rendering_features; - dynamic_rendering_features.pNext = &dynamic_state_features; + vk12_features.pNext = &vk13_features; + vk13_features.pNext = &dynamic_state_features; dynamic_state_features.pNext = &dynamic_state2_features; features = vk::PhysicalDeviceFeatures2{vk_features, &vk12_features}; diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 37a55169a..7b2d8c5a9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -57,10 +57,10 @@ public: // Features per vulkan version vk::PhysicalDeviceFeatures vk_features{}; + vk::PhysicalDeviceVulkan13Features vk13_features{}; vk::PhysicalDeviceVulkan12Features vk12_features{}; vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state_features{}; vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state2_features{}; - vk::PhysicalDeviceDynamicRenderingFeatures dynamic_rendering_features{}; }; extern std::unique_ptr g_vk_instace; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp b/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp index 6a9caee07..043486656 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_builder.cpp @@ -215,7 +215,7 @@ void PipelineBuilder::SetNoBlendingState() { vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); } -void PipelineBuilder::SetDynamicStates(std::span states) { +void PipelineBuilder::SetDynamicStates(const std::span states) { if (states.size() > MAX_DYNAMIC_STATES) { LOG_ERROR(Render_Vulkan, "Cannot include more dynamic states!"); UNREACHABLE(); @@ -230,8 +230,29 @@ void PipelineBuilder::SetDynamicStates(std::span states) { return; } +void PipelineBuilder::SetRenderingFormats(vk::Format color, vk::Format depth_stencil) { + color_format = color; + depth_stencil_format = depth_stencil; + + auto IsStencil = [](vk::Format format) -> bool { + switch (format) { + case vk::Format::eD16UnormS8Uint: + case vk::Format::eD24UnormS8Uint: + case vk::Format::eD32SfloatS8Uint: + return true; + default: + return false; + }; + }; + + const u32 color_attachment_count = color == vk::Format::eUndefined ? 0 : 1; + rendering_info = vk::PipelineRenderingCreateInfoKHR{0, color_attachment_count, &color_format, depth_stencil_format, + IsStencil(depth_stencil) ? depth_stencil : vk::Format::eUndefined}; + pipeline_info.pNext = &rendering_info; +} + void PipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) { - viewport = vk::Viewport{ x, y, width, height, min_depth, max_depth }; + viewport = vk::Viewport{x, y, width, height, min_depth, max_depth}; viewport_state.pViewports = &viewport; viewport_state.viewportCount = 1; pipeline_info.pViewportState = &viewport_state; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_builder.h b/src/video_core/renderer_vulkan/vk_pipeline_builder.h index 8ad289d38..997971e66 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_builder.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_builder.h @@ -69,7 +69,8 @@ public: void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth); void SetScissorRect(s32 x, s32 y, u32 width, u32 height); - void SetDynamicStates(std::span states); + void SetDynamicStates(const std::span states); + void SetRenderingFormats(vk::Format color, vk::Format depth_stencil = vk::Format::eUndefined); private: static constexpr u32 MAX_DYNAMIC_STATES = 14; @@ -100,6 +101,8 @@ private: // Multisampling vk::PipelineMultisampleStateCreateInfo multisample_info; + vk::PipelineRenderingCreateInfo rendering_info; + vk::Format color_format, depth_stencil_format; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5ac185e08..074b97cb9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -239,7 +239,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { const bool has_stencil = regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; - const bool write_depth_fb = depth_test_enabled || (has_stencil && stencil_test_enabled); + const bool write_depth_fb = state.DepthTestEnabled() || (has_stencil && state.StencilTestEnabled()); const bool using_color_fb = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0; @@ -282,14 +282,10 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { viewport_rect_unscaled.bottom * res_scale, surfaces_rect.bottom, surfaces_rect.top))}; // Bottom - // Bind the framebuffer surfaces - state.BeginRendering(color_surface->texture, depth_surface->texture); - // Sync the viewport vk::Viewport viewport{0, 0, static_cast(viewport_rect_unscaled.GetWidth() * res_scale), static_cast(viewport_rect_unscaled.GetHeight() * res_scale)}; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setViewport(0, viewport); + state.SetViewport(viewport); if (uniform_block_data.data.framebuffer_scale != res_scale) { uniform_block_data.data.framebuffer_scale = res_scale; @@ -359,41 +355,21 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { // outside of the framebuffer region vk::Rect2D scissor{vk::Offset2D(draw_rect.left, draw_rect.bottom), vk::Extent2D(draw_rect.GetHeight(), draw_rect.GetHeight())}; - cmdbuffer.setScissor(0, scissor); + state.SetScissor(scissor); - // Apply pending state + // Bind the framebuffer surfaces + state.BeginRendering(color_surface->texture, depth_surface->texture, true); state.ApplyRenderState(Pica::g_state.regs); + state.SetVertexBuffer(vertex_buffer, 0); - std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); - for (std::size_t base_vertex = 0; base_vertex < vertex_batch.size(); base_vertex += max_vertices) { - const std::size_t vertices = std::min(max_vertices, vertex_batch.size() - base_vertex); - const std::size_t vertex_size = vertices * sizeof(HardwareVertex); + ASSERT(vertex_batch.size() <= VERTEX_BUFFER_SIZE); - auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(vertex_size); - std::memcpy(buffer, vertex_batch.data() + base_vertex, vertex_size); + std::size_t vertices = vertex_batch.size(); + auto data = std::as_bytes(std::span(vertex_batch.data(), vertex_batch.size())); + vertex_buffer.Upload(data, 0); - // Copy the vertex data - auto& staging = g_vk_task_scheduler->GetStaging(); - vk::BufferCopy copy_region(offset, 0, vertex_size); - - state.SetVertexBuffer(vertex_buffer, offset); - cmdbuffer.copyBuffer(staging.GetBuffer(), vertex_buffer.GetBuffer(), copy_region); - - // Issue a pipeline barrier and draw command - vk::BufferMemoryBarrier barrier { - vk::AccessFlagBits::eTransferWrite, vk::AccessFlagBits::eVertexAttributeRead, - VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, - vertex_buffer.GetBuffer(), 0, vertex_size - }; - - // Add a pipeline barrier for each region modified - cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eVertexInput, - vk::DependencyFlagBits::eByRegion, - 0, nullptr, 1, &barrier, 0, nullptr); - - cmdbuffer.draw(vertices, 1, 0, 0); - } + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); + cmdbuffer.draw(vertices, 1, 0, 0); vertex_batch.clear(); @@ -1138,20 +1114,20 @@ void RasterizerVulkan::SyncClipCoef() { void RasterizerVulkan::SyncCullMode() { const auto& regs = Pica::g_state.regs; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); + auto& state = VulkanState::Get(); switch (regs.rasterizer.cull_mode) { case Pica::RasterizerRegs::CullMode::KeepAll: - cmdbuffer.setCullMode(vk::CullModeFlagBits::eNone); + state.SetCullMode(vk::CullModeFlagBits::eNone); break; case Pica::RasterizerRegs::CullMode::KeepClockWise: - cmdbuffer.setCullMode(vk::CullModeFlagBits::eBack); - cmdbuffer.setFrontFace(vk::FrontFace::eClockwise); + state.SetCullMode(vk::CullModeFlagBits::eBack); + state.SetFrontFace(vk::FrontFace::eClockwise); break; case Pica::RasterizerRegs::CullMode::KeepCounterClockWise: - cmdbuffer.setCullMode(vk::CullModeFlagBits::eBack); - cmdbuffer.setFrontFace(vk::FrontFace::eCounterClockwise); + state.SetCullMode(vk::CullModeFlagBits::eBack); + state.SetFrontFace(vk::FrontFace::eCounterClockwise); break; default: @@ -1200,10 +1176,9 @@ void RasterizerVulkan::SyncBlendFuncs() { void RasterizerVulkan::SyncBlendColor() { auto color = PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw); - auto blend_consts = std::array{color.r, color.g, color.b, color.a}; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setBlendConstants(blend_consts.data()); + auto& state = VulkanState::Get(); + state.SetBlendCostants(color.r, color.g, color.b, color.a); } void RasterizerVulkan::SyncFogColor() { @@ -1274,27 +1249,25 @@ void RasterizerVulkan::SyncColorWriteMask() { void RasterizerVulkan::SyncStencilWriteMask() { const auto& regs = Pica::g_state.regs; - auto mask = ((regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) - ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) - : 0); - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, mask); + auto& state = VulkanState::Get(); + state.SetStencilWrite((regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0); } void RasterizerVulkan::SyncDepthWriteMask() { const auto& regs = Pica::g_state.regs; - bool enable = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && - regs.framebuffer.output_merger.depth_write_enable); - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setDepthWriteEnable(enable); + auto& state = VulkanState::Get(); + state.SetDepthWrite(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); } void RasterizerVulkan::SyncStencilTest() { const auto& regs = Pica::g_state.regs; - stencil_test_enabled = regs.framebuffer.output_merger.stencil_test.enable && + bool enabled = regs.framebuffer.output_merger.stencil_test.enable && regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; auto func = PicaToVK::CompareFunc(regs.framebuffer.output_merger.stencil_test.func); auto ref = regs.framebuffer.output_merger.stencil_test.reference_value; @@ -1303,24 +1276,21 @@ void RasterizerVulkan::SyncStencilTest() { auto depth_fail = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_fail); auto depth_pass = PicaToVK::StencilOp(regs.framebuffer.output_merger.stencil_test.action_depth_pass); - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setStencilTestEnable(stencil_test_enabled); - cmdbuffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref); - cmdbuffer.setStencilOp(vk::StencilFaceFlagBits::eFrontAndBack, stencil_fail, depth_pass, depth_fail, func); - cmdbuffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, mask); + auto& state = VulkanState::Get(); + state.SetStencilTest(enabled, stencil_fail, depth_pass, depth_fail, func, ref); + state.SetStencilInput(mask); } void RasterizerVulkan::SyncDepthTest() { const auto& regs = Pica::g_state.regs; - depth_test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + bool test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || regs.framebuffer.output_merger.depth_write_enable == 1; auto test_func = regs.framebuffer.output_merger.depth_test_enable == 1 ? PicaToVK::CompareFunc(regs.framebuffer.output_merger.depth_test_func) : vk::CompareOp::eAlways; - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); - cmdbuffer.setDepthTestEnable(depth_test_enabled); - cmdbuffer.setDepthCompareOp(test_func); + auto& state = VulkanState::Get(); + state.SetDepthTest(test_enabled, test_func); } void RasterizerVulkan::SyncCombinerColor() { @@ -1459,7 +1429,8 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() { } std::size_t bytes_used = 0; - auto [buffer, offset, invalidate] = texture_buffer_lut_lf.Map(max_size, sizeof(glm::vec4)); + u8* buffer = nullptr; u32 offset = 0; bool invalidate = false; + std::tie(buffer, offset, invalidate) = texture_buffer_lut_lf.Map(max_size, sizeof(glm::vec4)); // Sync the lighting luts if (uniform_block_data.lighting_lut_dirty_any || invalidate) { @@ -1523,7 +1494,8 @@ void RasterizerVulkan::SyncAndUploadLUTs() { } std::size_t bytes_used = 0; - auto [buffer, offset, invalidate] = texture_buffer_lut.Map(max_size, sizeof(glm::vec4)); + u8* buffer = nullptr; u32 offset = 0; bool invalidate = false; + std::tie(buffer, offset, invalidate) = texture_buffer_lut.Map(max_size, sizeof(glm::vec4)); // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used]( @@ -1619,8 +1591,10 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) { return; std::size_t uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs; + std::size_t used_bytes = 0; - auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); + u8* uniforms = nullptr; u32 offset = 0; bool invalidate = false; + std::tie(uniforms, offset, invalidate) = uniform_buffer.Map(uniform_size, uniform_buffer_alignment); auto& state = VulkanState::Get(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index f5383aaa5..770977b42 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -271,8 +271,8 @@ private: } uniform_block_data = {}; // They shall be big enough for about one frame. - static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024; - static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024; + static constexpr std::size_t VERTEX_BUFFER_SIZE = 64 * 1024 * 1024; + static constexpr std::size_t INDEX_BUFFER_SIZE = 16 * 1024 * 1024; static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024; static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024; @@ -292,7 +292,6 @@ private: std::array proctex_diff_lut_data{}; bool allow_shadow{}; - bool depth_test_enabled{}, stencil_test_enabled{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp index ec1a123ff..366a2e0f1 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer_cache.cpp @@ -239,7 +239,7 @@ inline vk::ImageSubresourceRange SubResourceLayersToRange(const vk::ImageSubreso static bool BlitTextures(const Surface& src_surface, const Common::Rectangle& src_rect, const Surface& dst_surface, const Common::Rectangle& dst_rect, SurfaceType type) { - vk::ImageSubresourceLayers image_range({}, {}, 0, 1); + vk::ImageSubresourceLayers image_range{{}, {}, 0, 1}; switch (src_surface->type) { case SurfaceParams::SurfaceType::Color: case SurfaceParams::SurfaceType::Texture: @@ -257,28 +257,32 @@ static bool BlitTextures(const Surface& src_surface, const Common::Rectangletexture.GetLayout(); - auto old_dst_layout = dst_surface->texture.GetLayout(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); - src_surface->texture.Transition(vk::ImageLayout::eTransferSrcOptimal); - dst_surface->texture.Transition(vk::ImageLayout::eTransferDstOptimal); + auto& src_texture = src_surface->texture; + src_texture.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal); - vk::ImageBlit blit_area; - blit_area.srcSubresource = image_range; - blit_area.srcOffsets[0] = vk::Offset3D(src_rect.left, src_rect.bottom, 0); - blit_area.srcOffsets[1] = vk::Offset3D(src_rect.right, src_rect.top, 1); - blit_area.dstSubresource = image_range; - blit_area.dstOffsets[0] = vk::Offset3D(dst_rect.left, dst_rect.bottom, 0); - blit_area.dstOffsets[1] = vk::Offset3D(dst_rect.right, dst_rect.top, 1); + auto& dst_texture = dst_surface->texture; + dst_texture.Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - command_buffer.blitImage(src_surface->texture.GetHandle(), vk::ImageLayout::eTransferSrcOptimal, - dst_surface->texture.GetHandle(), vk::ImageLayout::eTransferDstOptimal, - {blit_area}, vk::Filter::eNearest); + const std::array src_offsets{ + vk::Offset3D{static_cast(src_rect.left), static_cast(src_rect.bottom), 0}, + vk::Offset3D{static_cast(src_rect.right), static_cast(src_rect.top), 1} + }; + + const std::array dst_offsets{ + vk::Offset3D{static_cast(dst_rect.left), static_cast(dst_rect.bottom), 0}, + vk::Offset3D{static_cast(dst_rect.right), static_cast(dst_rect.top), 1} + }; + + vk::ImageBlit blit_area{image_range, src_offsets, image_range, dst_offsets}; + cmdbuffer.blitImage(src_texture.GetHandle(), vk::ImageLayout::eTransferSrcOptimal, + dst_texture.GetHandle(), vk::ImageLayout::eTransferDstOptimal, + {blit_area}, vk::Filter::eNearest); // Revert changes to the layout - src_surface->texture.Transition(old_src_layout); - dst_surface->texture.Transition(old_dst_layout); + src_texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); + dst_texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); return true; } @@ -290,8 +294,7 @@ static vk::Rect2D FromRect(Common::Rectangle rect) { } // Allocate an uninitialized texture of appropriate size and format for the surface -VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 width, u32 height) -{ +VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 width, u32 height) { // First check if the texture can be recycled auto recycled_tex = host_texture_recycler.find({format, width, height}); if (recycled_tex != host_texture_recycler.end()) { @@ -308,12 +311,16 @@ VKTexture RasterizerCacheVulkan::AllocateSurfaceTexture(vk::Format format, u32 w .format = format, .type = vk::ImageType::e2D, .view_type = vk::ImageViewType::e2D, + .usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc, .levels = levels }; VKTexture texture; texture.Create(texture_info); - texture.Transition(vk::ImageLayout::eShaderReadOnlyOptimal); + + auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); + texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); return texture; } @@ -383,7 +390,7 @@ void RasterizerCacheVulkan::CopySurface(const Surface& src_surface, const Surfac // This is only called when CanCopy is true, no need to run checks here if (src_surface->type == SurfaceType::Fill) { // NO-OP Vulkan does not allow easy clearing for arbitary textures with rectangle - printf("bad!"); + return; } if (src_surface->CanSubRect(subrect_params)) { auto srect = src_surface->GetScaledSubRect(subrect_params); @@ -452,7 +459,7 @@ void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) { } } -MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); +MICROPROFILE_DEFINE(Vulkan_SurfaceFlush, "Vulkan", "Surface Flush", MP_RGB(128, 192, 64)); void CachedSurface::FlushGPUBuffer(PAddr flush_start, PAddr flush_end) { u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); if (dst_buffer == nullptr) @@ -468,7 +475,7 @@ void CachedSurface::FlushGPUBuffer(PAddr flush_start, PAddr flush_end) { if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR) flush_start = Memory::VRAM_VADDR; - MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); + MICROPROFILE_SCOPE(Vulkan_SurfaceFlush); ASSERT(flush_start >= addr && flush_end <= end); const u32 start_offset = flush_start - addr; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 195752aa3..0a31c90b0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1295,7 +1295,7 @@ std::string GenerateFragmentShader(const PicaFSConfig& config) { in vec4 gl_FragCoord; #endif // CITRA_GLES - out vec4 color; + layout (location = 0) out vec4 color; layout(set = 1, binding = 0) uniform sampler2D tex0; layout(set = 1, binding = 1) uniform sampler2D tex1; diff --git a/src/video_core/renderer_vulkan/vk_shader_state.h b/src/video_core/renderer_vulkan/vk_shader_state.h index 437348ed1..648ceaf80 100644 --- a/src/video_core/renderer_vulkan/vk_shader_state.h +++ b/src/video_core/renderer_vulkan/vk_shader_state.h @@ -289,7 +289,7 @@ struct PicaFixedGSConfig : Common::HashableStruct { }; struct PipelineCacheKey { - vk::PipelineRenderingCreateInfo color_attachments; + vk::Format color, depth_stencil; vk::PipelineColorBlendAttachmentState blend_config; vk::LogicOp blend_logic_op; PicaFSConfig fragment_config; diff --git a/src/video_core/renderer_vulkan/vk_state.cpp b/src/video_core/renderer_vulkan/vk_state.cpp index 61e8fee91..10138c645 100644 --- a/src/video_core/renderer_vulkan/vk_state.cpp +++ b/src/video_core/renderer_vulkan/vk_state.cpp @@ -52,14 +52,16 @@ void DescriptorUpdater::PushBufferUpdate(vk::DescriptorSet set, u32 binding, }; } -VulkanState::VulkanState() { +VulkanState::VulkanState(const std::shared_ptr& swapchain) : swapchain(swapchain) { // Create a placeholder texture which can be used in place of a real binding. - VKTexture::Info info = { + VKTexture::Info info{ .width = 1, .height = 1, .format = vk::Format::eR8G8B8A8Srgb, .type = vk::ImageType::e2D, - .view_type = vk::ImageViewType::e2D + .view_type = vk::ImageViewType::e2D, + .usage = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eTransferDst }; placeholder.Create(info); @@ -115,9 +117,9 @@ VulkanState::~VulkanState() { device.destroySampler(present_sampler); } -void VulkanState::Create() { +void VulkanState::Create(const std::shared_ptr& swapchain) { if (!s_vulkan_state) { - s_vulkan_state = std::make_unique(); + s_vulkan_state = std::make_unique(swapchain); } } @@ -127,7 +129,7 @@ VulkanState& VulkanState::Get() { } void VulkanState::SetVertexBuffer(const VKBuffer& buffer, vk::DeviceSize offset) { - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.bindVertexBuffers(0, buffer.GetBuffer(), offset); } @@ -190,7 +192,7 @@ void VulkanState::UnbindTexture(u32 unit) { descriptors_dirty = true; } -void VulkanState::BeginRendering(OptRef color, OptRef depth, +void VulkanState::BeginRendering(OptRef color, OptRef depth, bool update_pipeline_formats, vk::ClearColorValue color_clear, vk::AttachmentLoadOp color_load_op, vk::AttachmentStoreOp color_store_op, vk::ClearDepthStencilValue depth_clear, vk::AttachmentLoadOp depth_load_op, vk::AttachmentStoreOp depth_store_op, @@ -202,9 +204,10 @@ void VulkanState::BeginRendering(OptRef color, OptRef dept vk::RenderingInfo render_info{{}, color->get().GetArea(), 1, {}}; std::array infos{}; + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); if (color.has_value()) { auto& image = color->get(); - image.Transition(vk::ImageLayout::eColorAttachmentOptimal); + image.Transition(cmdbuffer, vk::ImageLayout::eColorAttachmentOptimal); infos[0] = vk::RenderingAttachmentInfo{ image.GetView(), image.GetLayout(), {}, {}, {}, @@ -217,7 +220,7 @@ void VulkanState::BeginRendering(OptRef color, OptRef dept if (depth.has_value()) { auto& image = depth->get(); - image.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal); + image.Transition(cmdbuffer, vk::ImageLayout::eDepthStencilAttachmentOptimal); infos[1] = vk::RenderingAttachmentInfo{ image.GetView(), image.GetLayout(), {}, {}, {}, @@ -233,8 +236,12 @@ void VulkanState::BeginRendering(OptRef color, OptRef dept render_info.pStencilAttachment = &infos[2]; } + if (update_pipeline_formats) { + render_pipeline_key.color = color.has_value() ? color->get().GetFormat() : vk::Format::eUndefined; + render_pipeline_key.depth_stencil = depth.has_value() ? depth->get().GetFormat() : vk::Format::eUndefined; + } + // Begin rendering - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); cmdbuffer.beginRendering(render_info); rendering = true; } @@ -244,11 +251,39 @@ void VulkanState::EndRendering() { return; } - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.endRendering(); rendering = false; } +void VulkanState::SetViewport(vk::Viewport new_viewport) { + if (new_viewport != viewport) { + viewport = new_viewport; + dirty_flags.set(DynamicStateFlags::Viewport); + } +} + +void VulkanState::SetScissor(vk::Rect2D new_scissor) { + if (new_scissor != scissor) { + scissor = new_scissor; + dirty_flags.set(DynamicStateFlags::Scissor); + } +} + +void VulkanState::SetCullMode(vk::CullModeFlags flags) { + if (cull_mode != flags) { + cull_mode = flags; + dirty_flags.set(DynamicStateFlags::CullMode); + } +} + +void VulkanState::SetFrontFace(vk::FrontFace face) { + if (front_face != face) { + front_face = face; + dirty_flags.set(DynamicStateFlags::FrontFace); + } +} + void VulkanState::SetColorMask(bool red, bool green, bool blue, bool alpha) { auto mask = static_cast(red | (green << 1) | (blue << 2) | (alpha << 3)); render_pipeline_key.blend_config.colorWriteMask = mask; @@ -262,6 +297,14 @@ void VulkanState::SetBlendEnable(bool enable) { render_pipeline_key.blend_config.blendEnable = enable; } +void VulkanState::SetBlendCostants(float red, float green, float blue, float alpha) { + std::array color{red, green, blue, alpha}; + if (color != blend_constants) { + blend_constants = color; + dirty_flags.set(DynamicStateFlags::BlendConstants); + } +} + void VulkanState::SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, vk::BlendFactor dst_color, vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha) { auto& blend = render_pipeline_key.blend_config; @@ -273,6 +316,45 @@ void VulkanState::SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::Blend blend.dstAlphaBlendFactor = dst_alpha; } +void VulkanState::SetStencilWrite(u32 mask) { + if (mask != stencil_write_mask) { + stencil_write_mask = mask; + dirty_flags.set(DynamicStateFlags::StencilMask); + } +} + +void VulkanState::SetStencilInput(u32 mask) { + if (mask != stencil_input_mask) { + stencil_input_mask = mask; + dirty_flags.set(DynamicStateFlags::StencilMask); + } +} + +void VulkanState::SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, + vk::CompareOp compare, u32 ref) { + stencil_enabled = enable; + stencil_ref = ref; + fail_op = fail; + pass_op = pass; + depth_fail_op = depth_fail; + stencil_op = compare; + dirty_flags.set(DynamicStateFlags::StencilTest); +} + +void VulkanState::SetDepthWrite(bool enable) { + if (enable != depth_writes) { + depth_writes = enable; + dirty_flags.set(DynamicStateFlags::DepthWrite); + } +} + +void VulkanState::SetDepthTest(bool enable, vk::CompareOp compare) { + depth_enabled = enable; + depth_op = compare; + dirty_flags.set(DynamicStateFlags::DepthTest); +} + + void VulkanState::InitDescriptorSets() { auto pool = g_vk_task_scheduler->GetDescriptorPool(); auto device = g_vk_instace->GetDevice(); @@ -312,12 +394,12 @@ void VulkanState::ApplyRenderState(const Pica::Regs& regs) { // Bind an appropriate render pipeline render_pipeline_key.fragment_config = PicaFSConfig::BuildFromRegs(regs); - auto it1 = render_pipelines.find(render_pipeline_key); + auto result = render_pipelines.find(render_pipeline_key); // Try to use an already complete pipeline vk::Pipeline pipeline; - if (it1 != render_pipelines.end()) { - pipeline = it1->second.get(); + if (result != render_pipelines.end()) { + pipeline = result->second.get(); } else { // Maybe the shader has been compiled but the pipeline state changed? @@ -330,15 +412,16 @@ void VulkanState::ApplyRenderState(const Pica::Regs& regs) { auto code = GenerateFragmentShader(render_pipeline_key.fragment_config); auto module = CompileShader(code, vk::ShaderStageFlagBits::eFragment); render_fragment_shaders.emplace(render_pipeline_key.fragment_config, vk::UniqueShaderModule{module}); - render_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, shader->second.get()); } // Update pipeline builder auto& att = render_pipeline_key.blend_config; + render_pipeline_builder.SetRenderingFormats(render_pipeline_key.color, render_pipeline_key.depth_stencil); render_pipeline_builder.SetBlendLogicOp(render_pipeline_key.blend_logic_op); - render_pipeline_builder.SetBlendAttachment(att.blendEnable, att.srcColorBlendFactor, att.dstColorBlendFactor, att.colorBlendOp, - att.srcAlphaBlendFactor, att.dstAlphaBlendFactor, att.alphaBlendOp, att.colorWriteMask); + render_pipeline_builder.SetBlendAttachment(att.blendEnable, att.srcColorBlendFactor, att.dstColorBlendFactor, + att.colorBlendOp, att.srcAlphaBlendFactor, att.dstAlphaBlendFactor, + att.alphaBlendOp, att.colorWriteMask); // Cache the resulted pipeline pipeline = render_pipeline_builder.Build(); @@ -346,9 +429,11 @@ void VulkanState::ApplyRenderState(const Pica::Regs& regs) { } // Bind the render pipeline - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline); + ApplyCommonState(true); + // Bind render descriptor sets if (descriptor_sets[1]) { cmdbuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, render_pipeline_layout, @@ -368,11 +453,13 @@ void VulkanState::ApplyPresentState() { } // Bind present pipeline and descriptors - auto cmdbuffer = g_vk_task_scheduler->GetCommandBuffer(); + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); cmdbuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, present_pipeline.get()); cmdbuffer.pushConstants(present_pipeline_layout, vk::ShaderStageFlagBits::eFragment | vk::ShaderStageFlagBits::eVertex, 0, sizeof(present_data), &present_data); + ApplyCommonState(false); + if (descriptor_sets[3]) { cmdbuffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, 1, &descriptor_sets[3], 0, nullptr); @@ -383,6 +470,53 @@ void VulkanState::ApplyPresentState() { UNREACHABLE(); } +void VulkanState::ApplyCommonState(bool extended) { + // Re-apply dynamic parts of the pipeline + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); + if (dirty_flags.test(DynamicStateFlags::Viewport)) { + cmdbuffer.setViewport(0, viewport); + } + + if (dirty_flags.test(DynamicStateFlags::Scissor)) { + cmdbuffer.setScissor(0, scissor); + } + + if (dirty_flags.test(DynamicStateFlags::DepthTest) && extended) { + cmdbuffer.setDepthTestEnable(depth_enabled); + cmdbuffer.setDepthCompareOp(depth_op); + } + + if (dirty_flags.test(DynamicStateFlags::StencilTest) && extended) { + cmdbuffer.setStencilTestEnable(stencil_enabled); + cmdbuffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, stencil_ref); + cmdbuffer.setStencilOp(vk::StencilFaceFlagBits::eFrontAndBack, fail_op, pass_op, + depth_fail_op, stencil_op); + } + + if (dirty_flags.test(DynamicStateFlags::CullMode) && extended) { + cmdbuffer.setCullMode(cull_mode); + } + + if (dirty_flags.test(DynamicStateFlags::FrontFace) && extended) { + cmdbuffer.setFrontFace(front_face); + } + + if (dirty_flags.test(DynamicStateFlags::BlendConstants) && extended) { + cmdbuffer.setBlendConstants(blend_constants.data()); + } + + if (dirty_flags.test(DynamicStateFlags::StencilMask) && extended) { + cmdbuffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, stencil_write_mask); + cmdbuffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, stencil_input_mask); + } + + if (dirty_flags.test(DynamicStateFlags::DepthWrite) && extended) { + cmdbuffer.setDepthWriteEnable(depth_writes); + } + + dirty_flags.reset(); +} + void VulkanState::BuildDescriptorLayouts() { // Render descriptor layouts std::array ubo_set{{ @@ -430,6 +564,7 @@ void VulkanState::ConfigureRenderPipeline() { render_pipeline_builder.SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList); render_pipeline_builder.SetLineWidth(1.0f); render_pipeline_builder.SetNoCullRasterizationState(); + render_pipeline_builder.SetRenderingFormats(render_pipeline_key.color, render_pipeline_key.depth_stencil); // Set depth, stencil tests and blending render_pipeline_builder.SetNoDepthTestState(); @@ -441,7 +576,7 @@ void VulkanState::ConfigureRenderPipeline() { vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); // Enable every required dynamic state - std::array dynamic_states{ + std::array dynamic_states{ vk::DynamicState::eDepthCompareOp, vk::DynamicState::eLineWidth, vk::DynamicState::eDepthTestEnable, vk::DynamicState::eColorWriteEnableEXT, vk::DynamicState::eStencilTestEnable, vk::DynamicState::eStencilOp, @@ -459,7 +594,6 @@ void VulkanState::ConfigureRenderPipeline() { // Add trivial vertex shader auto code = GenerateTrivialVertexShader(true); - std::cout << code << '\n'; render_vertex_shader = CompileShader(code, vk::ShaderStageFlagBits::eVertex); render_pipeline_builder.SetShaderStage(vk::ShaderStageFlagBits::eVertex, render_vertex_shader); } @@ -475,8 +609,9 @@ void VulkanState::ConfigurePresentPipeline() { present_pipeline_builder.Clear(); present_pipeline_builder.SetPipelineLayout(present_pipeline_layout); present_pipeline_builder.SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleStrip); - render_pipeline_builder.SetLineWidth(1.0f); - render_pipeline_builder.SetNoCullRasterizationState(); + present_pipeline_builder.SetLineWidth(1.0f); + present_pipeline_builder.SetNoCullRasterizationState(); + present_pipeline_builder.SetRenderingFormats(swapchain->GetCurrentImage().GetFormat()); // Set depth, stencil tests and blending present_pipeline_builder.SetNoDepthTestState(); @@ -484,7 +619,7 @@ void VulkanState::ConfigurePresentPipeline() { present_pipeline_builder.SetNoBlendingState(); // Enable every required dynamic state - std::array dynamic_states{ + std::array dynamic_states{ vk::DynamicState::eLineWidth, vk::DynamicState::eViewport, vk::DynamicState::eScissor, diff --git a/src/video_core/renderer_vulkan/vk_state.h b/src/video_core/renderer_vulkan/vk_state.h index 832f9cda6..7eaec5d12 100644 --- a/src/video_core/renderer_vulkan/vk_state.h +++ b/src/video_core/renderer_vulkan/vk_state.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "video_core/regs.h" #include "video_core/renderer_vulkan/vk_shader_state.h" #include "video_core/renderer_vulkan/vk_pipeline_builder.h" @@ -53,26 +54,43 @@ private: u32 update_count{}; }; +class VKSwapChain; + /// Tracks global Vulkan state class VulkanState { public: - VulkanState(); + VulkanState(const std::shared_ptr& swapchain); ~VulkanState(); /// Initialize object to its initial state - static void Create(); + static void Create(const std::shared_ptr& swapchain); static VulkanState& Get(); + /// Query state + bool DepthTestEnabled() const { return depth_enabled && depth_writes; } + bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; } + /// Configure drawing state void SetVertexBuffer(const VKBuffer& buffer, vk::DeviceSize offset); - void SetColorMask(bool red, bool green, bool blue, bool alpha); + void SetViewport(vk::Viewport viewport); + void SetScissor(vk::Rect2D scissor); + void SetCullMode(vk::CullModeFlags flags); + void SetFrontFace(vk::FrontFace face); void SetLogicOp(vk::LogicOp logic_op); + void SetStencilWrite(u32 mask); + void SetStencilInput(u32 mask); + void SetStencilTest(bool enable, vk::StencilOp fail, vk::StencilOp pass, vk::StencilOp depth_fail, + vk::CompareOp compare, u32 ref); + void SetDepthWrite(bool enable); + void SetDepthTest(bool enable, vk::CompareOp compare); + void SetColorMask(bool red, bool green, bool blue, bool alpha); void SetBlendEnable(bool enable); + void SetBlendCostants(float red, float green, float blue, float alpha); void SetBlendOp(vk::BlendOp rgb_op, vk::BlendOp alpha_op, vk::BlendFactor src_color, vk::BlendFactor dst_color, vk::BlendFactor src_alpha, vk::BlendFactor dst_alpha); /// Rendering - void BeginRendering(OptRef color, OptRef depth, + void BeginRendering(OptRef color, OptRef depth, bool update_pipeline_formats = false, vk::ClearColorValue color_clear = {}, vk::AttachmentLoadOp color_load_op = vk::AttachmentLoadOp::eLoad, vk::AttachmentStoreOp color_store_op = vk::AttachmentStoreOp::eStore, @@ -97,6 +115,7 @@ public: void InitDescriptorSets(); void ApplyRenderState(const Pica::Regs& config); void ApplyPresentState(); + void ApplyCommonState(bool extended); private: void BuildDescriptorLayouts(); @@ -105,6 +124,7 @@ private: private: // Render targets + std::shared_ptr swapchain; bool rendering{}; VKTexture* color_render_target{}, *depth_render_target{}; vk::ImageView present_view; @@ -120,7 +140,7 @@ private: std::array descriptor_sets; // Pipeline caches - PipelineCacheKey render_pipeline_key; + PipelineCacheKey render_pipeline_key{}; PipelineBuilder render_pipeline_builder, present_pipeline_builder; vk::PipelineLayout render_pipeline_layout, present_pipeline_layout; std::unordered_map render_pipelines; @@ -129,6 +149,34 @@ private: // Shader caches vk::ShaderModule render_vertex_shader, present_vertex_shader, present_fragment_shader; std::unordered_map render_fragment_shaders; + + // Dynamic state + enum DynamicStateFlags : u32 { + Viewport, + Scissor, + LineWidth, + DepthTest, + DepthWrite, + StencilTest, + StencilMask, + ColorWrite, + CullMode, + BlendConstants, + FrontFace + }; + + std::bitset<16> dirty_flags; + u32 stencil_write_mask{}, stencil_input_mask{}, stencil_ref{}; + bool depth_enabled{}, depth_writes{}, stencil_enabled{}, stencil_writes{}; + vk::StencilOp fail_op, pass_op, depth_fail_op; + vk::CompareOp depth_op, stencil_op; + + vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; + vk::CullModeFlags cull_mode{}; + vk::FrontFace front_face{}; + vk::Rect2D scissor{}; + vk::LogicOp logic_op{}; + std::array blend_constants{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 933a1d496..b0a6bfeb5 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -40,7 +40,7 @@ bool VKSwapChain::Create(u32 width, u32 height, bool vsync_enabled) { vk::SwapchainCreateInfoKHR swapchain_info{{}, surface, details.image_count, details.format.format, details.format.colorSpace, details.extent, 1, vk::ImageUsageFlagBits::eColorAttachment, vk::SharingMode::eExclusive, 1, indices.data(), details.transform, - vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, swapchain}; + vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, VK_NULL_HANDLE}; // For dedicated present queues, select concurrent sharing mode if (indices[0] != indices[1]) { @@ -190,7 +190,8 @@ void VKSwapChain::SetupImages() { .height = details.extent.height, .format = details.format.format, .type = vk::ImageType::e2D, - .view_type = vk::ImageViewType::e2D + .view_type = vk::ImageViewType::e2D, + .usage = vk::ImageUsageFlagBits::eColorAttachment }; // Create the swapchain buffers containing the image and imageview diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index 90b692ae6..268771ff3 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -12,6 +12,15 @@ namespace Vulkan { VKTaskScheduler::~VKTaskScheduler() { SyncToGPU(); + + // Destroy Vulkan resources + auto device = g_vk_instace->GetDevice(); + device.destroyCommandPool(command_pool); + device.destroySemaphore(timeline); + + for (auto& task : tasks) { + device.destroyDescriptorPool(task.pool); + } } std::tuple VKTaskScheduler::RequestStaging(u32 size) { @@ -26,24 +35,29 @@ std::tuple VKTaskScheduler::RequestStaging(u32 size) { } u8* ptr = task.staging.GetHostPointer() + task.current_offset; - task.current_offset += size; + std::memset(ptr, 0, size); + task.current_offset += size; return std::make_tuple(ptr, task.current_offset - size); } +VKBuffer& VKTaskScheduler::GetStaging() { + return tasks[current_task].staging; +} + bool VKTaskScheduler::Create() { auto device = g_vk_instace->GetDevice(); // Create command pool vk::CommandPoolCreateInfo pool_info(vk::CommandPoolCreateFlagBits::eResetCommandBuffer, g_vk_instace->GetGraphicsQueueFamilyIndex()); - command_pool = device.createCommandPoolUnique(pool_info); + command_pool = device.createCommandPool(pool_info); // Create timeline semaphore for syncronization vk::SemaphoreTypeCreateInfo timeline_info{vk::SemaphoreType::eTimeline, 0}; vk::SemaphoreCreateInfo semaphore_info{{}, &timeline_info}; - timeline = device.createSemaphoreUnique(semaphore_info); + timeline = device.createSemaphore(semaphore_info); VKBuffer::Info staging_info{ .size = STAGING_BUFFER_SIZE, @@ -56,24 +70,43 @@ bool VKTaskScheduler::Create() { const vk::DescriptorPoolSize pool_size{vk::DescriptorType::eCombinedImageSampler, 64}; vk::DescriptorPoolCreateInfo pool_create_info{{}, 1024, pool_size}; - // Create global descriptor pool - global_pool = device.createDescriptorPoolUnique(pool_create_info); - for (auto& task : tasks) { // Create command buffers - vk::CommandBufferAllocateInfo buffer_info{command_pool.get(), vk::CommandBufferLevel::ePrimary, 1}; - task.command_buffer = device.allocateCommandBuffers(buffer_info)[0]; + vk::CommandBufferAllocateInfo buffer_info{command_pool, vk::CommandBufferLevel::ePrimary, 2}; + auto buffers = device.allocateCommandBuffers(buffer_info); + std::ranges::copy_n(buffers.begin(), 2, task.command_buffers.begin()); // Create staging buffer task.staging.Create(staging_info); // Create descriptor pool - task.pool = device.createDescriptorPoolUnique(pool_create_info); + task.pool = device.createDescriptorPool(pool_create_info); } return true; } +vk::CommandBuffer VKTaskScheduler::GetRenderCommandBuffer() const { + const auto& task = tasks[current_task]; + return task.command_buffers[1]; +} + +vk::CommandBuffer VKTaskScheduler::GetUploadCommandBuffer() { + auto& task = tasks[current_task]; + if (!task.use_upload_buffer) { + auto& cmdbuffer = task.command_buffers[0]; + cmdbuffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); + task.use_upload_buffer = true; + } + + return task.command_buffers[0]; +} + +vk::DescriptorPool VKTaskScheduler::GetDescriptorPool() const { + const auto& task = tasks[current_task]; + return task.pool; +} + void VKTaskScheduler::SyncToGPU(u64 task_index) { // No need to sync if the GPU already has finished the task if (tasks[task_index].task_id <= GetGPUTick()) { @@ -83,7 +116,7 @@ void VKTaskScheduler::SyncToGPU(u64 task_index) { auto last_completed_task_id = GetGPUTick(); // Wait for the task to complete - vk::SemaphoreWaitInfo wait_info({}, timeline.get(), tasks[task_index].task_id); + vk::SemaphoreWaitInfo wait_info{{}, timeline, tasks[task_index].task_id}; auto result = g_vk_instace->GetDevice().waitSemaphores(wait_info, UINT64_MAX); if (result != vk::Result::eSuccess) { @@ -106,18 +139,32 @@ void VKTaskScheduler::SyncToGPU() { SyncToGPU(current_task); } +u64 VKTaskScheduler::GetCPUTick() const { + return current_task_id; +} + +u64 VKTaskScheduler::GetGPUTick() const { + auto device = g_vk_instace->GetDevice(); + return device.getSemaphoreCounterValue(timeline); +} + void VKTaskScheduler::Submit(bool wait_completion, bool present, VKSwapChain* swapchain) { // End the current task recording. auto& task = tasks[current_task]; - task.command_buffer.end(); + + // End command buffers + task.command_buffers[1].end(); + if (task.use_upload_buffer) { + task.command_buffers[0].end(); + } const u32 num_signal_semaphores = present ? 2U : 1U; const std::array signal_values{task.task_id, u64(0)}; - std::array signal_semaphores{timeline.get(), vk::Semaphore{}}; + std::array signal_semaphores{timeline, vk::Semaphore{}}; const u32 num_wait_semaphores = present ? 2U : 1U; const std::array wait_values{task.task_id - 1, u64(1)}; - std::array wait_semaphores{timeline.get(), vk::Semaphore{}}; + std::array wait_semaphores{timeline, vk::Semaphore{}}; // When the task completes the timeline will increment to the task id const vk::TimelineSemaphoreSubmitInfoKHR timeline_si{num_wait_semaphores, wait_values.data(), @@ -128,9 +175,11 @@ void VKTaskScheduler::Submit(bool wait_completion, bool present, VKSwapChain* sw vk::PipelineStageFlagBits::eColorAttachmentOutput, }; - const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), 1, - &task.command_buffer, num_signal_semaphores, signal_semaphores.data(), + const u32 cmdbuffer_count = task.use_upload_buffer ? 2u : 1u; + const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), cmdbuffer_count, + &task.command_buffers[2 - cmdbuffer_count], num_signal_semaphores, signal_semaphores.data(), &timeline_si}; + // Wait for new swapchain image if (present) { signal_semaphores[1] = swapchain->GetRenderSemaphore(); @@ -167,13 +216,14 @@ void VKTaskScheduler::BeginTask() { // Wait for the GPU to finish with all resources for this task. SyncToGPU(next_task_index); - device.resetDescriptorPool(task.pool.get()); - task.command_buffer.begin({vk::CommandBufferUsageFlagBits::eSimultaneousUse}); + device.resetDescriptorPool(task.pool); + task.command_buffers[1].begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}); // Move to the next command buffer. current_task = next_task_index; task.task_id = current_task_id++; task.current_offset = 0; + task.use_upload_buffer = false; auto& state = VulkanState::Get(); state.InitDescriptorSets(); diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index 5ec4b58ff..228d7b442 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -39,16 +39,17 @@ public: bool Create(); /// Retrieve either of the current frame's command buffers - vk::CommandBuffer GetCommandBuffer() const { return tasks[current_task].command_buffer; } - vk::DescriptorPool GetDescriptorPool() const { return tasks[current_task].pool.get(); } + vk::CommandBuffer GetRenderCommandBuffer() const; + vk::CommandBuffer GetUploadCommandBuffer(); + vk::DescriptorPool GetDescriptorPool() const; /// Access the staging buffer of the current task std::tuple RequestStaging(u32 size); - VKBuffer& GetStaging() { return tasks[current_task].staging; } + VKBuffer& GetStaging(); /// Query and/or synchronization CPU and GPU - u64 GetCPUTick() const { return current_task_id; } - u64 GetGPUTick() const { return g_vk_instace->GetDevice().getSemaphoreCounterValue(timeline.get()); } + u64 GetCPUTick() const; + u64 GetGPUTick() const; void SyncToGPU(); void SyncToGPU(u64 task_index); @@ -59,16 +60,16 @@ public: private: struct Task { + bool use_upload_buffer{false}; u64 current_offset{}, task_id{}; - vk::CommandBuffer command_buffer; - vk::UniqueDescriptorPool pool; + std::array command_buffers; std::vector> cleanups; + vk::DescriptorPool pool; VKBuffer staging; }; - vk::UniqueDescriptorPool global_pool; - vk::UniqueSemaphore timeline; - vk::UniqueCommandPool command_pool; + vk::Semaphore timeline; + vk::CommandPool command_pool; u64 current_task_id = 1; // Each task contains unique resources diff --git a/src/video_core/renderer_vulkan/vk_texture.cpp b/src/video_core/renderer_vulkan/vk_texture.cpp index 5491f4c23..09e34f701 100644 --- a/src/video_core/renderer_vulkan/vk_texture.cpp +++ b/src/video_core/renderer_vulkan/vk_texture.cpp @@ -76,8 +76,7 @@ void VKTexture::Create(const Info& create_info) { flags, info.type, info.format, { info.width, info.height, 1 }, info.levels, info.layers, static_cast(info.multisamples), - vk::ImageTiling::eOptimal, - vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled + vk::ImageTiling::eOptimal, info.usage }; texture = device.createImage(image_info); @@ -145,7 +144,12 @@ void VKTexture::Destroy() { } } -void VKTexture::Transition(vk::ImageLayout new_layout) { +void VKTexture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout) { + Transition(cmdbuffer, new_layout, 0, info.levels, 0, info.layers); +} + +void VKTexture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, + u32 start_level, u32 level_count, u32 start_layer, u32 layer_count) { if (new_layout == layout) { return; } @@ -222,11 +226,10 @@ void VKTexture::Transition(vk::ImageLayout new_layout) { source.layout, dst.layout, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, texture, - vk::ImageSubresourceRange{aspect, 0, 1, 0, 1} + vk::ImageSubresourceRange{aspect, start_level, level_count, start_layer, layer_count} }; - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); - command_buffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier); + cmdbuffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier); layout = new_layout; } @@ -241,7 +244,7 @@ void VKTexture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, } // Copy pixels to staging buffer - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer(); std::memcpy(buffer, pixels.data(), pixels.size()); vk::BufferImageCopy copy_region{ @@ -251,19 +254,15 @@ void VKTexture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, {region.extent.width, region.extent.height, 1} }; - // Exit rendering for transfer operations - auto& state = VulkanState::Get(); - state.EndRendering(); - // Transition image to transfer format - Transition(vk::ImageLayout::eTransferDstOptimal); + Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal); - command_buffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(), + cmdbuffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(), texture, vk::ImageLayout::eTransferDstOptimal, copy_region); // Prepare image for shader reads - Transition(vk::ImageLayout::eShaderReadOnlyOptimal); + Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal); } void VKTexture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span memory) { @@ -273,7 +272,13 @@ void VKTexture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region LOG_ERROR(Render_Vulkan, "Cannot download texture without staging buffer!"); } - auto command_buffer = g_vk_task_scheduler->GetCommandBuffer(); + // Downloads can happen after the image has been rendered to or changed by blitting + // so we must perform it in the render command buffer. However there is no guarantee + // of the rendering context so terminate the current renderpass to be sure + auto& state = VulkanState::Get(); + state.EndRendering(); + + auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer(); // Copy pixels to staging buffer vk::BufferImageCopy download_region{ @@ -292,15 +297,11 @@ void VKTexture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region std::memcpy(buffer, memory.data(), memory.size()); } - // Exit rendering for transfer operations - auto& state = VulkanState::Get(); - state.EndRendering(); - // Transition image to transfer format auto old_layout = GetLayout(); - Transition(vk::ImageLayout::eTransferSrcOptimal); + Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal); - command_buffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal, + cmdbuffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal, g_vk_task_scheduler->GetStaging().GetBuffer(), download_region); @@ -310,7 +311,7 @@ void VKTexture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region std::memcpy(memory.data(), buffer, memory.size_bytes()); // Restore layout - Transition(old_layout); + Transition(cmdbuffer, old_layout); } std::vector VKTexture::RGBToRGBA(std::span data) { diff --git a/src/video_core/renderer_vulkan/vk_texture.h b/src/video_core/renderer_vulkan/vk_texture.h index e13178999..60698c431 100644 --- a/src/video_core/renderer_vulkan/vk_texture.h +++ b/src/video_core/renderer_vulkan/vk_texture.h @@ -55,7 +55,9 @@ public: void Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span dst); /// Used to transition the image to an optimal layout during transfers - void Transition(vk::ImageLayout new_layout); + void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout); + void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, u32 start_level, u32 level_count, + u32 start_layer, u32 layer_count); void OverrideImageLayout(vk::ImageLayout new_layout); private: