From 98e0ecf6a76855e20c61c5f7c24b38e6fcda8d16 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Wed, 28 Dec 2022 17:42:37 +0200 Subject: [PATCH] renderer_vulkan: Add fallback path for VK_EXT_index_type_uint8 * Also remove some flush barriers --- src/android/app/src/main/jni/config.cpp | 4 +- .../renderer_vulkan/renderer_vulkan.cpp | 4 +- .../renderer_vulkan/vk_instance.cpp | 39 ++++--- src/video_core/renderer_vulkan/vk_instance.h | 19 ++- .../renderer_vulkan/vk_master_semaphore.h | 2 + .../renderer_vulkan/vk_rasterizer.cpp | 108 +++++++++--------- .../renderer_vulkan/vk_rasterizer.h | 4 + .../renderer_vulkan/vk_scheduler.cpp | 16 +-- .../renderer_vulkan/vk_texture_runtime.cpp | 31 ----- 9 files changed, 108 insertions(+), 119 deletions(-) diff --git a/src/android/app/src/main/jni/config.cpp b/src/android/app/src/main/jni/config.cpp index 53a6986ad..b02b2bc2c 100644 --- a/src/android/app/src/main/jni/config.cpp +++ b/src/android/app/src/main/jni/config.cpp @@ -117,9 +117,9 @@ void Config::ReadValues() { Settings::values.graphics_api = static_cast(sdl2_config->GetInteger("Renderer", "graphics_api", 2)); Settings::values.async_command_recording = - sdl2_config->GetBoolean("Renderer", "async_command_recording", false); + sdl2_config->GetBoolean("Renderer", "async_command_recording", true); Settings::values.spirv_shader_gen = sdl2_config->GetBoolean("Renderer", "spirv_shader_gen", true); - Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", true); + Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", false); Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true); Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true); Settings::values.shaders_accurate_mul = diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 05cb21660..c8f07b4c6 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -888,7 +888,7 @@ void RendererVulkan::SwapBuffers() { DrawScreens(layout, false); - renderpass_cache.ExitRenderpass(); + /*renderpass_cache.ExitRenderpass(); scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { const vk::MemoryBarrier memory_write_barrier = { @@ -900,7 +900,7 @@ void RendererVulkan::SwapBuffers() { vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, memory_write_barrier, {}, {}); - }); + });*/ const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore(); const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore(); diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 46432c73c..33e59b692 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -164,7 +164,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr); // Enable the instance extensions the backend uses - auto extensions = GetInstanceExtensions(window_info.type, false); + auto extensions = GetInstanceExtensions(window_info.type, enable_validation); // Use required platform-specific flags auto flags = GetInstanceFlags(); @@ -350,12 +350,12 @@ bool Instance::CreateDevice() { physical_device.getFeatures2(); + vk::PhysicalDeviceCustomBorderColorFeaturesEXT, + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>(); // Not having geometry shaders will cause issues with accelerated rendering. - const vk::PhysicalDeviceFeatures available = feature_chain.get().features; - features = available; - if (!available.geometryShader) { + features = feature_chain.get().features; + if (!features.geometryShader) { LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!"); } @@ -390,6 +390,7 @@ bool Instance::CreateDevice() { extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); custom_border_color = AddExtension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + index_type_uint8 = AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); // Search queue families for graphics and present queues auto family_properties = physical_device.getQueueFamilyProperties(); @@ -446,21 +447,21 @@ bool Instance::CreateDevice() { .ppEnabledExtensionNames = enabled_extensions.data(), }, vk::PhysicalDeviceFeatures2{ - .features = {.robustBufferAccess = available.robustBufferAccess, - .geometryShader = available.geometryShader, - .dualSrcBlend = available.dualSrcBlend, - .logicOp = available.logicOp, - .depthClamp = available.depthClamp, - .largePoints = available.largePoints, - .samplerAnisotropy = available.samplerAnisotropy, - .fragmentStoresAndAtomics = available.fragmentStoresAndAtomics, - .shaderStorageImageMultisample = available.shaderStorageImageMultisample, - .shaderClipDistance = available.shaderClipDistance}}, - vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true}, - //feature_chain.get(), + .features = {.robustBufferAccess = features.robustBufferAccess, + .geometryShader = features.geometryShader, + .dualSrcBlend = features.dualSrcBlend, + .logicOp = features.logicOp, + .depthClamp = features.depthClamp, + .largePoints = features.largePoints, + .samplerAnisotropy = features.samplerAnisotropy, + .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics, + .shaderStorageImageMultisample = features.shaderStorageImageMultisample, + .shaderClipDistance = features.shaderClipDistance}}, + feature_chain.get(), + feature_chain.get(), feature_chain.get(), - //feature_chain.get() - }; + feature_chain.get() + }; // Create logical device try { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 751030fb6..c5c0c374d 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -125,6 +125,11 @@ public: return custom_border_color; } + /// Returns true when VK_EXT_index_type_uint8 is supported + bool IsIndexTypeUint8Supported() const { + return index_type_uint8; + } + /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -210,12 +215,14 @@ private: std::vector available_extensions; u32 present_queue_family_index{0}; u32 graphics_queue_family_index{0}; - bool timeline_semaphores{false}; - bool extended_dynamic_state{false}; - bool push_descriptors{false}; - bool custom_border_color{false}; - bool enable_validation{false}; - bool dump_command_buffers{false}; + + bool timeline_semaphores{}; + bool extended_dynamic_state{}; + bool push_descriptors{}; + bool custom_border_color{}; + bool index_type_uint8{}; + bool enable_validation{}; + bool dump_command_buffers{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h index c8a9da766..d8af68c3b 100644 --- a/src/video_core/renderer_vulkan/vk_master_semaphore.h +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_common.h" diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 2fd4b428c..95c621187 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -43,6 +43,12 @@ constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst; constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage; +struct DrawParams { + u32 vertex_count; + s32 vertex_offset; + bool is_indexed; +}; + RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager, TextureRuntime& runtime, RenderpassCache& renderpass_cache) @@ -62,6 +68,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} { + vertex_buffers.fill(vertex_buffer.GetHandle()); + uniform_buffer_alignment = instance.UniformMinAlignment(); uniform_size_aligned_vs = Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment); @@ -257,11 +265,9 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi SetupFixedAttribs(); // Bind the generated bindings - scheduler.Record([this, layout = pipeline_info.vertex_layout, offsets = binding_offsets]( + scheduler.Record([this, vertex_offsets = binding_offsets]( vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - std::array buffers; - buffers.fill(vertex_buffer.GetHandle()); - render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(), offsets.data()); + render_cmdbuf.bindVertexBuffers(0, vertex_buffers, vertex_offsets); }); } @@ -373,6 +379,9 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { } SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max); + if (is_indexed) { + SetupIndexArray(); + } if (!SetupVertexShader()) { return false; @@ -385,42 +394,53 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); pipeline_cache.BindPipeline(pipeline_info); - if (is_indexed) { - bool index_u16 = regs.pipeline.index_array.format != 0; - const u32 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + const DrawParams params = { + .vertex_count = regs.pipeline.num_vertices, + .vertex_offset = -static_cast(vs_input_index_min), + .is_indexed = is_indexed, + }; - if (index_buffer_size > INDEX_BUFFER_SIZE) { - LOG_WARNING(Render_Vulkan, "Too large index input size {}", index_buffer_size); - return false; + scheduler.Record([params](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + if (params.is_indexed) { + render_cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0); + } else { + render_cmdbuf.draw(params.vertex_count, 1, 0, 0); } - - const u8* index_data = VideoCore::g_memory->GetPhysicalPointer( - regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + - regs.pipeline.index_array.offset); - - // Upload index buffer data to the GPU - auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size); - std::memcpy(index_ptr, index_data, index_buffer_size); - index_buffer.Commit(index_buffer_size); - - scheduler.Record([this, offset = index_offset, num_vertices = regs.pipeline.num_vertices, - index_u16, vertex_offset = vs_input_index_min]( - vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - const vk::IndexType index_type = - index_u16 ? vk::IndexType::eUint16 : vk::IndexType::eUint8EXT; - render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), offset, index_type); - render_cmdbuf.drawIndexed(num_vertices, 1, 0, -vertex_offset, 0); - }); - } else { - scheduler.Record([num_vertices = regs.pipeline.num_vertices]( - vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - render_cmdbuf.draw(num_vertices, 1, 0, 0); - }); - } + }); return true; } +void RasterizerVulkan::SetupIndexArray() { + const auto& regs = Pica::g_state.regs; + + const bool index_u8 = regs.pipeline.index_array.format == 0; + const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported(); + const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16; + const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2); + + const u8* index_data = VideoCore::g_memory->GetPhysicalPointer( + regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() + + regs.pipeline.index_array.offset); + + auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size); + if (index_u8 && !native_u8) { + u16* index_ptr_u16 = reinterpret_cast(index_ptr); + for (u32 i = 0; i < regs.pipeline.num_vertices; i++) { + index_ptr_u16[i] = index_data[i]; + } + } else { + std::memcpy(index_ptr, index_data, index_buffer_size); + } + + index_buffer.Commit(index_buffer_size); + + scheduler.Record([this, index_offset = index_offset, index_type = index_type]( + vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { + render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type); + }); +} + void RasterizerVulkan::DrawTriangles() { if (vertex_batch.empty()) { return; @@ -707,20 +727,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { .clear = {}, }; - renderpass_cache.ExitRenderpass(); - - scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) { - const vk::MemoryBarrier memory_write_barrier = { - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - - render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, - memory_write_barrier, {}, {}); - }); - renderpass_cache.EnterRenderpass(renderpass_info); // Draw the vertex batch @@ -769,11 +775,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } - static int submit_threshold = 120; + static int submit_threshold = 80; submit_threshold--; if (!submit_threshold) { - submit_threshold = 120; - scheduler.Flush(); + submit_threshold = 80; + scheduler.DispatchWork(); } return succeeded; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index d7d518036..cdc806af5 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -156,6 +156,9 @@ private: /// Internal implementation for AccelerateDrawBatch bool AccelerateDrawBatchInternal(bool is_indexed); + /// Setup index array for AccelerateDrawBatch + void SetupIndexArray(); + /// Setup vertex array for AccelerateDrawBatch void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max); @@ -189,6 +192,7 @@ private: VertexLayout software_layout; std::array binding_offsets{}; std::array enable_attributes{}; + std::array vertex_buffers; vk::Sampler default_sampler; Surface null_surface; Surface null_storage_surface; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f1003cbb4..49df0425c 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -80,7 +80,7 @@ void Scheduler::WaitWorker() { } void Scheduler::DispatchWork() { - if (chunk->Empty()) { + if (!use_worker_thread || chunk->Empty()) { return; } @@ -133,26 +133,26 @@ void Scheduler::AllocateWorkerCommandBuffers() { MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255)); void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) { - renderer.FlushBuffers(); + const auto handle = master_semaphore.Handle(); const u64 signal_value = master_semaphore.NextTick(); state = StateFlags::AllDirty; + renderer.FlushBuffers(); renderpass_cache.ExitRenderpass(); - Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer render_cmdbuf, - vk::CommandBuffer upload_cmdbuf) { + Record([signal_semaphore, wait_semaphore, + handle, signal_value, this](vk::CommandBuffer render_cmdbuf, + vk::CommandBuffer upload_cmdbuf) { MICROPROFILE_SCOPE(Vulkan_Submit); upload_cmdbuf.end(); render_cmdbuf.end(); - const vk::Semaphore timeline_semaphore = master_semaphore.Handle(); - const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U; const std::array signal_values{signal_value, u64(0)}; - const std::array signal_semaphores{timeline_semaphore, signal_semaphore}; + const std::array signal_semaphores{handle, signal_semaphore}; const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U; const std::array wait_values{signal_value - 1, u64(1)}; - const std::array wait_semaphores{timeline_semaphore, wait_semaphore}; + const std::array wait_semaphores{handle, wait_semaphore}; static constexpr std::array wait_stage_masks = { vk::PipelineStageFlagBits::eAllCommands, diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index b46a41d50..e9a3ae9eb 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -663,16 +663,6 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest, render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers); - - const vk::MemoryBarrier memory_write_barrier = { - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - - render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, - memory_write_barrier, {}, {}); }); return true; @@ -790,17 +780,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest, render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers); - - const vk::MemoryBarrier memory_write_barrier = { - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - - render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, - memory_write_barrier, {}, {}); - }); return true; @@ -983,16 +962,6 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier); - - const vk::MemoryBarrier memory_write_barrier = { - .srcAccessMask = vk::AccessFlagBits::eMemoryWrite, - .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite, - }; - - render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, - vk::PipelineStageFlagBits::eAllCommands, - vk::DependencyFlagBits::eByRegion, - memory_write_barrier, {}, {}); }); runtime.upload_buffer.Commit(staging.size);