vk_graphics_pipeline: Use VK_KHR_push_descriptor when available
~51% faster on Nvidia compared to the previous method (committing, updating and binding a descriptor set on every draw).
This commit is contained in:
		| @@ -16,38 +16,50 @@ | ||||
| #include "video_core/texture_cache/texture_cache.h" | ||||
| #include "video_core/texture_cache/types.h" | ||||
| #include "video_core/textures/texture.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| class DescriptorLayoutBuilder { | ||||
| public: | ||||
|     DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} | ||||
|     DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} | ||||
|  | ||||
|     vk::DescriptorSetLayout CreateDescriptorSetLayout() const { | ||||
|     /// Returns true when the device supports VK_KHR_push_descriptor and the descriptor count | ||||
|     /// accumulated by this builder does not exceed the device's push descriptor limit. | ||||
|     bool CanUsePushDescriptor() const noexcept { | ||||
|         if (!device->IsKhrPushDescriptorSupported()) { | ||||
|             return false; | ||||
|         } | ||||
|         return num_descriptors <= device->MaxPushDescriptors(); | ||||
|     } | ||||
|  | ||||
|     vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const { | ||||
|         if (bindings.empty()) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         return device->CreateDescriptorSetLayout({ | ||||
|         const VkDescriptorSetLayoutCreateFlags flags = | ||||
|             use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0; | ||||
|         return device->GetLogical().CreateDescriptorSetLayout({ | ||||
|             .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||||
|             .pNext = nullptr, | ||||
|             .flags = 0, | ||||
|             .flags = flags, | ||||
|             .bindingCount = static_cast<u32>(bindings.size()), | ||||
|             .pBindings = bindings.data(), | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout, | ||||
|                                                    VkPipelineLayout pipeline_layout) const { | ||||
|                                                    VkPipelineLayout pipeline_layout, | ||||
|                                                    bool use_push_descriptor) const { | ||||
|         if (entries.empty()) { | ||||
|             return nullptr; | ||||
|         } | ||||
|         return device->CreateDescriptorUpdateTemplateKHR({ | ||||
|         const VkDescriptorUpdateTemplateType type = | ||||
|             use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR | ||||
|                                 : VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR; | ||||
|         return device->GetLogical().CreateDescriptorUpdateTemplateKHR({ | ||||
|             .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR, | ||||
|             .pNext = nullptr, | ||||
|             .flags = 0, | ||||
|             .descriptorUpdateEntryCount = static_cast<u32>(entries.size()), | ||||
|             .pDescriptorUpdateEntries = entries.data(), | ||||
|             .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR, | ||||
|             .templateType = type, | ||||
|             .descriptorSetLayout = descriptor_set_layout, | ||||
|             .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, | ||||
|             .pipelineLayout = pipeline_layout, | ||||
| @@ -56,7 +68,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const { | ||||
|         return device->CreatePipelineLayout({ | ||||
|         return device->GetLogical().CreatePipelineLayout({ | ||||
|             .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, | ||||
|             .pNext = nullptr, | ||||
|             .flags = 0, | ||||
| @@ -97,14 +109,16 @@ private: | ||||
|                 .stride = sizeof(DescriptorUpdateEntry), | ||||
|             }); | ||||
|             ++binding; | ||||
|             num_descriptors += descriptors[i].count; | ||||
|             offset += sizeof(DescriptorUpdateEntry); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     const vk::Device* device{}; | ||||
|     const Device* device{}; | ||||
|     boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings; | ||||
|     boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries; | ||||
|     u32 binding{}; | ||||
|     u32 num_descriptors{}; | ||||
|     size_t offset{}; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript | ||||
|                 uniform_buffer_sizes.begin()); | ||||
|  | ||||
|     auto func{[this, &descriptor_pool, shader_notify] { | ||||
|         DescriptorLayoutBuilder builder{device.GetLogical()}; | ||||
|         DescriptorLayoutBuilder builder{device}; | ||||
|         builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT); | ||||
|  | ||||
|         descriptor_set_layout = builder.CreateDescriptorSetLayout(); | ||||
|         descriptor_set_layout = builder.CreateDescriptorSetLayout(false); | ||||
|         pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout); | ||||
|         descriptor_update_template = | ||||
|             builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout); | ||||
|             builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false); | ||||
|         descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info); | ||||
|  | ||||
|         const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ | ||||
|             .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, | ||||
|             .pNext = nullptr, | ||||
| @@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | ||||
|     const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||||
|     scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||
|  | ||||
|         if (!descriptor_set_layout) { | ||||
|             return; | ||||
|         } | ||||
|   | ||||
| @@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage; | ||||
| constexpr size_t MAX_IMAGE_ELEMENTS = 64; | ||||
|  | ||||
| DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) { | ||||
|     DescriptorLayoutBuilder builder{device.GetLogical()}; | ||||
|     DescriptorLayoutBuilder builder{device}; | ||||
|     for (size_t index = 0; index < infos.size(); ++index) { | ||||
|         static constexpr std::array stages{ | ||||
|             VK_SHADER_STAGE_VERTEX_BIT, | ||||
| @@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline( | ||||
|     } | ||||
|     auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] { | ||||
|         DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; | ||||
|         descriptor_set_layout = builder.CreateDescriptorSetLayout(); | ||||
|         descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); | ||||
|  | ||||
|         uses_push_descriptor = builder.CanUsePushDescriptor(); | ||||
|         descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor); | ||||
|         if (!uses_push_descriptor) { | ||||
|             descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos); | ||||
|         } | ||||
|         const VkDescriptorSetLayout set_layout{*descriptor_set_layout}; | ||||
|         pipeline_layout = builder.CreatePipelineLayout(set_layout); | ||||
|         descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout); | ||||
|         descriptor_update_template = | ||||
|             builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor); | ||||
|  | ||||
|         const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))}; | ||||
|         Validate(); | ||||
| @@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() { | ||||
|         if (!descriptor_set_layout) { | ||||
|             return; | ||||
|         } | ||||
|         const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||||
|         const vk::Device& dev{device.GetLogical()}; | ||||
|         dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | ||||
|                                   descriptor_set, nullptr); | ||||
|         if (uses_push_descriptor) { | ||||
|             cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout, | ||||
|                                                     0, descriptor_data); | ||||
|         } else { | ||||
|             const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; | ||||
|             const vk::Device& dev{device.GetLogical()}; | ||||
|             dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data); | ||||
|             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, | ||||
|                                       descriptor_set, nullptr); | ||||
|         } | ||||
|     }); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -148,6 +148,7 @@ private: | ||||
|     std::condition_variable build_condvar; | ||||
|     std::mutex build_mutex; | ||||
|     std::atomic_bool is_built{false}; | ||||
|     bool uses_push_descriptor{false}; | ||||
| }; | ||||
|  | ||||
| } // namespace Vulkan | ||||
|   | ||||
| @@ -767,6 +767,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|         test(khr_uniform_buffer_standard_layout, | ||||
|              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true); | ||||
|         test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true); | ||||
|         test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true); | ||||
|         test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false); | ||||
|         test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true); | ||||
|         test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true); | ||||
| @@ -932,6 +933,16 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|             khr_workgroup_memory_explicit_layout = true; | ||||
|         } | ||||
|     } | ||||
|     if (khr_push_descriptor) { | ||||
|         VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor; | ||||
|         push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR; | ||||
|         push_descriptor.pNext = nullptr; | ||||
|  | ||||
|         physical_properties.pNext = &push_descriptor; | ||||
|         physical.GetProperties2KHR(physical_properties); | ||||
|  | ||||
|         max_push_descriptors = push_descriptor.maxPushDescriptors; | ||||
|     } | ||||
|     return extensions; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -154,6 +154,11 @@ public: | ||||
|         return guest_warp_stages & stage; | ||||
|     } | ||||
|  | ||||
|     /// Returns the maxPushDescriptors limit queried from the device (zero if never queried). | ||||
|     u32 MaxPushDescriptors() const { | ||||
|         return max_push_descriptors; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if formatless image load is supported. | ||||
|     bool IsFormatlessImageLoadSupported() const { | ||||
|         return is_formatless_image_load_supported; | ||||
| @@ -194,6 +199,11 @@ public: | ||||
|         return khr_spirv_1_4; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if the device supports VK_KHR_push_descriptor. See also MaxPushDescriptors(). | ||||
|     bool IsKhrPushDescriptorSupported() const { | ||||
|         return khr_push_descriptor; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout. | ||||
|     bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const { | ||||
|         return khr_workgroup_memory_explicit_layout; | ||||
| @@ -330,6 +340,7 @@ private: | ||||
|     VkDriverIdKHR driver_id{};                  ///< Driver ID. | ||||
|     VkShaderStageFlags guest_warp_stages{};     ///< Stages where the guest warp size can be forced. | ||||
|     u64 device_access_memory{};                 ///< Total size of device local memory in bytes. | ||||
|     u32 max_push_descriptors{};                 ///< Maximum number of push descriptors. | ||||
|     bool is_optimal_astc_supported{};           ///< Support for native ASTC. | ||||
|     bool is_float16_supported{};                ///< Support for float16 arithmetics. | ||||
|     bool is_warp_potentially_bigger{};          ///< Host warp size can be bigger than guest. | ||||
| @@ -345,6 +356,7 @@ private: | ||||
|     bool khr_uniform_buffer_standard_layout{};  ///< Support for scalar uniform buffer layouts. | ||||
|     bool khr_spirv_1_4{};                       ///< Support for VK_KHR_spirv_1_4. | ||||
|     bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts. | ||||
|     bool khr_push_descriptor{};                  ///< Support for VK_KHR_push_descriptor. | ||||
|     bool ext_index_type_uint8{};                 ///< Support for VK_EXT_index_type_uint8. | ||||
|     bool ext_sampler_filter_minmax{};            ///< Support for VK_EXT_sampler_filter_minmax. | ||||
|     bool ext_depth_range_unrestricted{};         ///< Support for VK_EXT_depth_range_unrestricted. | ||||
|   | ||||
| @@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { | ||||
|     X(vkCmdFillBuffer); | ||||
|     X(vkCmdPipelineBarrier); | ||||
|     X(vkCmdPushConstants); | ||||
|     X(vkCmdPushDescriptorSetWithTemplateKHR); | ||||
|     X(vkCmdSetBlendConstants); | ||||
|     X(vkCmdSetDepthBias); | ||||
|     X(vkCmdSetDepthBounds); | ||||
|   | ||||
| @@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch { | ||||
|     PFN_vkBeginCommandBuffer vkBeginCommandBuffer{}; | ||||
|     PFN_vkBindBufferMemory vkBindBufferMemory{}; | ||||
|     PFN_vkBindImageMemory vkBindImageMemory{}; | ||||
|     PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdBeginQuery vkCmdBeginQuery{}; | ||||
|     PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{}; | ||||
|     PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{}; | ||||
|     PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{}; | ||||
|     PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{}; | ||||
|     PFN_vkCmdBindPipeline vkCmdBindPipeline{}; | ||||
|     PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{}; | ||||
|     PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{}; | ||||
|     PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; | ||||
|     PFN_vkCmdBlitImage vkCmdBlitImage{}; | ||||
|     PFN_vkCmdClearAttachments vkCmdClearAttachments{}; | ||||
|     PFN_vkCmdCopyBuffer vkCmdCopyBuffer{}; | ||||
| @@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch { | ||||
|     PFN_vkCmdDispatch vkCmdDispatch{}; | ||||
|     PFN_vkCmdDraw vkCmdDraw{}; | ||||
|     PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; | ||||
|     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdEndQuery vkCmdEndQuery{}; | ||||
|     PFN_vkCmdEndRenderPass vkCmdEndRenderPass{}; | ||||
|     PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{}; | ||||
|     PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{}; | ||||
|     PFN_vkCmdFillBuffer vkCmdFillBuffer{}; | ||||
|     PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{}; | ||||
|     PFN_vkCmdPushConstants vkCmdPushConstants{}; | ||||
|     PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{}; | ||||
|     PFN_vkCmdResolveImage vkCmdResolveImage{}; | ||||
|     PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{}; | ||||
|     PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; | ||||
|     PFN_vkCmdSetDepthBias vkCmdSetDepthBias{}; | ||||
|     PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{}; | ||||
|     PFN_vkCmdSetEvent vkCmdSetEvent{}; | ||||
|     PFN_vkCmdSetScissor vkCmdSetScissor{}; | ||||
|     PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; | ||||
|     PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; | ||||
|     PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; | ||||
|     PFN_vkCmdSetViewport vkCmdSetViewport{}; | ||||
|     PFN_vkCmdWaitEvents vkCmdWaitEvents{}; | ||||
|     PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{}; | ||||
|     PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{}; | ||||
|     PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{}; | ||||
|     PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{}; | ||||
|     PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{}; | ||||
|     PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{}; | ||||
|     PFN_vkCmdSetEvent vkCmdSetEvent{}; | ||||
|     PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{}; | ||||
|     PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{}; | ||||
|     PFN_vkCmdSetScissor vkCmdSetScissor{}; | ||||
|     PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{}; | ||||
|     PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{}; | ||||
|     PFN_vkCmdSetStencilReference vkCmdSetStencilReference{}; | ||||
|     PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{}; | ||||
|     PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{}; | ||||
|     PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{}; | ||||
|     PFN_vkCmdResolveImage vkCmdResolveImage{}; | ||||
|     PFN_vkCmdSetViewport vkCmdSetViewport{}; | ||||
|     PFN_vkCmdWaitEvents vkCmdWaitEvents{}; | ||||
|     PFN_vkCreateBuffer vkCreateBuffer{}; | ||||
|     PFN_vkCreateBufferView vkCreateBufferView{}; | ||||
|     PFN_vkCreateCommandPool vkCreateCommandPool{}; | ||||
| @@ -990,6 +991,12 @@ public: | ||||
|                                      dynamic_offsets.size(), dynamic_offsets.data()); | ||||
|     } | ||||
|  | ||||
|     /// Calls vkCmdPushDescriptorSetWithTemplateKHR with this wrapper's handle, forwarding the | ||||
|     /// update template, pipeline layout, set index and raw descriptor data unchanged. | ||||
|     void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template, | ||||
|                                           VkPipelineLayout layout, u32 set, | ||||
|                                           const void* data) const noexcept { | ||||
|         const auto push_with_template = dld->vkCmdPushDescriptorSetWithTemplateKHR; | ||||
|         push_with_template(handle, update_template, layout, set, data); | ||||
|     } | ||||
|  | ||||
|     /// Calls vkCmdBindPipeline with this wrapper's handle, the given bind point and pipeline. | ||||
|     void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept { | ||||
|         const auto bind_pipeline = dld->vkCmdBindPipeline; | ||||
|         bind_pipeline(handle, bind_point, pipeline); | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user