Vk Async pipeline compilation

2020-07-28 00:08:02 -04:00
parent db96034ea4
commit 6ac97405df
13 changed files with 182 additions and 20 deletions
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -382,6 +382,8 @@ bool VKDevice::Create() {

    graphics_queue = logical.GetQueue(graphics_family);
    present_queue = logical.GetQueue(present_family);
+
+    use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
    return true;
 }

--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,10 @@ public:
        return reported_extensions;
    }

+    bool UseAsynchronousShaders() const {
+        return use_asynchronous_shaders; // Captured from Settings in VKDevice::Create().
+    }
+
    /// Checks if the physical device is suitable.
    static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);

@@ -251,6 +255,7 @@ private:
    bool ext_custom_border_color{};            ///< Support for VK_EXT_custom_border_color.
    bool ext_extended_dynamic_state{};         ///< Support for VK_EXT_extended_dynamic_state.
    bool nv_device_diagnostics_config{};       ///< Support for VK_NV_device_diagnostics_config.
+    bool use_asynchronous_shaders{};           ///< Asynchronous shaders setting read in Create().

    // Telemetry parameters
    std::string vendor_name;                      ///< Device's driver name.
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,7 +29,7 @@ void InnerFence::Queue() {
    }
    ASSERT(!event);

-    event = device.GetLogical().CreateEvent();
+    event = device.GetLogical().CreateNewEvent();
    ticks = scheduler.Ticks();

    scheduler.RequestOutsideRenderPassOperationContext();
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -84,9 +84,8 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
      update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
      descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                        program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
-                                                                             key.renderpass_params,
-                                                                             program)} {}
+      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)},
+      pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {}

 VKGraphicsPipeline::~VKGraphicsPipeline() = default;

--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -54,6 +54,10 @@ public:
        return renderpass;
    }

+    /// Returns the cache key this pipeline was constructed with.
+    const GraphicsPipelineCacheKey& GetCacheKey() const {
+        return m_key;
+    }
+
 private:
    vk::DescriptorSetLayout CreateDescriptorSetLayout(
        vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@@ -82,6 +86,8 @@ private:

    VkRenderPass renderpass;
    vk::Pipeline pipeline;
+
+    /// Owned copy of the key. The async shader worker constructs pipelines from a key that is
+    /// local to its thread, so holding a reference would dangle once that key goes out of scope.
+    const GraphicsPipelineCacheKey m_key;
 };

 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -205,7 +205,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
    return last_shaders = shaders;
 }

-VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) {
+VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
+    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    if (last_graphics_pipeline && last_graphics_key == key) {
@@ -213,11 +214,27 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineC
    }
    last_graphics_key = key;

+    if (device.UseAsynchronousShaders()) {
+        // Install every pipeline returned by the async worker queue. Each one replaces the
+        // empty (null) slot that was inserted into the cache when its build was queued.
+        auto completed = async_shaders.GetCompletedWork();
+        for (auto& result : completed) {
+            auto& slot = graphics_cache.at(result.pipeline->GetCacheKey());
+            slot = std::move(result.pipeline);
+        }
+        // On a miss, try_emplace leaves a null unique_ptr in the cache as a placeholder, so the
+        // same key is not queued again while its build is in flight.
+        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
+        if (is_cache_miss) {
+            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
+            auto [program, bindings] = DecompileShaders(key.fixed_state);
+            // QueueVulkanShader takes these by value; move them instead of copying.
+            async_shaders.QueueVulkanShader(this, std::move(bindings), std::move(program),
+                                            key.renderpass_params, key.padding, key.shaders,
+                                            key.fixed_state);
+        }
+        // NOTE(review): while the build is pending the slot holds nullptr, so this dereference
+        // forms a null reference (undefined behavior). Callers currently test the address of
+        // the result; returning a pointer would make the "not ready" state well-defined.
+        return *(last_graphics_pipeline = pair->second.get());
+    }
+
    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-        const auto [program, bindings] = DecompileShaders(key);
+        const auto [program, bindings] = DecompileShaders(key.fixed_state);
        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                     update_descriptor_queue, renderpass_cache, key,
                                                     bindings, program);
@@ -312,8 +329,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
 }

 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
-VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
-    const auto& fixed_state = key.fixed_state;
+VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
    auto& memory_manager = system.GPU().MemoryManager();
    const auto& gpu = system.GPU().Maxwell3D();

--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -22,6 +22,7 @@
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
@@ -152,16 +153,37 @@ public:

    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();

-    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
+    VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
+                                            VideoCommon::Shader::AsyncShaders& async_shaders);

    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);

+    // The accessors below expose this cache's collaborators so that
+    // VideoCommon::Shader::AsyncShaders can construct VKGraphicsPipeline objects on its worker
+    // threads.
+    // NOTE(review): VKGraphicsPipeline's constructor calls renderpass_cache.GetRenderPass();
+    // confirm the returned objects are safe to use off the render thread.
+
+    /// Returns the Vulkan device this cache creates pipelines on.
+    const VKDevice& GetDevice() const {
+        return device;
+    }
+
+    /// Returns the scheduler passed to newly built pipelines.
+    VKScheduler& GetScheduler() {
+        return scheduler;
+    }
+
+    /// Returns the descriptor pool passed to newly built pipelines.
+    VKDescriptorPool& GetDescriptorPool() {
+        return descriptor_pool;
+    }
+
+    /// Returns the descriptor update queue passed to newly built pipelines.
+    VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
+        return update_descriptor_queue;
+    }
+
+    /// Returns the render pass cache passed to newly built pipelines.
+    VKRenderPassCache& GetRenderpassCache() {
+        return renderpass_cache;
+    }
+
 protected:
    void OnShaderRemoval(Shader* shader) final;

 private:
    std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
-        const GraphicsPipelineCacheKey& key);
+        const FixedPipelineState& fixed_state);

    Core::System& system;
    const VKDevice& device;
@@ -177,6 +199,7 @@ private:

    GraphicsPipelineCacheKey last_graphics_key;
    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
+    std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;

    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
        graphics_cache;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -400,8 +400,25 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
      buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
      sampler_cache(device),
      fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
-      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+      query_cache(system, *this, device, scheduler),
+      wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
    scheduler.SetQueryCache(query_cache);
+    if (device.UseAsynchronousShaders()) {
+        // Upper bound on the number of async shader worker threads
+        constexpr auto MAX_THREADS = 2u;
+        // Hardware threads left for other parts of yuzu
+        constexpr auto RESERVED_THREADS = 6u;
+        // hardware_concurrency() may return 0; clamp up so the subtraction below cannot wrap
+        const auto cpu_thread_count =
+            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
+        // Hardware threads available beyond the reserved ones (may be zero)
+        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Always allow at least 1 worker, even when no spare hardware threads exist
+        const auto max_worker_count = std::max(1u, max_threads_unused);
+        // Never use more than MAX_THREADS workers
+        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
+        async_shaders.AllocateWorkers(worker_count);
+    }
 }

 RasterizerVulkan::~RasterizerVulkan() = default;
@@ -439,7 +456,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    key.renderpass_params = GetRenderPassParams(texceptions);
    key.padding = 0;

-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
+    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+        // Async graphics pipeline was not ready; skip this draw.
+        system.GPU().TickWork(); // NOTE(review): `&pipeline == nullptr` relies on a null
+        return;                  // reference (UB); compilers may fold that test to false.
+    }
+
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());

    const auto renderpass = pipeline.GetRenderPass();
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -32,6 +32,7 @@
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/async_shaders.h"

 namespace Core {
 class System;
@@ -136,6 +137,14 @@ public:
                           u32 pixel_stride) override;
    void SetupDirtyFlags() override;

+    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
+        return async_shaders; // Held by value as a member of this rasterizer.
+    }
+
+    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
+        return async_shaders; // Read-only access to the same member.
+    }
+
    /// Maximum supported size that a constbuffer can have in bytes.
    static constexpr std::size_t MaxConstbufferSize = 0x10000;
    static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@@ -278,6 +287,7 @@ private:
    VKMemoryManager& memory_manager;
    StateTracker& state_tracker;
    VKScheduler& scheduler;
+    VideoCommon::Shader::AsyncShaders async_shaders; // NOTE(review): destroyed after the members declared below; confirm workers are stopped before then.

    VKStagingBufferPool staging_pool;
    VKDescriptorPool descriptor_pool;
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
    return ShaderModule(object, handle, *dld);
 }

-Event Device::CreateEvent() const {
+Event Device::CreateNewEvent() const {
    static constexpr VkEventCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
        .pNext = nullptr,
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -721,7 +721,7 @@ public:

    ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;

-    Event CreateEvent() const;
+    Event CreateNewEvent() const;

    SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;

--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -113,15 +113,38 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     VAddr cpu_addr) {
    WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM
                                                    : AsyncShaders::Backend::OpenGL,
-                        device,
+                        &device,
                        shader_type,
                        uid,
                        std::move(code),
                        std::move(code_b),
                        main_offset,
                        compiler_settings,
-                        registry,
+                        &registry,
                        cpu_addr};
+
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push_back(std::move(params));
+    cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(
+    Vulkan::VKPipelineCache* pp_cache, std::vector<VkDescriptorSetLayoutBinding> bindings,
+    Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, u32 padding,
+    std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
+    Vulkan::FixedPipelineState fixed_state) {
+
+    // bindings and program are by-value parameters, so hand their storage to the queued job
+    // instead of copying them a second time.
+    WorkerParams params{
+        .backend = AsyncShaders::Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .bindings = std::move(bindings),
+        .program = std::move(program),
+        .renderpass_params = renderpass_params,
+        .padding = padding,
+        .shaders = shaders,
+        .fixed_state = fixed_state,
+    };
+
    std::unique_lock lock(queue_mutex);
    pending_queue.push_back(std::move(params));
    cv.notify_one();
@@ -140,6 +163,7 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
        if (!HasWorkQueued()) {
            continue;
        }
+
        // Another thread beat us, just unlock and wait for the next load
        if (pending_queue.empty()) {
            continue;
@@ -152,10 +176,11 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context

        if (work.backend == AsyncShaders::Backend::OpenGL ||
            work.backend == AsyncShaders::Backend::GLASM) {
-            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
+            VideoCommon::Shader::Registry registry = *work.registry;
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
            const auto scope = context->Acquire();
            auto program =
-                OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
            Result result{};
            result.backend = work.backend;
            result.cpu_address = work.cpu_address;
@@ -174,6 +199,32 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
                std::unique_lock complete_lock(completed_mutex);
                finished_work.push_back(std::move(result));
            }
+
+        } else if (work.backend == AsyncShaders::Backend::Vulkan) {
+            Vulkan::GraphicsPipelineCacheKey params_key{
+                work.renderpass_params,
+                work.padding,
+                work.shaders,
+                work.fixed_state,
+            };
+            {
+                std::unique_lock complete_lock(completed_mutex);
+
+                // Creating pipelines concurrently leads to instability and crashes caused by a
+                // race condition. As a band-aid, build the pipeline while holding the lock so
+                // that only one pipeline is created at a time.
+                Result result{
+                    .backend = work.backend,
+                    .pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                        work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
+                        work.pp_cache->GetDescriptorPool(),
+                        work.pp_cache->GetUpdateDescriptorQueue(),
+                        work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
+                        work.program),
+                };
+
+                finished_work.push_back(std::move(result));
+            }
        }
    }
 }
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -14,6 +14,10 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_update_descriptor.h"

 namespace Core::Frontend {
 class EmuWindow;
@@ -24,6 +28,10 @@ namespace Tegra {
 class GPU;
 }

+namespace Vulkan {
+class VKPipelineCache;
+}
+
 namespace VideoCommon::Shader {

 class AsyncShaders {
@@ -31,6 +39,7 @@ public:
    enum class Backend {
        OpenGL,
        GLASM,
+        Vulkan,
    };

    struct ResultPrograms {
@@ -46,6 +55,7 @@ public:
        std::vector<u64> code;
        std::vector<u64> code_b;
        Tegra::Engines::ShaderType shader_type;
+        std::unique_ptr<Vulkan::VKGraphicsPipeline> pipeline;
    };

    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
@@ -76,6 +86,13 @@ public:
                           VideoCommon::Shader::CompilerSettings compiler_settings,
                           const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);

+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params,
+                           u32 padding,
+                           std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
+                           Vulkan::FixedPipelineState fixed_state);
+
 private:
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);

@@ -84,15 +101,25 @@ private:

    struct WorkerParams {
        AsyncShaders::Backend backend;
-        OpenGL::Device device;
+        // For OpenGL (GLSL and GLASM backends)
+        const OpenGL::Device* device; // Non-owning.
        Tegra::Engines::ShaderType shader_type;
        u64 uid;
        std::vector<u64> code;
        std::vector<u64> code_b;
        u32 main_offset;
        VideoCommon::Shader::CompilerSettings compiler_settings;
-        VideoCommon::Shader::Registry registry;
+        const VideoCommon::Shader::Registry* registry; // NOTE(review): read on the worker thread; must outlive the job.
        VAddr cpu_address;
+
+        // For Vulkan: inputs forwarded to the VKGraphicsPipeline constructor by the worker
+        Vulkan::VKPipelineCache* pp_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::RenderPassParams renderpass_params;
+        u32 padding;
+        std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders;
+        Vulkan::FixedPipelineState fixed_state;
    };

    std::condition_variable cv;