diff --git a/src/citra_qt/applesurfacehelper.mm b/src/citra_qt/applesurfacehelper.mm index f67e0e230..a6fc11b39 100644 --- a/src/citra_qt/applesurfacehelper.mm +++ b/src/citra_qt/applesurfacehelper.mm @@ -13,4 +13,4 @@ void* GetSurfaceLayer(void* surface) { return view.layer; } -} // AppleSurfaceHelper +} // namespace AppleSurfaceHelper diff --git a/src/citra_qt/configuration/config.cpp b/src/citra_qt/configuration/config.cpp index 4de8281e7..441cd96b4 100644 --- a/src/citra_qt/configuration/config.cpp +++ b/src/citra_qt/configuration/config.cpp @@ -605,6 +605,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.physical_device); ReadGlobalSetting(Settings::values.async_command_recording); + ReadGlobalSetting(Settings::values.async_shader_compilation); ReadGlobalSetting(Settings::values.spirv_shader_gen); ReadGlobalSetting(Settings::values.graphics_api); ReadGlobalSetting(Settings::values.use_hw_renderer); @@ -1088,6 +1089,7 @@ void Config::SaveRendererValues() { WriteGlobalSetting(Settings::values.graphics_api); WriteGlobalSetting(Settings::values.physical_device); WriteGlobalSetting(Settings::values.async_command_recording); + WriteGlobalSetting(Settings::values.async_shader_compilation); WriteGlobalSetting(Settings::values.spirv_shader_gen); WriteGlobalSetting(Settings::values.use_hw_renderer); WriteGlobalSetting(Settings::values.use_hw_shader); diff --git a/src/citra_qt/configuration/configure_graphics.cpp b/src/citra_qt/configuration/configure_graphics.cpp index 3b04877ba..9d48f5eee 100644 --- a/src/citra_qt/configuration/configure_graphics.cpp +++ b/src/citra_qt/configuration/configure_graphics.cpp @@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() { static_cast(Settings::values.physical_device.GetValue())); ui->toggle_async_recording->setChecked(Settings::values.async_command_recording.GetValue()); ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen.GetValue()); + 
ui->toggle_async_shaders->setChecked(Settings::values.async_shader_compilation.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue()); @@ -114,6 +115,8 @@ void ConfigureGraphics::ApplyConfiguration() { ui->physical_device_combo); ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_command_recording, ui->toggle_async_recording, async_command_recording); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_shader_compilation, + ui->toggle_async_shaders, async_shader_compilation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen, ui->spirv_shader_gen, spirv_shader_gen); @@ -136,6 +139,8 @@ void ConfigureGraphics::SetupPerGameUI() { ui->toggle_disk_shader_cache->setEnabled( Settings::values.use_disk_shader_cache.UsingGlobal()); ui->toggle_vsync_new->setEnabled(Settings::values.use_vsync_new.UsingGlobal()); + ui->toggle_async_shaders->setEnabled( + Settings::values.async_shader_compilation.UsingGlobal()); return; } @@ -154,6 +159,9 @@ void ConfigureGraphics::SetupPerGameUI() { use_disk_shader_cache); ConfigurationShared::SetColoredTristate(ui->toggle_vsync_new, Settings::values.use_vsync_new, use_vsync_new); + ConfigurationShared::SetColoredTristate(ui->toggle_async_shaders, + Settings::values.async_shader_compilation, + async_shader_compilation); } void ConfigureGraphics::DiscoverPhysicalDevices() { diff --git a/src/citra_qt/configuration/configure_graphics.h b/src/citra_qt/configuration/configure_graphics.h index f8783ee90..6530f1779 100644 --- a/src/citra_qt/configuration/configure_graphics.h +++ b/src/citra_qt/configuration/configure_graphics.h @@ -41,6 +41,7 @@ private: ConfigurationShared::CheckState use_disk_shader_cache; ConfigurationShared::CheckState use_vsync_new; ConfigurationShared::CheckState async_command_recording; + ConfigurationShared::CheckState async_shader_compilation; ConfigurationShared::CheckState spirv_shader_gen; 
std::unique_ptr ui; QColor bg_color; diff --git a/src/citra_qt/configuration/configure_graphics.ui b/src/citra_qt/configuration/configure_graphics.ui index 313cb82b1..78055787e 100644 --- a/src/citra_qt/configuration/configure_graphics.ui +++ b/src/citra_qt/configuration/configure_graphics.ui @@ -7,7 +7,7 @@ 0 0 400 - 513 + 579 @@ -169,6 +169,13 @@ + + + + Async Shader Compilation + + + diff --git a/src/citra_qt/macos_authorization.mm b/src/citra_qt/macos_authorization.mm index 465b13085..cca44866c 100644 --- a/src/citra_qt/macos_authorization.mm +++ b/src/citra_qt/macos_authorization.mm @@ -90,4 +90,4 @@ bool CheckAuthorizationForMicrophone() { return authorized_microphone; } -} // AppleAuthorization +} // namespace AppleAuthorization diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 468363cc9..590098389 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -60,6 +60,7 @@ add_library(common STATIC announce_multiplayer_room.h archives.h assert.h + async_handle.h atomic_ops.h detached_tasks.cpp detached_tasks.h diff --git a/src/common/async_handle.h b/src/common/async_handle.h new file mode 100644 index 000000000..d7d540030 --- /dev/null +++ b/src/common/async_handle.h @@ -0,0 +1,37 @@ +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include + +namespace Common { + +struct AsyncHandle { +public: + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::acquire); /* pairs with the store in MarkBuilt() */ + } + + void WaitBuilt() noexcept { + std::unique_lock lock{mutex}; + condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); }); + } + + void MarkBuilt() noexcept { + std::scoped_lock lock{mutex}; + is_built = true; + condvar.notify_all(); + } + +private: + std::condition_variable condvar; + std::mutex mutex; + std::atomic_bool is_built{false}; +}; + +} // namespace Common diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 49731cc09..862c96f77 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -107,6 +107,7 @@ void LogSettings() { log_setting("Core_CPUClockPercentage", values.cpu_clock_percentage.GetValue()); log_setting("Renderer_GraphicsAPI", GetAPIName(values.graphics_api.GetValue())); log_setting("Renderer_AsyncRecording", values.async_command_recording.GetValue()); + log_setting("Renderer_AsyncShaders", values.async_shader_compilation.GetValue()); log_setting("Renderer_SpirvShaderGen", values.spirv_shader_gen.GetValue()); log_setting("Renderer_Debug", values.renderer_debug.GetValue()); log_setting("Renderer_UseHwRenderer", values.use_hw_renderer.GetValue()); @@ -193,6 +194,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_hw_renderer.SetGlobal(true); values.use_hw_shader.SetGlobal(true); values.separable_shader.SetGlobal(true); + values.async_shader_compilation.SetGlobal(true); values.use_disk_shader_cache.SetGlobal(true); values.shaders_accurate_mul.SetGlobal(true); values.use_vsync_new.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 12aa11ae5..7540974ea 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -448,9 +448,10 @@ struct Values { // Renderer SwitchableSetting graphics_api{GraphicsAPI::OpenGL, "graphics_api"}; SwitchableSetting 
physical_device{0, "physical_device"}; - SwitchableSetting spirv_shader_gen{true, "spirv_shader_gen"}; Setting renderer_debug{false, "renderer_debug"}; Setting dump_command_buffers{false, "dump_command_buffers"}; + SwitchableSetting spirv_shader_gen{true, "spirv_shader_gen"}; + SwitchableSetting async_shader_compilation{false, "async_shader_compilation"}; SwitchableSetting async_command_recording{true, "async_command_recording"}; SwitchableSetting use_hw_renderer{true, "use_hw_renderer"}; SwitchableSetting use_hw_shader{true, "use_hw_shader"}; diff --git a/src/common/telemetry.h b/src/common/telemetry.h index ba5f0c87d..f69752e76 100644 --- a/src/common/telemetry.h +++ b/src/common/telemetry.h @@ -8,6 +8,7 @@ #include #include #include +#include #include "common/common_types.h" namespace Common::Telemetry { @@ -52,8 +53,8 @@ public: template class Field : public FieldInterface { public: - Field(FieldType type, std::string name, T value) - : name(std::move(name)), type(type), value(std::move(value)) {} + Field(FieldType type, std::string_view name, T value) + : name(name), type(type), value(std::move(value)) {} Field(const Field&) = default; Field& operator=(const Field&) = default; @@ -115,7 +116,7 @@ public: * @param value Value for the field to add. 
*/ template - void AddField(FieldType type, const char* name, T value) { + void AddField(FieldType type, std::string_view name, T value) { return AddField(std::make_unique>(type, name, std::move(value))); } diff --git a/src/common/thread_worker.h b/src/common/thread_worker.h index 260ad44e4..a7dfc379c 100644 --- a/src/common/thread_worker.h +++ b/src/common/thread_worker.h @@ -34,10 +34,10 @@ class StatefulThreadWorker { using StateMaker = std::conditional_t, DummyCallable>; public: - explicit StatefulThreadWorker(size_t num_workers, std::string name, StateMaker func = {}) - : workers_queued{num_workers}, thread_name{std::move(name)} { + explicit StatefulThreadWorker(size_t num_workers, std::string_view name, StateMaker func = {}) + : workers_queued{num_workers}, thread_name{name} { /* owning copy of the caller's view */ const auto lambda = [this, func](std::stop_token stop_token) { Common::SetCurrentThreadName(thread_name.c_str()); { [[maybe_unused]] std::conditional_t state{func()}; while (!stop_token.stop_requested()) { diff --git a/src/core/telemetry_session.h b/src/core/telemetry_session.h index f64dd2a64..cc312d708 100644 --- a/src/core/telemetry_session.h +++ b/src/core/telemetry_session.h @@ -6,6 +6,7 @@ #include #include +#include #include "common/telemetry.h" namespace Loader { @@ -53,7 +54,7 @@ public: * @param value Value for the field to add. 
*/ template - void AddField(Common::Telemetry::FieldType type, const char* name, T value) { + void AddField(Common::Telemetry::FieldType type, std::string_view name, T value) { field_collection.AddField(type, name, std::move(value)); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1b4ef49ed..5fc9d2e2b 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -126,7 +126,6 @@ add_library(video_core STATIC shader/debug_data.h shader/shader.cpp shader/shader.h - shader/shader_cache.h shader/shader_interpreter.cpp shader/shader_interpreter.h shader/shader_jit_x64.cpp diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index f342d69a6..d973e0dd2 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -12,9 +12,9 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback, +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback_, bool prefer_coherent) - : gl_target(target), buffer_size(size) { + : gl_target(target), readback(readback_), buffer_size(size) { gl_buffer.Create(); glBindBuffer(gl_target, gl_buffer.handle); diff --git a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp index f2f3224c5..d5b1214e0 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_manager.cpp @@ -96,7 +96,7 @@ DescriptorManager::DescriptorManager(const Instance& instance, Scheduler& schedu : instance{instance}, scheduler{scheduler}, pool_provider{instance, scheduler.GetMasterSemaphore()} { BuildLayouts(); - descriptor_set_dirty.fill(true); + descriptor_set_dirty.set(); current_pool = pool_provider.Commit(); } @@ -120,14 +120,14 
@@ void DescriptorManager::SetBinding(u32 set, u32 binding, DescriptorData data) { void DescriptorManager::BindDescriptorSets() { const bool is_dirty = scheduler.IsStateDirty(StateFlags::DescriptorSets); - if (is_dirty) { - descriptor_set_dirty.fill(true); + if (descriptor_set_dirty.none() && !is_dirty) { + return; } const vk::Device device = instance.GetDevice(); std::array bound_sets; for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) { - if (descriptor_set_dirty[i]) { + if (descriptor_set_dirty[i] || is_dirty) { vk::DescriptorSet set = AllocateSet(descriptor_set_layouts[i]); device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]); descriptor_sets[i] = set; @@ -141,10 +141,8 @@ void DescriptorManager::BindDescriptorSets() { {}); }); - descriptor_set_dirty.fill(false); - if (is_dirty) { - scheduler.MarkStateNonDirty(StateFlags::DescriptorSets); - } + descriptor_set_dirty.reset(); + scheduler.MarkStateNonDirty(StateFlags::DescriptorSets); } void DescriptorManager::BuildLayouts() { diff --git a/src/video_core/renderer_vulkan/vk_descriptor_manager.h b/src/video_core/renderer_vulkan/vk_descriptor_manager.h index 1206bacca..89ecd794f 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_manager.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_manager.h @@ -4,6 +4,7 @@ #pragma once +#include #include "video_core/renderer_vulkan/vk_resource_pool.h" namespace Vulkan { @@ -59,7 +60,7 @@ private: std::array update_templates; std::array update_data{}; std::array descriptor_sets{}; - std::array descriptor_set_dirty{}; + std::bitset descriptor_set_dirty{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 4b8c893a2..8e7e83f3b 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -391,13 +391,12 @@ void Instance::CreateFormatTable() { } bool Instance::CreateDevice() { - const vk::StructureChain 
feature_chain = - physical_device.getFeatures2(); + const vk::StructureChain feature_chain = physical_device.getFeatures2< + vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR, + vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT, + vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR, + vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT, + vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>(); // Not having geometry shaders will cause issues with accelerated rendering. features = feature_chain.get().features; @@ -439,6 +438,8 @@ bool Instance::CreateDevice() { custom_border_color = AddExtension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); index_type_uint8 = AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); image_format_list = AddExtension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME); + pipeline_creation_cache_control = + AddExtension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME); // Search queue families for graphics and present queues auto family_properties = physical_device.getQueueFamilyProperties(); @@ -518,6 +519,7 @@ bool Instance::CreateDevice() { feature_chain.get(), feature_chain.get(), feature_chain.get(), + feature_chain.get(), }; if (portability_subset) { @@ -547,6 +549,10 @@ bool Instance::CreateDevice() { device_chain.unlink(); } + if (!pipeline_creation_cache_control) { + device_chain.unlink(); + } + try { device = physical_device.createDevice(device_chain.get()); } catch (vk::ExtensionNotPresentError& err) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index fa6a11758..527b316da 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -137,6 +137,11 @@ public: return image_format_list; } + /// Returns true when VK_EXT_pipeline_creation_cache_control is supported + bool IsPipelineCreationCacheControlSupported() const { + return pipeline_creation_cache_control; + } 
+ /// Returns the vendor ID of the physical device u32 GetVendorID() const { return properties.vendorID; @@ -248,6 +253,7 @@ private: bool custom_border_color{}; bool index_type_uint8{}; bool image_format_list{}; + bool pipeline_creation_cache_control{}; bool enable_validation{}; bool dump_command_buffers{}; }; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 4fbd18777..b76cb6fe4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include "common/common_paths.h" #include "common/file_util.h" #include "common/logging/log.h" @@ -14,6 +13,8 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" namespace Vulkan { @@ -97,374 +98,72 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { return it->second; }; -PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler, - RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) - : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, - desc_manager{desc_manager} { - trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex, - instance.GetDevice(), ShaderOptimization::Debug); +PipelineCache::Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {} + +PipelineCache::Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage, + std::string code) + : Shader{instance} { + module = Compile(code, stage, instance.GetDevice(), ShaderOptimization::High); + MarkBuilt(); } -PipelineCache::~PipelineCache() { - 
vk::Device device = instance.GetDevice(); - - SaveDiskCache(); - - device.destroyPipelineCache(pipeline_cache); - device.destroyShaderModule(trivial_vertex_shader); - - for (auto& [key, module] : programmable_vertex_shaders.shader_cache) { +PipelineCache::Shader::~Shader() { + if (module && device) { device.destroyShaderModule(module); } - - for (auto& [key, module] : fixed_geometry_shaders.shaders) { - device.destroyShaderModule(module); - } - - for (auto& [key, module] : fragment_shaders_glsl.shaders) { - device.destroyShaderModule(module); - } - - for (auto& [key, module] : fragment_shaders_spv.shaders) { - device.destroyShaderModule(module); - } - - for (const auto& [hash, pipeline] : graphics_pipelines) { - device.destroyPipeline(pipeline); - } - - graphics_pipelines.clear(); } -void PipelineCache::LoadDiskCache() { - if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) { +PipelineCache::GraphicsPipeline::GraphicsPipeline( + const Instance& instance_, RenderpassCache& renderpass_cache_, const PipelineInfo& info_, + vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, std::array stages_, + Common::ThreadWorker* worker_) + : instance{instance_}, worker{worker_}, pipeline_layout{layout_}, + pipeline_cache{pipeline_cache_}, info{info_}, stages{stages_}, + renderpass{ + renderpass_cache_.GetRenderpass(info.color_attachment, info.depth_attachment, false)} { + + // Ask the driver if it can give us the pipeline quickly + if (Build(true)) { return; } - const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(), - instance.GetVendorID(), instance.GetDeviceID()); - vk::PipelineCacheCreateInfo cache_info = { - .initialDataSize = 0, - .pInitialData = nullptr, - }; - - std::vector cache_data; - FileUtil::IOFile cache_file{cache_file_path, "r"}; - if (cache_file.IsOpen()) { - LOG_INFO(Render_Vulkan, "Loading pipeline cache"); - - const u64 cache_file_size = cache_file.GetSize(); - cache_data.resize(cache_file_size); - if 
(cache_file.ReadBytes(cache_data.data(), cache_file_size)) { - if (!IsCacheValid(cache_data.data(), cache_file_size)) { - LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring"); - } else { - cache_info.initialDataSize = cache_file_size; - cache_info.pInitialData = cache_data.data(); - } - } - - cache_file.Close(); - } - - vk::Device device = instance.GetDevice(); - pipeline_cache = device.createPipelineCache(cache_info); -} - -void PipelineCache::SaveDiskCache() { - if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) { - return; - } - - const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(), - instance.GetVendorID(), instance.GetDeviceID()); - FileUtil::IOFile cache_file{cache_file_path, "wb"}; - if (!cache_file.IsOpen()) { - LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing"); - return; - } - - vk::Device device = instance.GetDevice(); - auto cache_data = device.getPipelineCacheData(pipeline_cache); - if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) { - LOG_WARNING(Render_Vulkan, "Error during pipeline cache write"); - return; - } - - cache_file.Close(); -} - -void PipelineCache::BindPipeline(const PipelineInfo& info) { - ApplyDynamic(info); - - scheduler.Record([this, info](vk::CommandBuffer cmdbuf) { - std::size_t shader_hash = 0; - for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { - shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]); - } - - const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() - ? 
offsetof(PipelineInfo, rasterization) - : offsetof(PipelineInfo, dynamic); - - u64 info_hash = Common::ComputeHash64(&info, info_hash_size); - u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); - - auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{}); - if (new_pipeline) { - it->second = BuildPipeline(info); - } - - cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second); - current_pipeline = it->second; - }); - - desc_manager.BindDescriptorSets(); -} - -bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, - Pica::Shader::ShaderSetup& setup, - const VertexLayout& layout) { - PicaVSConfig config{regs.rasterizer, regs.vs, setup}; - config.state.use_geometry_shader = instance.UseGeometryShaders(); - - u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; - for (u32 i = 0; i < layout.attribute_count; i++) { - const auto& attrib = layout.attributes[i]; - const u32 location = attrib.location.Value(); - const bool is_supported = IsAttribFormatSupported(attrib, instance); - ASSERT(is_supported || attrib.size == 3); - - config.state.attrib_types[location] = attrib.type.Value(); - config.state.emulated_attrib_locations[location] = is_supported ? 
0 : emulated_attrib_loc++; - } - - auto [handle, result] = - programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex, - instance.GetDevice(), ShaderOptimization::High); - if (!handle) { - LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); - return false; - } - - scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer) { - current_shaders[ProgramType::VS] = handle; - shader_hashes[ProgramType::VS] = hash; - }); - - return true; -} - -void PipelineCache::UseTrivialVertexShader() { - scheduler.Record([this](vk::CommandBuffer) { - current_shaders[ProgramType::VS] = trivial_vertex_shader; - shader_hashes[ProgramType::VS] = 0; - }); -} - -void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { - if (!instance.UseGeometryShaders()) { - return UseTrivialGeometryShader(); - } - - const PicaFixedGSConfig gs_config{regs}; - const vk::ShaderModule handle = - fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, - instance.GetDevice(), ShaderOptimization::Debug); - - scheduler.Record([this, handle, hash = gs_config.Hash()](vk::CommandBuffer) { - current_shaders[ProgramType::GS] = handle; - shader_hashes[ProgramType::GS] = hash; - }); -} - -void PipelineCache::UseTrivialGeometryShader() { - scheduler.Record([this](vk::CommandBuffer) { - current_shaders[ProgramType::GS] = VK_NULL_HANDLE; - shader_hashes[ProgramType::GS] = 0; - }); -} - -MICROPROFILE_DEFINE(Vulkan_FragmentGeneration, "Vulkan", "Fragment Shader Compilation", - MP_RGB(255, 100, 100)); -void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { - const PicaFSConfig config{regs, instance}; - - scheduler.Record([this, config](vk::CommandBuffer) { - MICROPROFILE_SCOPE(Vulkan_FragmentGeneration); - - vk::ShaderModule handle{}; - if (Settings::values.spirv_shader_gen) { - handle = fragment_shaders_spv.Get(config, instance.GetDevice()); - } else { - handle = fragment_shaders_glsl.Get(config, 
vk::ShaderStageFlagBits::eFragment, - instance.GetDevice(), ShaderOptimization::Debug); - } - - current_shaders[ProgramType::FS] = handle; - shader_hashes[ProgramType::FS] = config.Hash(); - }); -} - -void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { - const vk::DescriptorImageInfo image_info = { - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; - desc_manager.SetBinding(1, binding, DescriptorData{image_info}); -} - -void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { - const vk::DescriptorImageInfo image_info = { - .imageView = image_view, - .imageLayout = vk::ImageLayout::eGeneral, - }; - desc_manager.SetBinding(3, binding, DescriptorData{image_info}); -} - -void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { - const DescriptorData data = { - .buffer_info = - vk::DescriptorBufferInfo{ - .buffer = buffer, - .offset = offset, - .range = size, - }, - }; - desc_manager.SetBinding(0, binding, data); -} - -void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { - const DescriptorData data = { - .buffer_view = buffer_view, - }; - desc_manager.SetBinding(0, binding, data); -} - -void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) { - const DescriptorData data = { - .image_info = - vk::DescriptorImageInfo{ - .sampler = sampler, - }, - }; - desc_manager.SetBinding(2, binding, data); -} - -void PipelineCache::SetViewport(float x, float y, float width, float height) { - const vk::Viewport viewport{x, y, width, height, 0.f, 1.f}; - scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.setViewport(0, viewport); }); -} - -void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) { - const vk::Rect2D scissor{{x, y}, {width, height}}; - scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.setScissor(0, scissor); }); -} - -void PipelineCache::ApplyDynamic(const PipelineInfo& info) { - const bool is_dirty = 
scheduler.IsStateDirty(StateFlags::Pipeline); - - PipelineInfo current = current_info; - scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer cmdbuf) { - if (info.dynamic.stencil_compare_mask != current.dynamic.stencil_compare_mask || is_dirty) { - cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, - info.dynamic.stencil_compare_mask); - } - - if (info.dynamic.stencil_write_mask != current.dynamic.stencil_write_mask || is_dirty) { - cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, - info.dynamic.stencil_write_mask); - } - - if (info.dynamic.stencil_reference != current.dynamic.stencil_reference || is_dirty) { - cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, - info.dynamic.stencil_reference); - } - - if (info.dynamic.blend_color != current.dynamic.blend_color || is_dirty) { - const Common::Vec4f color = PicaToVK::ColorRGBA8(info.dynamic.blend_color); - cmdbuf.setBlendConstants(color.AsArray()); - } - - if (instance.IsExtendedDynamicStateSupported()) { - if (info.rasterization.cull_mode != current.rasterization.cull_mode || is_dirty) { - cmdbuf.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode)); - cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode)); - } - - if (info.depth_stencil.depth_compare_op != current.depth_stencil.depth_compare_op || - is_dirty) { - cmdbuf.setDepthCompareOpEXT( - PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op)); - } - - if (info.depth_stencil.depth_test_enable != current.depth_stencil.depth_test_enable || - is_dirty) { - cmdbuf.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable); - } - - if (info.depth_stencil.depth_write_enable != current.depth_stencil.depth_write_enable || - is_dirty) { - cmdbuf.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable); - } - - if (info.rasterization.topology != current.rasterization.topology || is_dirty) { - cmdbuf.setPrimitiveTopologyEXT( - 
PicaToVK::PrimitiveTopology(info.rasterization.topology)); - } - - if (info.depth_stencil.stencil_test_enable != - current.depth_stencil.stencil_test_enable || - is_dirty) { - cmdbuf.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable); - } - - if (info.depth_stencil.stencil_fail_op != current.depth_stencil.stencil_fail_op || - info.depth_stencil.stencil_pass_op != current.depth_stencil.stencil_pass_op || - info.depth_stencil.stencil_depth_fail_op != - current.depth_stencil.stencil_depth_fail_op || - info.depth_stencil.stencil_compare_op != current.depth_stencil.stencil_compare_op || - is_dirty) { - cmdbuf.setStencilOpEXT( - vk::StencilFaceFlagBits::eFrontAndBack, - PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op), - PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op), - PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op), - PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op)); - } - } - }); - - current_info = info; - if (is_dirty) { - scheduler.MarkStateNonDirty(StateFlags::Pipeline); + // Fallback to (a)synchronous compilation + if (worker) { + worker->QueueWork([this] { Build(); }); + } else { + Build(); } } -vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { +PipelineCache::GraphicsPipeline::~GraphicsPipeline() { + if (pipeline) { + instance.GetDevice().destroyPipeline(pipeline); + } +} + +MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32)); +bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { + if (fail_on_compile_required) { + if (!instance.IsPipelineCreationCacheControlSupported()) { + return false; + } + + // Check if all shader modules are ready + bool shaders_ready = true; + for (Shader* shader : stages) { + if (shader) { + shaders_ready &= shader->IsBuilt(); + } + } + + if (!shaders_ready) { + return false; + } + } + + MICROPROFILE_SCOPE(Vulkan_Pipeline); const vk::Device device = instance.GetDevice(); - u32 shader_count = 0; - 
std::array shader_stages; - for (std::size_t i = 0; i < current_shaders.size(); i++) { - const vk::ShaderModule shader = current_shaders[i]; - if (!shader) { - continue; - } - - shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ - .stage = ToVkShaderStage(i), - .module = shader, - .pName = "main", - }; - } - - /** - * Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and - * increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE - * as the input rate. Since one instance is all we render, the shader will always read the - * single attribute. - **/ std::array bindings; for (u32 i = 0; i < info.vertex_layout.binding_count; i++) { const auto& binding = info.vertex_layout.bindings[i]; @@ -615,7 +314,45 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { .back = stencil_op_state, }; + u32 shader_count = 0; + std::array shader_stages; + for (std::size_t i = 0; i < stages.size(); i++) { + Shader* shader = stages[i]; + if (!shader) { + continue; + } + + shader->WaitBuilt(); + shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ + .stage = ToVkShaderStage(i), + .module = shader->Handle(), + .pName = "main", + }; + } + + std::array creation_stage_feedback; + for (u32 i = 0; i < shader_count; i++) { + creation_stage_feedback[i] = vk::PipelineCreationFeedbackEXT{ + .flags = vk::PipelineCreationFeedbackFlagBits::eValid, + .duration = 0, + }; + } + + vk::PipelineCreationFeedbackEXT creation_feedback = { + .flags = vk::PipelineCreationFeedbackFlagBits::eValid, + }; + + const vk::PipelineCreationFeedbackCreateInfoEXT creation_feedback_info = { + .pPipelineCreationFeedback = &creation_feedback, + .pipelineStageCreationFeedbackCount = shader_count, + .pPipelineStageCreationFeedbacks = creation_stage_feedback.data(), + }; + const vk::GraphicsPipelineCreateInfo pipeline_info = { + .pNext = fail_on_compile_required ? 
&creation_feedback_info : nullptr, + .flags = fail_on_compile_required + ? vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT + : vk::PipelineCreateFlags{}, .stageCount = shader_count, .pStages = shader_stages.data(), .pVertexInputState = &vertex_input_info, @@ -626,20 +363,397 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) { .pDepthStencilState = &depth_info, .pColorBlendState = &color_blending, .pDynamicState = &dynamic_info, - .layout = desc_manager.GetPipelineLayout(), - .renderPass = - renderpass_cache.GetRenderpass(info.color_attachment, info.depth_attachment, false), + .layout = pipeline_layout, + .renderPass = renderpass, }; - if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info); - result.result == vk::Result::eSuccess) { - return result.value; + const vk::ResultValue result = device.createGraphicsPipeline(pipeline_cache, pipeline_info); + if (result.result == vk::Result::eSuccess) { + pipeline = result.value; + } else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) { + return false; } else { LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!"); UNREACHABLE(); } - return VK_NULL_HANDLE; + MarkBuilt(); + return true; +} + +PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler, + RenderpassCache& renderpass_cache, DescriptorManager& desc_manager) + : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, + desc_manager{desc_manager}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1, + "Pipeline builder"}, + trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex, + GenerateTrivialVertexShader()} {} + +PipelineCache::~PipelineCache() { + vk::Device device = instance.GetDevice(); + + SaveDiskCache(); + device.destroyPipelineCache(pipeline_cache); +} + +void PipelineCache::LoadDiskCache() { + if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) { + return; + } + + const std::string 
cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(), + instance.GetVendorID(), instance.GetDeviceID()); + vk::PipelineCacheCreateInfo cache_info = { + .initialDataSize = 0, + .pInitialData = nullptr, + }; + + std::vector cache_data; + FileUtil::IOFile cache_file{cache_file_path, "r"}; + if (cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Loading pipeline cache"); + + const u64 cache_file_size = cache_file.GetSize(); + cache_data.resize(cache_file_size); + if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) { + if (!IsCacheValid(cache_data.data(), cache_file_size)) { + LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring"); + } else { + cache_info.initialDataSize = cache_file_size; + cache_info.pInitialData = cache_data.data(); + } + } + + cache_file.Close(); + } + + vk::Device device = instance.GetDevice(); + pipeline_cache = device.createPipelineCache(cache_info); +} + +void PipelineCache::SaveDiskCache() { + if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) { + return; + } + + const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(), + instance.GetVendorID(), instance.GetDeviceID()); + FileUtil::IOFile cache_file{cache_file_path, "wb"}; + if (!cache_file.IsOpen()) { + LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing"); + return; + } + + vk::Device device = instance.GetDevice(); + auto cache_data = device.getPipelineCacheData(pipeline_cache); + if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) { + LOG_WARNING(Render_Vulkan, "Error during pipeline cache write"); + return; + } + + cache_file.Close(); +} + +MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32)); +bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) { + MICROPROFILE_SCOPE(Vulkan_Bind); + std::size_t shader_hash = 0; + for (u32 i = 0; i < MAX_SHADER_STAGES; i++) { + shader_hash = Common::HashCombine(shader_hash, 
shader_hashes[i]); + } + + const u64 info_hash_size = instance.IsExtendedDynamicStateSupported() + ? offsetof(PipelineInfo, rasterization) + : offsetof(PipelineInfo, dynamic); + + u64 info_hash = Common::ComputeHash64(&info, info_hash_size); + u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash); + + auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash); + if (new_pipeline) { + it->second = std::make_unique( + instance, renderpass_cache, info, pipeline_cache, desc_manager.GetPipelineLayout(), + current_shaders, &workers); + } + + GraphicsPipeline* const pipeline{it->second.get()}; + if (!wait_built && !pipeline->IsBuilt()) { + return false; + } + + const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline); + ApplyDynamic(info, is_dirty); + + if (current_pipeline != pipeline || is_dirty) { + if (!pipeline->IsBuilt()) { + scheduler.Record([pipeline](vk::CommandBuffer) { pipeline->WaitBuilt(); }); + } + + scheduler.Record([pipeline](vk::CommandBuffer cmdbuf) { + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle()); + }); + + current_pipeline = pipeline; + } + + desc_manager.BindDescriptorSets(); + scheduler.MarkStateNonDirty(StateFlags::Pipeline); + + return true; +} + +bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, + Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout) { + PicaVSConfig config{regs.rasterizer, regs.vs, setup}; + config.state.use_geometry_shader = instance.UseGeometryShaders(); + + u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; + for (u32 i = 0; i < layout.attribute_count; i++) { + const auto& attrib = layout.attributes[i]; + const u32 location = attrib.location.Value(); + const bool is_supported = IsAttribFormatSupported(attrib, instance); + ASSERT(is_supported || attrib.size == 3); + + config.state.attrib_types[location] = attrib.type.Value(); + config.state.emulated_attrib_locations[location] = is_supported ? 
0 : emulated_attrib_loc++; + } + + auto [it, new_config] = programmable_vertex_map.try_emplace(config); + if (new_config) { + auto code = GenerateVertexShader(setup, config); + if (!code) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); + programmable_vertex_map[config] = nullptr; + return false; + } + + std::string& program = code.value(); + auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance); + auto& shader = iter->second; + + if (new_program) { + shader.program = std::move(program); + const vk::Device device = instance.GetDevice(); + + workers.QueueWork([device, &shader] { + shader.module = Compile(shader.program, vk::ShaderStageFlagBits::eVertex, device, + ShaderOptimization::High); + shader.MarkBuilt(); + }); + } + + it->second = &shader; + } + + Shader* const shader{it->second}; + if (!shader) { + LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader"); + return false; + } + + current_shaders[ProgramType::VS] = shader; + shader_hashes[ProgramType::VS] = config.Hash(); + + return true; +} + +void PipelineCache::UseTrivialVertexShader() { + current_shaders[ProgramType::VS] = &trivial_vertex_shader; + shader_hashes[ProgramType::VS] = 0; +} + +bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { + if (!instance.UseGeometryShaders()) { + UseTrivialGeometryShader(); + return true; + } + + const PicaFixedGSConfig gs_config{regs}; + auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance); + auto& shader = it->second; + + if (new_shader) { + const vk::Device device = instance.GetDevice(); + workers.QueueWork([gs_config, device, &shader]() { + const std::string code = GenerateFixedGeometryShader(gs_config); + shader.module = + Compile(code, vk::ShaderStageFlagBits::eGeometry, device, ShaderOptimization::High); + shader.MarkBuilt(); + }); + } + + current_shaders[ProgramType::GS] = &shader; + shader_hashes[ProgramType::GS] = gs_config.Hash(); + + return 
true; +} + +void PipelineCache::UseTrivialGeometryShader() { + current_shaders[ProgramType::GS] = nullptr; + shader_hashes[ProgramType::GS] = 0; +} + +void PipelineCache::UseFragmentShader(const Pica::Regs& regs) { + const PicaFSConfig config{regs, instance}; + + auto [it, new_shader] = fragment_shaders.try_emplace(config, instance); + auto& shader = it->second; + + if (new_shader) { + const bool emit_spirv = Settings::values.spirv_shader_gen.GetValue(); + const vk::Device device = instance.GetDevice(); + + workers.QueueWork([config, device, emit_spirv, &shader]() { + if (emit_spirv) { + const std::vector code = GenerateFragmentShaderSPV(config); + shader.module = CompileSPV(code, device); + } else { + const std::string code = GenerateFragmentShader(config); + shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device, + ShaderOptimization::High); + } + shader.MarkBuilt(); + }); + } + + current_shaders[ProgramType::FS] = &shader; + shader_hashes[ProgramType::FS] = config.Hash(); +} + +void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) { + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral, + }; + desc_manager.SetBinding(1, binding, DescriptorData{image_info}); +} + +void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) { + const vk::DescriptorImageInfo image_info = { + .imageView = image_view, + .imageLayout = vk::ImageLayout::eGeneral, + }; + desc_manager.SetBinding(3, binding, DescriptorData{image_info}); +} + +void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) { + const DescriptorData data = { + .buffer_info = + vk::DescriptorBufferInfo{ + .buffer = buffer, + .offset = offset, + .range = size, + }, + }; + desc_manager.SetBinding(0, binding, data); +} + +void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) { + const DescriptorData data = { + .buffer_view = buffer_view, + }; + 
desc_manager.SetBinding(0, binding, data); +} + +void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) { + const DescriptorData data = { + .image_info = + vk::DescriptorImageInfo{ + .sampler = sampler, + }, + }; + desc_manager.SetBinding(2, binding, data); +} + +void PipelineCache::SetViewport(float x, float y, float width, float height) { + const vk::Viewport viewport{x, y, width, height, 0.f, 1.f}; + scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.setViewport(0, viewport); }); +} + +void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) { + const vk::Rect2D scissor{{x, y}, {width, height}}; + scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.setScissor(0, scissor); }); +} + +void PipelineCache::ApplyDynamic(const PipelineInfo& info, bool is_dirty) { + if (!is_dirty && info.dynamic == current_info.dynamic && + info.rasterization.value == current_info.rasterization.value && + info.depth_stencil.value == current_info.depth_stencil.value) { + return; + } + + scheduler.Record([this, is_dirty, current_dynamic = current_info.dynamic, + current_rasterization = current_info.rasterization, + current_depth_stencil = current_info.depth_stencil, dynamic = info.dynamic, + rasterization = info.rasterization, + depth_stencil = info.depth_stencil](vk::CommandBuffer cmdbuf) { + if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) { + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_compare_mask); + } + + if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) { + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_write_mask); + } + + if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, + dynamic.stencil_reference); + } + + if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) { + const 
Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color); + cmdbuf.setBlendConstants(color.AsArray()); + } + + if (instance.IsExtendedDynamicStateSupported()) { + if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) { + cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode)); + cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode)); + } + + if (depth_stencil.depth_compare_op != current_depth_stencil.depth_compare_op || + is_dirty) { + cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op)); + } + + if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable || + is_dirty) { + cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable); + } + + if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable || + is_dirty) { + cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable); + } + + if (rasterization.topology != current_rasterization.topology || is_dirty) { + cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology)); + } + + if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable || + is_dirty) { + cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable); + } + + if (depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op || + depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op || + depth_stencil.stencil_depth_fail_op != + current_depth_stencil.stencil_depth_fail_op || + depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op || + is_dirty) { + cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack, + PicaToVK::StencilOp(depth_stencil.stencil_fail_op), + PicaToVK::StencilOp(depth_stencil.stencil_pass_op), + PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op), + PicaToVK::CompareFunc(depth_stencil.stencil_compare_op)); + } + } + }); + + current_info = info; } bool PipelineCache::IsCacheValid(const u8* data, 
u64 size) const { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 655bde8b3..dd75fceff 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -5,13 +5,17 @@ #pragma once #include +#include "common/async_handle.h" #include "common/bit_field.h" #include "common/hash.h" +#include "common/thread_worker.h" #include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/regs.h" -#include "video_core/renderer_vulkan/vk_shader_gen_spv.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/shader/shader_cache.h" +#include "video_core/renderer_vulkan/vk_common.h" +#include "video_core/renderer_vulkan/vk_shader_gen.h" + +namespace Pica { +struct Regs; +} namespace Vulkan { @@ -59,6 +63,8 @@ struct DynamicState { u8 stencil_reference; u8 stencil_compare_mask; u8 stencil_write_mask; + + auto operator<=>(const DynamicState&) const noexcept = default; }; union VertexBinding { @@ -107,21 +113,6 @@ struct PipelineInfo { } }; -/** - * Vulkan specialized PICA shader caches - */ -using ProgrammableVertexShaders = Pica::Shader::ShaderDoubleCache; - -using FixedGeometryShaders = Pica::Shader::ShaderCache; - -using FragmentShadersGLSL = - Pica::Shader::ShaderCache; - -using FragmentShadersSPV = Pica::Shader::ShaderCache; - class Instance; class Scheduler; class RenderpassCache; @@ -131,6 +122,48 @@ class DescriptorManager; * Stores a collection of rasterizer pipelines used during rendering. 
*/ class PipelineCache { + struct Shader : public Common::AsyncHandle { + Shader(const Instance& instance); + Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code); + + ~Shader(); + + [[nodiscard]] vk::ShaderModule Handle() const noexcept { + return module; + } + + vk::ShaderModule module; + vk::Device device; + std::string program; + }; + + class GraphicsPipeline : public Common::AsyncHandle { + public: + GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache, + const PipelineInfo& info, vk::PipelineCache pipeline_cache, + vk::PipelineLayout layout, std::array stages, + Common::ThreadWorker* worker); + ~GraphicsPipeline(); + + bool Build(bool fail_on_compile_required = false); + + [[nodiscard]] vk::Pipeline Handle() const noexcept { + return pipeline; + } + + private: + const Instance& instance; + Common::ThreadWorker* worker; + + vk::Pipeline pipeline; + vk::PipelineLayout pipeline_layout; + vk::PipelineCache pipeline_cache; + + PipelineInfo info; + std::array stages; + vk::RenderPass renderpass; + }; + public: PipelineCache(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache, DescriptorManager& desc_manager); @@ -143,7 +176,7 @@ public: void SaveDiskCache(); /// Binds a pipeline using the provided information - void BindPipeline(const PipelineInfo& info); + bool BindPipeline(const PipelineInfo& info, bool wait_built = false); /// Binds a PICA decompiled vertex shader bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, @@ -153,7 +186,7 @@ public: void UseTrivialVertexShader(); /// Binds a PICA decompiled geometry shader - void UseFixedGeometryShader(const Pica::Regs& regs); + bool UseFixedGeometryShader(const Pica::Regs& regs); /// Binds a passthrough geometry shader void UseTrivialGeometryShader(); @@ -184,14 +217,11 @@ public: private: /// Applies dynamic pipeline state to the current command buffer - void ApplyDynamic(const PipelineInfo& 
info); + void ApplyDynamic(const PipelineInfo& info, bool is_dirty); /// Builds the rasterizer pipeline layout void BuildLayout(); - /// Builds a rasterizer pipeline using the PipelineInfo struct - vk::Pipeline BuildPipeline(const PipelineInfo& info); - /// Returns true when the disk data can be used by the current driver bool IsCacheValid(const u8* data, u64 size) const; @@ -207,22 +237,26 @@ private: RenderpassCache& renderpass_cache; DescriptorManager& desc_manager; - // Cached pipelines vk::PipelineCache pipeline_cache; - std::unordered_map> graphics_pipelines; - vk::Pipeline current_pipeline{}; + Common::ThreadWorker workers; PipelineInfo current_info{}; + GraphicsPipeline* current_pipeline{}; + std::unordered_map, Common::IdentityHash> + graphics_pipelines; - // Bound shader modules - enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 }; + enum ProgramType : u32 { + VS = 0, + GS = 2, + FS = 1, + }; - std::array current_shaders; std::array shader_hashes; - ProgrammableVertexShaders programmable_vertex_shaders; - FixedGeometryShaders fixed_geometry_shaders; - FragmentShadersGLSL fragment_shaders_glsl; - FragmentShadersSPV fragment_shaders_spv; - vk::ShaderModule trivial_vertex_shader; + std::array current_shaders; + std::unordered_map programmable_vertex_map; + std::unordered_map programmable_vertex_cache; + std::unordered_map fixed_geometry_shaders; + std::unordered_map fragment_shaders; + Shader trivial_vertex_shader; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index ca0c7680c..143f6403a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -69,7 +69,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan vk::ImageAspectFlagBits::eColor, runtime}, stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE}, texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, 
TextureBufferSize(instance)}, - texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} { + texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)}, + async_shaders{Settings::values.async_shader_compilation.GetValue()} { vertex_buffers.fill(stream_buffer.Handle()); @@ -355,8 +356,7 @@ bool RasterizerVulkan::SetupGeometryShader() { return false; } - pipeline_cache.UseFixedGeometryShader(regs); - return true; + return pipeline_cache.UseFixedGeometryShader(regs); } bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { @@ -400,7 +400,9 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { } pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); - pipeline_cache.BindPipeline(pipeline_info); + if (!pipeline_cache.BindPipeline(pipeline_info, !async_shaders)) { + return true; ///< Skip draw call when pipeline is not ready + } const DrawParams params = { .vertex_count = regs.pipeline.num_vertices, @@ -459,7 +461,6 @@ void RasterizerVulkan::DrawTriangles() { MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192)); bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { - MICROPROFILE_SCOPE(Vulkan_Drawing); const auto& regs = Pica::g_state.regs; const bool shadow_rendering = regs.framebuffer.IsShadowRendering(); @@ -679,6 +680,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { viewport_rect_unscaled.GetWidth() * res_scale, viewport_rect_unscaled.GetHeight() * res_scale); + MICROPROFILE_SCOPE(Vulkan_Drawing); + // Sync and bind the shader if (shader_dirty) { pipeline_cache.UseFragmentShader(regs); @@ -748,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_info.vertex_layout = software_layout; pipeline_cache.UseTrivialVertexShader(); pipeline_cache.UseTrivialGeometryShader(); - pipeline_cache.BindPipeline(pipeline_info); + pipeline_cache.BindPipeline(pipeline_info, true); const u32 
max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex); const u32 batch_size = static_cast(vertex_batch.size()); @@ -785,11 +788,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } - static int submit_threshold = 40; + static int submit_threshold = 20; submit_threshold--; if (!submit_threshold) { - submit_threshold = 40; - scheduler.Flush(); + submit_threshold = 20; + scheduler.DispatchWork(); } return succeeded; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 73be65b81..6faf36692 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -202,6 +202,7 @@ private: u64 uniform_buffer_alignment; u64 uniform_size_aligned_vs; u64 uniform_size_aligned_fs; + bool async_shaders{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 183ce4823..c932227ba 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -4,11 +4,9 @@ #pragma once -#include #include #include "common/hash.h" #include "video_core/regs.h" -#include "video_core/regs_pipeline.h" #include "video_core/shader/shader.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp index a3cc8746c..67f5f5cc7 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp @@ -43,7 +43,7 @@ void FragmentModule::Generate() { } // Check if the fragment is outside scissor rectangle - WriteScissor(); + // WriteScissor(); // Write shader bytecode to emulate all enabled PICA lights if (config.state.lighting.enable) { diff --git a/src/video_core/renderer_vulkan/vk_shader_util.cpp b/src/video_core/renderer_vulkan/vk_shader_util.cpp index 621c1b240..e2f1c998d 100644 --- 
a/src/video_core/renderer_vulkan/vk_shader_util.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_util.cpp @@ -6,12 +6,15 @@ #include #include #include "common/assert.h" +#include "common/literals.h" #include "common/logging/log.h" #include "common/microprofile.h" #include "video_core/renderer_vulkan/vk_shader_util.h" namespace Vulkan { +using namespace Common::Literals; + constexpr TBuiltInResource DefaultTBuiltInResource = { .maxLights = 32, .maxClipPlanes = 6, @@ -156,12 +159,16 @@ bool InitializeCompiler() { return true; } +MICROPROFILE_DEFINE(Vulkan_GLSLCompilation, "VulkanShader", "GLSL Shader Compilation", + MP_RGB(100, 255, 52)); vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device, ShaderOptimization level) { if (!InitializeCompiler()) { return VK_NULL_HANDLE; } + MICROPROFILE_SCOPE(Vulkan_GLSLCompilation); + EProfile profile = ECoreProfile; EShMessages messages = static_cast(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules); @@ -209,9 +216,11 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v options.validate = true; } else { options.disableOptimizer = false; - options.stripDebugInfo = true; + options.validate = false; + options.optimizeSize = true; } + out_code.reserve(8_KiB); glslang::GlslangToSpv(*intermediate, out_code, &logger, &options); const std::string spv_messages = logger.getAllMessages(); @@ -222,10 +231,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v return CompileSPV(out_code, device); } -MICROPROFILE_DEFINE(Vulkan_SPVCompilation, "Vulkan", "SPIR-V Shader Compilation", - MP_RGB(100, 255, 52)); vk::ShaderModule CompileSPV(std::span code, vk::Device device) { - MICROPROFILE_SCOPE(Vulkan_SPVCompilation); const vk::ShaderModuleCreateInfo shader_info = { .codeSize = code.size() * sizeof(u32), .pCode = code.data(), diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 
2756adf51..3f6c819b0 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -196,6 +196,8 @@ void Swapchain::SetPresentMode() { present_mode = vk::PresentModeKHR::eMailbox; } } + + LOG_INFO(Render_Vulkan, "Using {} present mode", vk::to_string(present_mode)); } void Swapchain::SetSurfaceProperties() { @@ -217,6 +219,8 @@ void Swapchain::SetSurfaceProperties() { capabilities.maxImageCount); } + LOG_INFO(Render_Vulkan, "Using {} images", image_count); + // Prefer identity transform if possible transform = vk::SurfaceTransformFlagBitsKHR::eIdentity; if (!(capabilities.supportedTransforms & transform)) { diff --git a/src/video_core/shader/shader_cache.h b/src/video_core/shader/shader_cache.h deleted file mode 100644 index 16d6bcf82..000000000 --- a/src/video_core/shader/shader_cache.h +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include -#include -#include "video_core/shader/shader.h" - -namespace Pica::Shader { - -template -using ShaderCacheResult = std::pair>; - -template -class ShaderCache { -public: - ShaderCache() {} - ~ShaderCache() = default; - - /// Returns a shader handle generated from the provided config - template - auto Get(const KeyType& config, Args&&... args) { - auto [iter, new_shader] = shaders.emplace(config, ShaderType{}); - auto& shader = iter->second; - - if (new_shader) { - const auto code = CodeGenerator(config); - shader = ModuleCompiler(code, args...); - return shader; - } - - return shader; - } - - void Inject(const KeyType& key, ShaderType&& shader) { - shaders.emplace(key, std::move(shader)); - } - -public: - std::unordered_map shaders; -}; - -/** - * This is a cache designed for shaders translated from PICA shaders. The first cache matches the - * config structure like a normal cache does. 
On cache miss, the second cache matches the generated - * GLSL code. The configuration is like this because there might be leftover code in the PICA shader - * program buffer from the previous shader, which is hashed into the config, resulting several - * different config values from the same shader program. - */ -template -class ShaderDoubleCache { -public: - ShaderDoubleCache() = default; - ~ShaderDoubleCache() = default; - - template - auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args) - -> ShaderCacheResult { - if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) { - auto code = CodeGenerator(setup, key); - if (!code) { - shader_map[key] = nullptr; - return std::make_pair(ShaderType{}, std::nullopt); - } - - std::string& program = code.value(); - auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{}); - auto& shader = iter->second; - - if (new_shader) { - shader = ModuleCompiler(program, args...); - } - - shader_map[key] = &shader; - return std::make_pair(shader, std::move(program)); - } else { - return std::make_pair(*map_iter->second, std::nullopt); - } - } - - void Inject(const KeyType& key, std::string decomp, ShaderType&& program) { - const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first; - - auto& cached_shader = iter->second; - shader_map.insert_or_assign(key, &cached_shader); - } - -public: - std::unordered_map shader_map; - std::unordered_map shader_cache; -}; - -} // namespace Pica::Shader