renderer_vulkan: Async shaders

GPUCode
2023-01-13 13:40:18 +02:00
parent 131129062b
commit 4752818920
28 changed files with 676 additions and 541 deletions

View File

@ -13,4 +13,4 @@ void* GetSurfaceLayer(void* surface) {
return view.layer;
}
} // AppleSurfaceHelper
} // namespace AppleSurfaceHelper

View File

@ -605,6 +605,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.physical_device);
ReadGlobalSetting(Settings::values.async_command_recording);
ReadGlobalSetting(Settings::values.async_shader_compilation);
ReadGlobalSetting(Settings::values.spirv_shader_gen);
ReadGlobalSetting(Settings::values.graphics_api);
ReadGlobalSetting(Settings::values.use_hw_renderer);
@ -1088,6 +1089,7 @@ void Config::SaveRendererValues() {
WriteGlobalSetting(Settings::values.graphics_api);
WriteGlobalSetting(Settings::values.physical_device);
WriteGlobalSetting(Settings::values.async_command_recording);
WriteGlobalSetting(Settings::values.async_shader_compilation);
WriteGlobalSetting(Settings::values.spirv_shader_gen);
WriteGlobalSetting(Settings::values.use_hw_renderer);
WriteGlobalSetting(Settings::values.use_hw_shader);

View File

@ -89,6 +89,7 @@ void ConfigureGraphics::SetConfiguration() {
static_cast<int>(Settings::values.physical_device.GetValue()));
ui->toggle_async_recording->setChecked(Settings::values.async_command_recording.GetValue());
ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen.GetValue());
ui->toggle_async_shaders->setChecked(Settings::values.async_shader_compilation.GetValue());
if (Settings::IsConfiguringGlobal()) {
ui->toggle_shader_jit->setChecked(Settings::values.use_shader_jit.GetValue());
@ -114,6 +115,8 @@ void ConfigureGraphics::ApplyConfiguration() {
ui->physical_device_combo);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_command_recording,
ui->toggle_async_recording, async_command_recording);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_shader_compilation,
ui->toggle_async_shaders, async_shader_compilation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.spirv_shader_gen,
ui->spirv_shader_gen, spirv_shader_gen);
@ -136,6 +139,8 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->toggle_disk_shader_cache->setEnabled(
Settings::values.use_disk_shader_cache.UsingGlobal());
ui->toggle_vsync_new->setEnabled(Settings::values.use_vsync_new.UsingGlobal());
ui->toggle_async_shaders->setEnabled(
Settings::values.async_shader_compilation.UsingGlobal());
return;
}
@ -154,6 +159,9 @@ void ConfigureGraphics::SetupPerGameUI() {
use_disk_shader_cache);
ConfigurationShared::SetColoredTristate(ui->toggle_vsync_new, Settings::values.use_vsync_new,
use_vsync_new);
ConfigurationShared::SetColoredTristate(ui->toggle_async_shaders,
Settings::values.async_shader_compilation,
async_shader_compilation);
}
void ConfigureGraphics::DiscoverPhysicalDevices() {

View File

@ -41,6 +41,7 @@ private:
ConfigurationShared::CheckState use_disk_shader_cache;
ConfigurationShared::CheckState use_vsync_new;
ConfigurationShared::CheckState async_command_recording;
ConfigurationShared::CheckState async_shader_compilation;
ConfigurationShared::CheckState spirv_shader_gen;
std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color;

View File

@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>400</width>
<height>513</height>
<height>579</height>
</rect>
</property>
<property name="minimumSize">
@ -169,6 +169,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="toggle_async_shaders">
<property name="text">
<string>Async Shader Compilation</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>

View File

@ -90,4 +90,4 @@ bool CheckAuthorizationForMicrophone() {
return authorized_microphone;
}
} // AppleAuthorization
} // namespace AppleAuthorization

View File

@ -60,6 +60,7 @@ add_library(common STATIC
announce_multiplayer_room.h
archives.h
assert.h
async_handle.h
atomic_ops.h
detached_tasks.cpp
detached_tasks.h

src/common/async_handle.h (new file, 37 lines)
View File

@ -0,0 +1,37 @@
// Copyright 2023 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <type_traits>
namespace Common {
struct AsyncHandle {
public:
[[nodiscard]] bool IsBuilt() noexcept {
return is_built.load(std::memory_order::relaxed);
}
void WaitBuilt() noexcept {
std::unique_lock lock{mutex};
condvar.wait(lock, [this] { return is_built.load(std::memory_order::relaxed); });
}
void MarkBuilt() noexcept {
std::scoped_lock lock{mutex};
is_built = true;
condvar.notify_all();
}
private:
std::condition_variable condvar;
std::mutex mutex;
std::atomic_bool is_built{false};
};
} // namespace Common
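A minimal usage sketch of the handle (the worker thread and payload below are illustrative assumptions, not part of the commit): the producer publishes its result with MarkBuilt(), while consumers poll IsBuilt() on the fast path and fall back to blocking in WaitBuilt().

#include <thread>
#include "common/async_handle.h"

// Hypothetical payload; in this commit the real subclasses are
// PipelineCache::Shader and PipelineCache::GraphicsPipeline.
struct AsyncShader : Common::AsyncHandle {
    int module{}; // stands in for a vk::ShaderModule
};

int main() {
    AsyncShader shader;
    std::thread worker{[&shader] {
        shader.module = 42;  // expensive compilation happens here
        shader.MarkBuilt();  // publish the result and wake any waiters
    }};
    if (!shader.IsBuilt()) { // cheap relaxed poll
        shader.WaitBuilt();  // block until the producer finishes
    }
    worker.join();           // shader.module is now safe to read
}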

View File

@ -107,6 +107,7 @@ void LogSettings() {
log_setting("Core_CPUClockPercentage", values.cpu_clock_percentage.GetValue());
log_setting("Renderer_GraphicsAPI", GetAPIName(values.graphics_api.GetValue()));
log_setting("Renderer_AsyncRecording", values.async_command_recording.GetValue());
log_setting("Renderer_AsyncShaders", values.async_shader_compilation.GetValue());
log_setting("Renderer_SpirvShaderGen", values.spirv_shader_gen.GetValue());
log_setting("Renderer_Debug", values.renderer_debug.GetValue());
log_setting("Renderer_UseHwRenderer", values.use_hw_renderer.GetValue());
@ -193,6 +194,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_hw_renderer.SetGlobal(true);
values.use_hw_shader.SetGlobal(true);
values.separable_shader.SetGlobal(true);
values.async_shader_compilation.SetGlobal(true);
values.use_disk_shader_cache.SetGlobal(true);
values.shaders_accurate_mul.SetGlobal(true);
values.use_vsync_new.SetGlobal(true);

View File

@ -448,9 +448,10 @@ struct Values {
// Renderer
SwitchableSetting<GraphicsAPI> graphics_api{GraphicsAPI::OpenGL, "graphics_api"};
SwitchableSetting<u16> physical_device{0, "physical_device"};
SwitchableSetting<bool> spirv_shader_gen{true, "spirv_shader_gen"};
Setting<bool> renderer_debug{false, "renderer_debug"};
Setting<bool> dump_command_buffers{false, "dump_command_buffers"};
SwitchableSetting<bool> spirv_shader_gen{true, "spirv_shader_gen"};
SwitchableSetting<bool> async_shader_compilation{false, "async_shader_compilation"};
SwitchableSetting<bool> async_command_recording{true, "async_command_recording"};
SwitchableSetting<bool> use_hw_renderer{true, "use_hw_renderer"};
SwitchableSetting<bool> use_hw_shader{true, "use_hw_shader"};

View File

@ -8,6 +8,7 @@
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include "common/common_types.h"
namespace Common::Telemetry {
@ -52,8 +53,8 @@ public:
template <typename T>
class Field : public FieldInterface {
public:
Field(FieldType type, std::string name, T value)
: name(std::move(name)), type(type), value(std::move(value)) {}
Field(FieldType type, std::string_view name, T value)
: name(name), type(type), value(std::move(value)) {}
Field(const Field&) = default;
Field& operator=(const Field&) = default;
@ -115,7 +116,7 @@ public:
* @param value Value for the field to add.
*/
template <typename T>
void AddField(FieldType type, const char* name, T value) {
void AddField(FieldType type, std::string_view name, T value) {
return AddField(std::make_unique<Field<T>>(type, name, std::move(value)));
}

View File

@ -34,10 +34,10 @@ class StatefulThreadWorker {
using StateMaker = std::conditional_t<with_state, std::function<StateType()>, DummyCallable>;
public:
explicit StatefulThreadWorker(size_t num_workers, std::string name, StateMaker func = {})
: workers_queued{num_workers}, thread_name{std::move(name)} {
explicit StatefulThreadWorker(size_t num_workers, std::string_view name, StateMaker func = {})
: workers_queued{num_workers}, thread_name{name} {
const auto lambda = [this, func](std::stop_token stop_token) {
Common::SetCurrentThreadName(thread_name.c_str());
Common::SetCurrentThreadName(thread_name.data());
{
[[maybe_unused]] std::conditional_t<with_state, StateType, int> state{func()};
while (!stop_token.stop_requested()) {
@ -108,7 +108,7 @@ private:
std::atomic<size_t> work_done{};
std::atomic<size_t> workers_stopped{};
std::atomic<size_t> workers_queued{};
std::string thread_name;
std::string_view thread_name;
std::vector<std::jthread> threads;
};
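Note the lifetime contract the std::string_view switch introduces: the worker no longer owns its name, so the caller's storage must outlive the worker. In this commit the only caller passes the literal "Pipeline builder", which has static storage and is null-terminated, as SetCurrentThreadName(thread_name.data()) requires. A standalone sketch of the contract, using a hypothetical stand-in type:

#include <string>
#include <string_view>

// Stand-in mirroring the pattern above: the name is non-owning.
struct Worker {
    explicit Worker(std::string_view name) : thread_name{name} {}
    std::string_view thread_name; // caller's storage must stay alive
};

int main() {
    Worker ok{"Pipeline builder"};           // literal: static storage, safe
    std::string owned = "Async compiler";
    Worker also_ok{owned};                   // safe while 'owned' is alive
    // Worker dangling{std::string{"tmp"}};  // unsafe: temporary dies at ';'
}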

View File

@ -6,6 +6,7 @@
#include <memory>
#include <string>
#include <string_view>
#include "common/telemetry.h"
namespace Loader {
@ -53,7 +54,7 @@ public:
* @param value Value for the field to add.
*/
template <typename T>
void AddField(Common::Telemetry::FieldType type, const char* name, T value) {
void AddField(Common::Telemetry::FieldType type, std::string_view name, T value) {
field_collection.AddField(type, name, std::move(value));
}

View File

@ -126,7 +126,6 @@ add_library(video_core STATIC
shader/debug_data.h
shader/shader.cpp
shader/shader.h
shader/shader_cache.h
shader/shader_interpreter.cpp
shader/shader_interpreter.h
shader/shader_jit_x64.cpp

View File

@ -12,9 +12,9 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback,
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback_,
bool prefer_coherent)
: gl_target(target), buffer_size(size) {
: gl_target(target), readback(readback_), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);

View File

@ -96,7 +96,7 @@ DescriptorManager::DescriptorManager(const Instance& instance, Scheduler& schedu
: instance{instance}, scheduler{scheduler}, pool_provider{instance,
scheduler.GetMasterSemaphore()} {
BuildLayouts();
descriptor_set_dirty.fill(true);
descriptor_set_dirty.set();
current_pool = pool_provider.Commit();
}
@ -120,14 +120,14 @@ void DescriptorManager::SetBinding(u32 set, u32 binding, DescriptorData data) {
void DescriptorManager::BindDescriptorSets() {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::DescriptorSets);
if (is_dirty) {
descriptor_set_dirty.fill(true);
if (descriptor_set_dirty.none() && !is_dirty) {
return;
}
const vk::Device device = instance.GetDevice();
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> bound_sets;
for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
if (descriptor_set_dirty[i]) {
if (descriptor_set_dirty[i] || is_dirty) {
vk::DescriptorSet set = AllocateSet(descriptor_set_layouts[i]);
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]);
descriptor_sets[i] = set;
@ -141,11 +141,9 @@ void DescriptorManager::BindDescriptorSets() {
{});
});
descriptor_set_dirty.fill(false);
if (is_dirty) {
descriptor_set_dirty.reset();
scheduler.MarkStateNonDirty(StateFlags::DescriptorSets);
}
}
void DescriptorManager::BuildLayouts() {
std::array<vk::DescriptorSetLayoutBinding, MAX_DESCRIPTORS> set_bindings;
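The std::bitset swap buys the all-clean early-out (none()) and the whole-set updates (set()/reset()) that the bool array expressed with fill() and a manual scan. A condensed sketch of the dirty-tracking idiom, with an illustrative set count:

#include <bitset>
#include <cstddef>

constexpr std::size_t MAX_SETS = 6; // illustrative; the real constant is MAX_DESCRIPTOR_SETS
std::bitset<MAX_SETS> dirty;

void Bind(bool state_dirty) {
    if (dirty.none() && !state_dirty) {
        return; // O(1) test replaces scanning a bool array
    }
    for (std::size_t i = 0; i < MAX_SETS; ++i) {
        if (dirty[i] || state_dirty) {
            // reallocate and rewrite descriptor set i here
        }
    }
    dirty.reset(); // was: descriptor_set_dirty.fill(false)
}

int main() {
    dirty.set(); // was: descriptor_set_dirty.fill(true)
    Bind(false);
}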

View File

@ -4,6 +4,7 @@
#pragma once
#include <bitset>
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
@ -59,7 +60,7 @@ private:
std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;
std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_set_dirty{};
std::bitset<MAX_DESCRIPTOR_SETS> descriptor_set_dirty{};
};
} // namespace Vulkan

View File

@ -391,13 +391,12 @@ void Instance::CreateFormatTable() {
}
bool Instance::CreateDevice() {
const vk::StructureChain feature_chain =
physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
const vk::StructureChain feature_chain = physical_device.getFeatures2<
vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR,
vk::PhysicalDeviceCustomBorderColorFeaturesEXT,
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
vk::PhysicalDeviceCustomBorderColorFeaturesEXT, vk::PhysicalDeviceIndexTypeUint8FeaturesEXT,
vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
// Not having geometry shaders will cause issues with accelerated rendering.
features = feature_chain.get().features;
@ -439,6 +438,8 @@ bool Instance::CreateDevice() {
custom_border_color = AddExtension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
index_type_uint8 = AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
image_format_list = AddExtension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME);
pipeline_creation_cache_control =
AddExtension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME);
// Search queue families for graphics and present queues
auto family_properties = physical_device.getQueueFamilyProperties();
@ -518,6 +519,7 @@ bool Instance::CreateDevice() {
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>(),
feature_chain.get<vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>(),
};
if (portability_subset) {
@ -547,6 +549,10 @@ bool Instance::CreateDevice() {
device_chain.unlink<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
}
if (!pipeline_creation_cache_control) {
device_chain.unlink<vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
}
try {
device = physical_device.createDevice(device_chain.get());
} catch (vk::ExtensionNotPresentError& err) {
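The feature-chain pattern here is: declare every optional struct up front, then prune unsupported ones with unlink<T>() before device creation. A minimal sketch under that assumption (get<T>() and unlink<T>() are real vulkan-hpp StructureChain calls; the wrapper function is hypothetical):

#include <vulkan/vulkan.hpp>

vk::Device MakeDevice(vk::PhysicalDevice gpu, const vk::DeviceCreateInfo& base,
                      bool has_cache_control) {
    vk::StructureChain<vk::DeviceCreateInfo, vk::PhysicalDeviceFeatures2,
                       vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>
        chain{base, {}, {}};
    if (!has_cache_control) {
        // Drop the struct from the pNext chain rather than branching on
        // every combination of supported features.
        chain.unlink<vk::PhysicalDevicePipelineCreationCacheControlFeaturesEXT>();
    }
    return gpu.createDevice(chain.get<vk::DeviceCreateInfo>());
}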

View File

@ -137,6 +137,11 @@ public:
return image_format_list;
}
/// Returns true when VK_EXT_pipeline_creation_cache_control is supported
bool IsPipelineCreationCacheControlSupported() const {
return pipeline_creation_cache_control;
}
/// Returns the vendor ID of the physical device
u32 GetVendorID() const {
return properties.vendorID;
@ -248,6 +253,7 @@ private:
bool custom_border_color{};
bool index_type_uint8{};
bool image_format_list{};
bool pipeline_creation_cache_control{};
bool enable_validation{};
bool dump_command_buffers{};
};

View File

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <filesystem>
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
@ -14,6 +13,8 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
@ -97,374 +98,72 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
return it->second;
};
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
desc_manager{desc_manager} {
trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
PipelineCache::Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {}
PipelineCache::Shader::Shader(const Instance& instance, vk::ShaderStageFlagBits stage,
std::string code)
: Shader{instance} {
module = Compile(code, stage, instance.GetDevice(), ShaderOptimization::High);
MarkBuilt();
}
PipelineCache::~PipelineCache() {
vk::Device device = instance.GetDevice();
SaveDiskCache();
device.destroyPipelineCache(pipeline_cache);
device.destroyShaderModule(trivial_vertex_shader);
for (auto& [key, module] : programmable_vertex_shaders.shader_cache) {
PipelineCache::Shader::~Shader() {
if (module && device) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fixed_geometry_shaders.shaders) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fragment_shaders_glsl.shaders) {
device.destroyShaderModule(module);
}
PipelineCache::GraphicsPipeline::GraphicsPipeline(
const Instance& instance_, RenderpassCache& renderpass_cache_, const PipelineInfo& info_,
vk::PipelineCache pipeline_cache_, vk::PipelineLayout layout_, std::array<Shader*, 3> stages_,
Common::ThreadWorker* worker_)
: instance{instance_}, worker{worker_}, pipeline_layout{layout_},
pipeline_cache{pipeline_cache_}, info{info_}, stages{stages_},
renderpass{
renderpass_cache_.GetRenderpass(info.color_attachment, info.depth_attachment, false)} {
for (auto& [key, module] : fragment_shaders_spv.shaders) {
device.destroyShaderModule(module);
}
for (const auto& [hash, pipeline] : graphics_pipelines) {
device.destroyPipeline(pipeline);
}
graphics_pipelines.clear();
}
void PipelineCache::LoadDiskCache() {
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
// Ask the driver if it can give us the pipeline quickly
if (Build(true)) {
return;
}
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
instance.GetVendorID(), instance.GetDeviceID());
vk::PipelineCacheCreateInfo cache_info = {
.initialDataSize = 0,
.pInitialData = nullptr,
};
std::vector<u8> cache_data;
FileUtil::IOFile cache_file{cache_file_path, "r"};
if (cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Loading pipeline cache");
const u64 cache_file_size = cache_file.GetSize();
cache_data.resize(cache_file_size);
if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
if (!IsCacheValid(cache_data.data(), cache_file_size)) {
LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring");
// Fallback to (a)synchronous compilation
if (worker) {
worker->QueueWork([this] { Build(); });
} else {
cache_info.initialDataSize = cache_file_size;
cache_info.pInitialData = cache_data.data();
Build();
}
}
cache_file.Close();
PipelineCache::GraphicsPipeline::~GraphicsPipeline() {
if (pipeline) {
instance.GetDevice().destroyPipeline(pipeline);
}
}
vk::Device device = instance.GetDevice();
pipeline_cache = device.createPipelineCache(cache_info);
}
void PipelineCache::SaveDiskCache() {
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
return;
}
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
instance.GetVendorID(), instance.GetDeviceID());
FileUtil::IOFile cache_file{cache_file_path, "wb"};
if (!cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
return;
}
vk::Device device = instance.GetDevice();
auto cache_data = device.getPipelineCacheData(pipeline_cache);
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
return;
}
cache_file.Close();
}
void PipelineCache::BindPipeline(const PipelineInfo& info) {
ApplyDynamic(info);
scheduler.Record([this, info](vk::CommandBuffer cmdbuf) {
std::size_t shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
? offsetof(PipelineInfo, rasterization)
: offsetof(PipelineInfo, dynamic);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
if (new_pipeline) {
it->second = BuildPipeline(info);
}
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
current_pipeline = it->second;
});
desc_manager.BindDescriptorSets();
}
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
PicaVSConfig config{regs.rasterizer, regs.vs, setup};
config.state.use_geometry_shader = instance.UseGeometryShaders();
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) {
const auto& attrib = layout.attributes[i];
const u32 location = attrib.location.Value();
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT(is_supported || attrib.size == 3);
config.state.attrib_types[location] = attrib.type.Value();
config.state.emulated_attrib_locations[location] = is_supported ? 0 : emulated_attrib_loc++;
}
auto [handle, result] =
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::High);
if (!handle) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
MICROPROFILE_DEFINE(Vulkan_Pipeline, "Vulkan", "Pipeline Building", MP_RGB(0, 192, 32));
bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) {
if (fail_on_compile_required) {
if (!instance.IsPipelineCreationCacheControlSupported()) {
return false;
}
scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer) {
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = hash;
});
return true;
}
void PipelineCache::UseTrivialVertexShader() {
scheduler.Record([this](vk::CommandBuffer) {
current_shaders[ProgramType::VS] = trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
});
}
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
if (!instance.UseGeometryShaders()) {
return UseTrivialGeometryShader();
}
const PicaFixedGSConfig gs_config{regs};
const vk::ShaderModule handle =
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
scheduler.Record([this, handle, hash = gs_config.Hash()](vk::CommandBuffer) {
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = hash;
});
}
void PipelineCache::UseTrivialGeometryShader() {
scheduler.Record([this](vk::CommandBuffer) {
current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
shader_hashes[ProgramType::GS] = 0;
});
}
MICROPROFILE_DEFINE(Vulkan_FragmentGeneration, "Vulkan", "Fragment Shader Compilation",
MP_RGB(255, 100, 100));
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config{regs, instance};
scheduler.Record([this, config](vk::CommandBuffer) {
MICROPROFILE_SCOPE(Vulkan_FragmentGeneration);
vk::ShaderModule handle{};
if (Settings::values.spirv_shader_gen) {
handle = fragment_shaders_spv.Get(config, instance.GetDevice());
} else {
handle = fragment_shaders_glsl.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::Debug);
}
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
});
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
desc_manager.SetBinding(1, binding, DescriptorData{image_info});
}
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
desc_manager.SetBinding(3, binding, DescriptorData{image_info});
}
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
const DescriptorData data = {
.buffer_info =
vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
},
};
desc_manager.SetBinding(0, binding, data);
}
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
const DescriptorData data = {
.buffer_view = buffer_view,
};
desc_manager.SetBinding(0, binding, data);
}
void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
const DescriptorData data = {
.image_info =
vk::DescriptorImageInfo{
.sampler = sampler,
},
};
desc_manager.SetBinding(2, binding, data);
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.setViewport(0, viewport); });
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
const vk::Rect2D scissor{{x, y}, {width, height}};
scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.setScissor(0, scissor); });
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
PipelineInfo current = current_info;
scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer cmdbuf) {
if (info.dynamic.stencil_compare_mask != current.dynamic.stencil_compare_mask || is_dirty) {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_compare_mask);
}
if (info.dynamic.stencil_write_mask != current.dynamic.stencil_write_mask || is_dirty) {
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_write_mask);
}
if (info.dynamic.stencil_reference != current.dynamic.stencil_reference || is_dirty) {
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_reference);
}
if (info.dynamic.blend_color != current.dynamic.blend_color || is_dirty) {
const Common::Vec4f color = PicaToVK::ColorRGBA8(info.dynamic.blend_color);
cmdbuf.setBlendConstants(color.AsArray());
}
if (instance.IsExtendedDynamicStateSupported()) {
if (info.rasterization.cull_mode != current.rasterization.cull_mode || is_dirty) {
cmdbuf.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode));
cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode));
}
if (info.depth_stencil.depth_compare_op != current.depth_stencil.depth_compare_op ||
is_dirty) {
cmdbuf.setDepthCompareOpEXT(
PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op));
}
if (info.depth_stencil.depth_test_enable != current.depth_stencil.depth_test_enable ||
is_dirty) {
cmdbuf.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable);
}
if (info.depth_stencil.depth_write_enable != current.depth_stencil.depth_write_enable ||
is_dirty) {
cmdbuf.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable);
}
if (info.rasterization.topology != current.rasterization.topology || is_dirty) {
cmdbuf.setPrimitiveTopologyEXT(
PicaToVK::PrimitiveTopology(info.rasterization.topology));
}
if (info.depth_stencil.stencil_test_enable !=
current.depth_stencil.stencil_test_enable ||
is_dirty) {
cmdbuf.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable);
}
if (info.depth_stencil.stencil_fail_op != current.depth_stencil.stencil_fail_op ||
info.depth_stencil.stencil_pass_op != current.depth_stencil.stencil_pass_op ||
info.depth_stencil.stencil_depth_fail_op !=
current.depth_stencil.stencil_depth_fail_op ||
info.depth_stencil.stencil_compare_op != current.depth_stencil.stencil_compare_op ||
is_dirty) {
cmdbuf.setStencilOpEXT(
vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op));
}
}
});
current_info = info;
if (is_dirty) {
scheduler.MarkStateNonDirty(StateFlags::Pipeline);
// Check if all shader modules are ready
bool shaders_ready = true;
for (Shader* shader : stages) {
if (shader) {
shaders_ready &= shader->IsBuilt();
}
}
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
if (!shaders_ready) {
return false;
}
}
MICROPROFILE_SCOPE(Vulkan_Pipeline);
const vk::Device device = instance.GetDevice();
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (std::size_t i = 0; i < current_shaders.size(); i++) {
const vk::ShaderModule shader = current_shaders[i];
if (!shader) {
continue;
}
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(i),
.module = shader,
.pName = "main",
};
}
/**
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
* increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
* as the input rate. Since one instance is all we render, the shader will always read the
* single attribute.
**/
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
const auto& binding = info.vertex_layout.bindings[i];
@ -615,7 +314,45 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
.back = stencil_op_state,
};
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (std::size_t i = 0; i < stages.size(); i++) {
Shader* shader = stages[i];
if (!shader) {
continue;
}
shader->WaitBuilt();
shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(i),
.module = shader->Handle(),
.pName = "main",
};
}
std::array<vk::PipelineCreationFeedbackEXT, MAX_SHADER_STAGES> creation_stage_feedback;
for (u32 i = 0; i < shader_count; i++) {
creation_stage_feedback[i] = vk::PipelineCreationFeedbackEXT{
.flags = vk::PipelineCreationFeedbackFlagBits::eValid,
.duration = 0,
};
}
vk::PipelineCreationFeedbackEXT creation_feedback = {
.flags = vk::PipelineCreationFeedbackFlagBits::eValid,
};
const vk::PipelineCreationFeedbackCreateInfoEXT creation_feedback_info = {
.pPipelineCreationFeedback = &creation_feedback,
.pipelineStageCreationFeedbackCount = shader_count,
.pPipelineStageCreationFeedbacks = creation_stage_feedback.data(),
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.pNext = fail_on_compile_required ? &creation_feedback_info : nullptr,
.flags = fail_on_compile_required
? vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT
: vk::PipelineCreateFlags{},
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
@ -626,20 +363,397 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = desc_manager.GetPipelineLayout(),
.renderPass =
renderpass_cache.GetRenderpass(info.color_attachment, info.depth_attachment, false),
.layout = pipeline_layout,
.renderPass = renderpass,
};
if (const auto result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
result.result == vk::Result::eSuccess) {
return result.value;
const vk::ResultValue result = device.createGraphicsPipeline(pipeline_cache, pipeline_info);
if (result.result == vk::Result::eSuccess) {
pipeline = result.value;
} else if (result.result == vk::Result::eErrorPipelineCompileRequiredEXT) {
return false;
} else {
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
UNREACHABLE();
}
return VK_NULL_HANDLE;
MarkBuilt();
return true;
}
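The early-fail handshake above is the heart of the async path: with VK_EXT_pipeline_creation_cache_control, creating the pipeline with eFailOnPipelineCompileRequiredEXT makes the driver return eErrorPipelineCompileRequiredEXT instead of stalling to compile, so pipeline-cache hits bind immediately and misses fall back to the worker thread. A condensed sketch of just that handshake (the wrapper function is hypothetical; the flag and result enums are the real Vulkan ones):

#include <vulkan/vulkan.hpp>

bool TryCreateFast(vk::Device device, vk::PipelineCache cache,
                   vk::GraphicsPipelineCreateInfo info, vk::Pipeline& out) {
    // Ask the driver to fail fast instead of compiling from scratch.
    info.flags |= vk::PipelineCreateFlagBits::eFailOnPipelineCompileRequiredEXT;
    const auto result = device.createGraphicsPipeline(cache, info);
    if (result.result == vk::Result::eSuccess) {
        out = result.value; // cache hit: ready without a blocking compile
        return true;
    }
    // eErrorPipelineCompileRequiredEXT: defer the full Build() to a worker.
    return false;
}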
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
desc_manager{desc_manager}, workers{std::max(std::thread::hardware_concurrency(), 2U) - 1,
"Pipeline builder"},
trivial_vertex_shader{instance, vk::ShaderStageFlagBits::eVertex,
GenerateTrivialVertexShader()} {}
PipelineCache::~PipelineCache() {
vk::Device device = instance.GetDevice();
SaveDiskCache();
device.destroyPipelineCache(pipeline_cache);
}
void PipelineCache::LoadDiskCache() {
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
return;
}
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
instance.GetVendorID(), instance.GetDeviceID());
vk::PipelineCacheCreateInfo cache_info = {
.initialDataSize = 0,
.pInitialData = nullptr,
};
std::vector<u8> cache_data;
FileUtil::IOFile cache_file{cache_file_path, "r"};
if (cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Loading pipeline cache");
const u64 cache_file_size = cache_file.GetSize();
cache_data.resize(cache_file_size);
if (cache_file.ReadBytes(cache_data.data(), cache_file_size)) {
if (!IsCacheValid(cache_data.data(), cache_file_size)) {
LOG_WARNING(Render_Vulkan, "Pipeline cache provided invalid, ignoring");
} else {
cache_info.initialDataSize = cache_file_size;
cache_info.pInitialData = cache_data.data();
}
}
cache_file.Close();
}
vk::Device device = instance.GetDevice();
pipeline_cache = device.createPipelineCache(cache_info);
}
void PipelineCache::SaveDiskCache() {
if (!Settings::values.use_disk_shader_cache || !EnsureDirectories()) {
return;
}
const std::string cache_file_path = fmt::format("{}{:x}{:x}.bin", GetPipelineCacheDir(),
instance.GetVendorID(), instance.GetDeviceID());
FileUtil::IOFile cache_file{cache_file_path, "wb"};
if (!cache_file.IsOpen()) {
LOG_INFO(Render_Vulkan, "Unable to open pipeline cache for writing");
return;
}
vk::Device device = instance.GetDevice();
auto cache_data = device.getPipelineCacheData(pipeline_cache);
if (!cache_file.WriteBytes(cache_data.data(), cache_data.size())) {
LOG_WARNING(Render_Vulkan, "Error during pipeline cache write");
return;
}
cache_file.Close();
}
MICROPROFILE_DEFINE(Vulkan_Bind, "Vulkan", "Pipeline Bind", MP_RGB(192, 32, 32));
bool PipelineCache::BindPipeline(const PipelineInfo& info, bool wait_built) {
MICROPROFILE_SCOPE(Vulkan_Bind);
std::size_t shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
? offsetof(PipelineInfo, rasterization)
: offsetof(PipelineInfo, dynamic);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash);
if (new_pipeline) {
it->second = std::make_unique<GraphicsPipeline>(
instance, renderpass_cache, info, pipeline_cache, desc_manager.GetPipelineLayout(),
current_shaders, &workers);
}
GraphicsPipeline* const pipeline{it->second.get()};
if (!wait_built && !pipeline->IsBuilt()) {
return false;
}
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
ApplyDynamic(info, is_dirty);
if (current_pipeline != pipeline || is_dirty) {
if (!pipeline->IsBuilt()) {
scheduler.Record([pipeline](vk::CommandBuffer) { pipeline->WaitBuilt(); });
}
scheduler.Record([pipeline](vk::CommandBuffer cmdbuf) {
cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline->Handle());
});
current_pipeline = pipeline;
}
desc_manager.BindDescriptorSets();
scheduler.MarkStateNonDirty(StateFlags::Pipeline);
return true;
}
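Caller-side, the new boolean return is what enables the skip-draw policy: when the pipeline is still compiling and wait_built is false, the draw is dropped for a frame rather than stalling the emulation thread (and when a wait is unavoidable, it is recorded into the command stream so the scheduler's recording thread blocks instead). Condensed from the rasterizer change later in this commit:

if (!pipeline_cache.BindPipeline(pipeline_info, !async_shaders)) {
    return true; // pipeline still compiling; skip this draw call
}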
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
PicaVSConfig config{regs.rasterizer, regs.vs, setup};
config.state.use_geometry_shader = instance.UseGeometryShaders();
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) {
const auto& attrib = layout.attributes[i];
const u32 location = attrib.location.Value();
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT(is_supported || attrib.size == 3);
config.state.attrib_types[location] = attrib.type.Value();
config.state.emulated_attrib_locations[location] = is_supported ? 0 : emulated_attrib_loc++;
}
auto [it, new_config] = programmable_vertex_map.try_emplace(config);
if (new_config) {
auto code = GenerateVertexShader(setup, config);
if (!code) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
programmable_vertex_map[config] = nullptr;
return false;
}
std::string& program = code.value();
auto [iter, new_program] = programmable_vertex_cache.try_emplace(program, instance);
auto& shader = iter->second;
if (new_program) {
shader.program = std::move(program);
const vk::Device device = instance.GetDevice();
workers.QueueWork([device, &shader] {
shader.module = Compile(shader.program, vk::ShaderStageFlagBits::eVertex, device,
ShaderOptimization::High);
shader.MarkBuilt();
});
}
it->second = &shader;
}
Shader* const shader{it->second};
if (!shader) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
return false;
}
current_shaders[ProgramType::VS] = shader;
shader_hashes[ProgramType::VS] = config.Hash();
return true;
}
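This inlines the old ShaderDoubleCache: the lookup is two-level because leftover bytes in the PICA shader program buffer are hashed into the config, so distinct configs can generate byte-identical GLSL that should share one compiled module. A generic sketch of the idiom with stand-in key and value types:

#include <string>
#include <unordered_map>

struct Shader { /* compiled module would live here */ };

// Level 1: config -> shader pointer (int stands in for PicaVSConfig).
std::unordered_map<int, Shader*> config_map;
// Level 2: generated source -> owned shader; unordered_map node stability
// keeps the level-1 pointers valid across rehashes.
std::unordered_map<std::string, Shader> source_cache;

Shader* Lookup(int config, const std::string& generated_source) {
    auto [it, new_config] = config_map.try_emplace(config);
    if (new_config) {
        auto [iter, inserted] = source_cache.try_emplace(generated_source);
        it->second = &iter->second; // distinct configs share one shader
    }
    return it->second;
}

int main() {
    Shader* a = Lookup(1, "same source");
    Shader* b = Lookup(2, "same source"); // different config, same module
    return a == b ? 0 : 1;
}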
void PipelineCache::UseTrivialVertexShader() {
current_shaders[ProgramType::VS] = &trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
}
bool PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
if (!instance.UseGeometryShaders()) {
UseTrivialGeometryShader();
return true;
}
const PicaFixedGSConfig gs_config{regs};
auto [it, new_shader] = fixed_geometry_shaders.try_emplace(gs_config, instance);
auto& shader = it->second;
if (new_shader) {
const vk::Device device = instance.GetDevice();
workers.QueueWork([gs_config, device, &shader]() {
const std::string code = GenerateFixedGeometryShader(gs_config);
shader.module =
Compile(code, vk::ShaderStageFlagBits::eGeometry, device, ShaderOptimization::High);
shader.MarkBuilt();
});
}
current_shaders[ProgramType::GS] = &shader;
shader_hashes[ProgramType::GS] = gs_config.Hash();
return true;
}
void PipelineCache::UseTrivialGeometryShader() {
current_shaders[ProgramType::GS] = nullptr;
shader_hashes[ProgramType::GS] = 0;
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config{regs, instance};
auto [it, new_shader] = fragment_shaders.try_emplace(config, instance);
auto& shader = it->second;
if (new_shader) {
const bool emit_spirv = Settings::values.spirv_shader_gen.GetValue();
const vk::Device device = instance.GetDevice();
workers.QueueWork([config, device, emit_spirv, &shader]() {
if (emit_spirv) {
const std::vector code = GenerateFragmentShaderSPV(config);
shader.module = CompileSPV(code, device);
} else {
const std::string code = GenerateFragmentShader(config);
shader.module = Compile(code, vk::ShaderStageFlagBits::eFragment, device,
ShaderOptimization::High);
}
shader.MarkBuilt();
});
}
current_shaders[ProgramType::FS] = &shader;
shader_hashes[ProgramType::FS] = config.Hash();
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
desc_manager.SetBinding(1, binding, DescriptorData{image_info});
}
void PipelineCache::BindStorageImage(u32 binding, vk::ImageView image_view) {
const vk::DescriptorImageInfo image_info = {
.imageView = image_view,
.imageLayout = vk::ImageLayout::eGeneral,
};
desc_manager.SetBinding(3, binding, DescriptorData{image_info});
}
void PipelineCache::BindBuffer(u32 binding, vk::Buffer buffer, u32 offset, u32 size) {
const DescriptorData data = {
.buffer_info =
vk::DescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
.range = size,
},
};
desc_manager.SetBinding(0, binding, data);
}
void PipelineCache::BindTexelBuffer(u32 binding, vk::BufferView buffer_view) {
const DescriptorData data = {
.buffer_view = buffer_view,
};
desc_manager.SetBinding(0, binding, data);
}
void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
const DescriptorData data = {
.image_info =
vk::DescriptorImageInfo{
.sampler = sampler,
},
};
desc_manager.SetBinding(2, binding, data);
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.setViewport(0, viewport); });
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
const vk::Rect2D scissor{{x, y}, {width, height}};
scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.setScissor(0, scissor); });
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info, bool is_dirty) {
if (!is_dirty && info.dynamic == current_info.dynamic &&
info.rasterization.value == current_info.rasterization.value &&
info.depth_stencil.value == current_info.depth_stencil.value) {
return;
}
scheduler.Record([this, is_dirty, current_dynamic = current_info.dynamic,
current_rasterization = current_info.rasterization,
current_depth_stencil = current_info.depth_stencil, dynamic = info.dynamic,
rasterization = info.rasterization,
depth_stencil = info.depth_stencil](vk::CommandBuffer cmdbuf) {
if (dynamic.stencil_compare_mask != current_dynamic.stencil_compare_mask || is_dirty) {
cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_compare_mask);
}
if (dynamic.stencil_write_mask != current_dynamic.stencil_write_mask || is_dirty) {
cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_write_mask);
}
if (dynamic.stencil_reference != current_dynamic.stencil_reference || is_dirty) {
cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
dynamic.stencil_reference);
}
if (dynamic.blend_color != current_dynamic.blend_color || is_dirty) {
const Common::Vec4f color = PicaToVK::ColorRGBA8(dynamic.blend_color);
cmdbuf.setBlendConstants(color.AsArray());
}
if (instance.IsExtendedDynamicStateSupported()) {
if (rasterization.cull_mode != current_rasterization.cull_mode || is_dirty) {
cmdbuf.setCullModeEXT(PicaToVK::CullMode(rasterization.cull_mode));
cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(rasterization.cull_mode));
}
if (depth_stencil.depth_compare_op != current_depth_stencil.depth_compare_op ||
is_dirty) {
cmdbuf.setDepthCompareOpEXT(PicaToVK::CompareFunc(depth_stencil.depth_compare_op));
}
if (depth_stencil.depth_test_enable != current_depth_stencil.depth_test_enable ||
is_dirty) {
cmdbuf.setDepthTestEnableEXT(depth_stencil.depth_test_enable);
}
if (depth_stencil.depth_write_enable != current_depth_stencil.depth_write_enable ||
is_dirty) {
cmdbuf.setDepthWriteEnableEXT(depth_stencil.depth_write_enable);
}
if (rasterization.topology != current_rasterization.topology || is_dirty) {
cmdbuf.setPrimitiveTopologyEXT(PicaToVK::PrimitiveTopology(rasterization.topology));
}
if (depth_stencil.stencil_test_enable != current_depth_stencil.stencil_test_enable ||
is_dirty) {
cmdbuf.setStencilTestEnableEXT(depth_stencil.stencil_test_enable);
}
if (depth_stencil.stencil_fail_op != current_depth_stencil.stencil_fail_op ||
depth_stencil.stencil_pass_op != current_depth_stencil.stencil_pass_op ||
depth_stencil.stencil_depth_fail_op !=
current_depth_stencil.stencil_depth_fail_op ||
depth_stencil.stencil_compare_op != current_depth_stencil.stencil_compare_op ||
is_dirty) {
cmdbuf.setStencilOpEXT(vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(depth_stencil.stencil_compare_op));
}
}
});
current_info = info;
}
bool PipelineCache::IsCacheValid(const u8* data, u64 size) const {

View File

@ -5,13 +5,17 @@
#pragma once
#include <array>
#include "common/async_handle.h"
#include "common/bit_field.h"
#include "common/hash.h"
#include "common/thread_worker.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/shader/shader_cache.h"
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
namespace Pica {
struct Regs;
}
namespace Vulkan {
@ -59,6 +63,8 @@ struct DynamicState {
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
auto operator<=>(const DynamicState&) const noexcept = default;
};
union VertexBinding {
@ -107,21 +113,6 @@ struct PipelineInfo {
}
};
/**
* Vulkan specialized PICA shader caches
*/
using ProgrammableVertexShaders = Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule,
&Compile, &GenerateVertexShader>;
using FixedGeometryShaders = Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule,
&Compile, &GenerateFixedGeometryShader>;
using FragmentShadersGLSL =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
using FragmentShadersSPV = Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &CompileSPV,
&GenerateFragmentShaderSPV>;
class Instance;
class Scheduler;
class RenderpassCache;
@ -131,6 +122,48 @@ class DescriptorManager;
* Stores a collection of rasterizer pipelines used during rendering.
*/
class PipelineCache {
struct Shader : public Common::AsyncHandle {
Shader(const Instance& instance);
Shader(const Instance& instance, vk::ShaderStageFlagBits stage, std::string code);
~Shader();
[[nodiscard]] vk::ShaderModule Handle() const noexcept {
return module;
}
vk::ShaderModule module;
vk::Device device;
std::string program;
};
class GraphicsPipeline : public Common::AsyncHandle {
public:
GraphicsPipeline(const Instance& instance, RenderpassCache& renderpass_cache,
const PipelineInfo& info, vk::PipelineCache pipeline_cache,
vk::PipelineLayout layout, std::array<Shader*, 3> stages,
Common::ThreadWorker* worker);
~GraphicsPipeline();
bool Build(bool fail_on_compile_required = false);
[[nodiscard]] vk::Pipeline Handle() const noexcept {
return pipeline;
}
private:
const Instance& instance;
Common::ThreadWorker* worker;
vk::Pipeline pipeline;
vk::PipelineLayout pipeline_layout;
vk::PipelineCache pipeline_cache;
PipelineInfo info;
std::array<Shader*, 3> stages;
vk::RenderPass renderpass;
};
public:
PipelineCache(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache,
DescriptorManager& desc_manager);
@ -143,7 +176,7 @@ public:
void SaveDiskCache();
/// Binds a pipeline using the provided information
void BindPipeline(const PipelineInfo& info);
bool BindPipeline(const PipelineInfo& info, bool wait_built = false);
/// Binds a PICA decompiled vertex shader
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
@ -153,7 +186,7 @@ public:
void UseTrivialVertexShader();
/// Binds a PICA decompiled geometry shader
void UseFixedGeometryShader(const Pica::Regs& regs);
bool UseFixedGeometryShader(const Pica::Regs& regs);
/// Binds a passthrough geometry shader
void UseTrivialGeometryShader();
@ -184,14 +217,11 @@ public:
private:
/// Applies dynamic pipeline state to the current command buffer
void ApplyDynamic(const PipelineInfo& info);
void ApplyDynamic(const PipelineInfo& info, bool is_dirty);
/// Builds the rasterizer pipeline layout
void BuildLayout();
/// Builds a rasterizer pipeline using the PipelineInfo struct
vk::Pipeline BuildPipeline(const PipelineInfo& info);
/// Returns true when the disk data can be used by the current driver
bool IsCacheValid(const u8* data, u64 size) const;
@ -207,22 +237,26 @@ private:
RenderpassCache& renderpass_cache;
DescriptorManager& desc_manager;
// Cached pipelines
vk::PipelineCache pipeline_cache;
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
vk::Pipeline current_pipeline{};
Common::ThreadWorker workers;
PipelineInfo current_info{};
GraphicsPipeline* current_pipeline{};
std::unordered_map<u64, std::unique_ptr<GraphicsPipeline>, Common::IdentityHash<u64>>
graphics_pipelines;
// Bound shader modules
enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 };
enum ProgramType : u32 {
VS = 0,
GS = 2,
FS = 1,
};
std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
ProgrammableVertexShaders programmable_vertex_shaders;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShadersGLSL fragment_shaders_glsl;
FragmentShadersSPV fragment_shaders_spv;
vk::ShaderModule trivial_vertex_shader;
std::array<Shader*, MAX_SHADER_STAGES> current_shaders;
std::unordered_map<PicaVSConfig, Shader*> programmable_vertex_map;
std::unordered_map<std::string, Shader> programmable_vertex_cache;
std::unordered_map<PicaFixedGSConfig, Shader> fixed_geometry_shaders;
std::unordered_map<PicaFSConfig, Shader> fragment_shaders;
Shader trivial_vertex_shader;
};
} // namespace Vulkan

View File

@ -69,7 +69,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
vk::ImageAspectFlagBits::eColor, runtime},
stream_buffer{instance, scheduler, BUFFER_USAGE, STREAM_BUFFER_SIZE},
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
async_shaders{Settings::values.async_shader_compilation.GetValue()} {
vertex_buffers.fill(stream_buffer.Handle());
@ -355,8 +356,7 @@ bool RasterizerVulkan::SetupGeometryShader() {
return false;
}
pipeline_cache.UseFixedGeometryShader(regs);
return true;
return pipeline_cache.UseFixedGeometryShader(regs);
}
bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
@ -400,7 +400,9 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
}
pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
pipeline_cache.BindPipeline(pipeline_info);
if (!pipeline_cache.BindPipeline(pipeline_info, !async_shaders)) {
return true; ///< Skip draw call when pipeline is not ready
}
const DrawParams params = {
.vertex_count = regs.pipeline.num_vertices,
@ -459,7 +461,6 @@ void RasterizerVulkan::DrawTriangles() {
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
const auto& regs = Pica::g_state.regs;
const bool shadow_rendering = regs.framebuffer.IsShadowRendering();
@ -679,6 +680,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
viewport_rect_unscaled.GetWidth() * res_scale,
viewport_rect_unscaled.GetHeight() * res_scale);
MICROPROFILE_SCOPE(Vulkan_Drawing);
// Sync and bind the shader
if (shader_dirty) {
pipeline_cache.UseFragmentShader(regs);
@ -748,7 +751,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_info.vertex_layout = software_layout;
pipeline_cache.UseTrivialVertexShader();
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);
pipeline_cache.BindPipeline(pipeline_info, true);
const u32 max_vertices = STREAM_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size());
@ -785,11 +788,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface);
}
static int submit_threshold = 40;
static int submit_threshold = 20;
submit_threshold--;
if (!submit_threshold) {
submit_threshold = 40;
scheduler.Flush();
submit_threshold = 20;
scheduler.DispatchWork();
}
return succeeded;

View File

@ -202,6 +202,7 @@ private:
u64 uniform_buffer_alignment;
u64 uniform_size_aligned_vs;
u64 uniform_size_aligned_fs;
bool async_shaders{false};
};
} // namespace Vulkan

View File

@ -4,11 +4,9 @@
#pragma once
#include <functional>
#include <optional>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/regs_pipeline.h"
#include "video_core/shader/shader.h"
namespace Vulkan {

View File

@ -43,7 +43,7 @@ void FragmentModule::Generate() {
}
// Check if the fragment is outside scissor rectangle
WriteScissor();
// WriteScissor();
// Write shader bytecode to emulate all enabled PICA lights
if (config.state.lighting.enable) {

View File

@ -6,12 +6,15 @@
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Public/ShaderLang.h>
#include "common/assert.h"
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
using namespace Common::Literals;
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
@ -156,12 +159,16 @@ bool InitializeCompiler() {
return true;
}
MICROPROFILE_DEFINE(Vulkan_GLSLCompilation, "VulkanShader", "GLSL Shader Compilation",
MP_RGB(100, 255, 52));
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
ShaderOptimization level) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
}
MICROPROFILE_SCOPE(Vulkan_GLSLCompilation);
EProfile profile = ECoreProfile;
EShMessages messages =
static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
@ -209,9 +216,11 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
options.validate = true;
} else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
options.validate = false;
options.optimizeSize = true;
}
out_code.reserve(8_KiB);
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
@ -222,10 +231,7 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
return CompileSPV(out_code, device);
}
MICROPROFILE_DEFINE(Vulkan_SPVCompilation, "Vulkan", "SPIR-V Shader Compilation",
MP_RGB(100, 255, 52));
vk::ShaderModule CompileSPV(std::span<const u32> code, vk::Device device) {
MICROPROFILE_SCOPE(Vulkan_SPVCompilation);
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = code.size() * sizeof(u32),
.pCode = code.data(),

View File

@ -196,6 +196,8 @@ void Swapchain::SetPresentMode() {
present_mode = vk::PresentModeKHR::eMailbox;
}
}
LOG_INFO(Render_Vulkan, "Using {} present mode", vk::to_string(present_mode));
}
void Swapchain::SetSurfaceProperties() {
@ -217,6 +219,8 @@ void Swapchain::SetSurfaceProperties() {
capabilities.maxImageCount);
}
LOG_INFO(Render_Vulkan, "Using {} images", image_count);
// Prefer identity transform if possible
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {

View File

@ -1,96 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <optional>
#include <tuple>
#include <unordered_map>
#include "video_core/shader/shader.h"
namespace Pica::Shader {
template <typename ShaderType>
using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
class ShaderCache {
public:
ShaderCache() {}
~ShaderCache() = default;
/// Returns a shader handle generated from the provided config
template <typename... Args>
auto Get(const KeyType& config, Args&&... args) {
auto [iter, new_shader] = shaders.emplace(config, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
const auto code = CodeGenerator(config);
shader = ModuleCompiler(code, args...);
return shader;
}
return shader;
}
void Inject(const KeyType& key, ShaderType&& shader) {
shaders.emplace(key, std::move(shader));
}
public:
std::unordered_map<KeyType, ShaderType> shaders;
};
/**
* This is a cache designed for shaders translated from PICA shaders. The first cache matches the
* config structure like a normal cache does. On cache miss, the second cache matches the generated
* GLSL code. The configuration is like this because there might be leftover code in the PICA shader
* program buffer from the previous shader, which is hashed into the config, resulting several
* different config values from the same shader program.
*/
template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
class ShaderDoubleCache {
public:
ShaderDoubleCache() = default;
~ShaderDoubleCache() = default;
template <typename... Args>
auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args)
-> ShaderCacheResult<ShaderType> {
if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
auto code = CodeGenerator(setup, key);
if (!code) {
shader_map[key] = nullptr;
return std::make_pair(ShaderType{}, std::nullopt);
}
std::string& program = code.value();
auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
auto& shader = iter->second;
if (new_shader) {
shader = ModuleCompiler(program, args...);
}
shader_map[key] = &shader;
return std::make_pair(shader, std::move(program));
} else {
return std::make_pair(*map_iter->second, std::nullopt);
}
}
void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
auto& cached_shader = iter->second;
shader_map.insert_or_assign(key, &cached_shader);
}
public:
std::unordered_map<KeyType, ShaderType*> shader_map;
std::unordered_map<std::string, ShaderType> shader_cache;
};
} // namespace Pica::Shader