From c5f2267306da186f54c48eb5ce3af545f82cf39d Mon Sep 17 00:00:00 2001 From: GPUCode Date: Thu, 9 Mar 2023 14:45:22 +0200 Subject: [PATCH] renderer_vulkan: Emulate 3 component formats when unsupported * Using uscaled formats isn't enough for all targets, the Steam Deck for example still crashes. The previous approach involved breaking the attribute to vec2 + vec1 and combining them. This commit implements something much simpler; the attribute is replaced with the 4 component version and the shader then zeros the w component. Since the fixed binding is at least 16 bytes and exists at the end of the vertex data, we always have adequate space to alias so this shouldn't break. --- .../renderer_vulkan/vk_instance.cpp | 19 ++++++++++-- src/video_core/renderer_vulkan/vk_instance.h | 10 +++++-- .../renderer_vulkan/vk_pipeline_cache.cpp | 29 ++++++++++++------ .../renderer_vulkan/vk_shader_gen.cpp | 30 ++++++++++++++----- .../renderer_vulkan/vk_shader_gen.h | 11 ++++++- 5 files changed, 75 insertions(+), 24 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index ac8565c25..8e112bf60 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -368,11 +368,11 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}", properties.deviceName); + CollectTelemetryParameters(); CreateDevice(); CreateFormatTable(); CreateCustomFormatTable(); CreateAttribTable(); - CollectTelemetryParameters(); } Instance::~Instance() { @@ -549,6 +549,14 @@ void Instance::CreateCustomFormatTable() { } } +void Instance::DetermineEmulation(Pica::PipelineRegs::VertexAttributeFormat format, + bool& needs_cast) { + // Check if (u)scaled formats can be used to emulate the 3 component format + vk::Format two_comp_format = MakeAttributeFormat(format, 2); + vk::FormatProperties 
format_properties = physical_device.getFormatProperties(two_comp_format); + needs_cast = !(format_properties.bufferFeatures & vk::FormatFeatureFlagBits::eVertexBuffer); +} + void Instance::CreateAttribTable() { constexpr std::array attrib_formats = { Pica::PipelineRegs::VertexAttributeFormat::BYTE, @@ -560,6 +568,7 @@ void Instance::CreateAttribTable() { for (const auto& format : attrib_formats) { for (u32 count = 1; count <= 4; count++) { bool needs_cast{false}; + bool needs_emulation{false}; vk::Format attrib_format = MakeAttributeFormat(format, count); vk::FormatProperties format_properties = physical_device.getFormatProperties(attrib_format); @@ -569,14 +578,18 @@ void Instance::CreateAttribTable() { format_properties = physical_device.getFormatProperties(attrib_format); if (!(format_properties.bufferFeatures & vk::FormatFeatureFlagBits::eVertexBuffer)) { - ASSERT_MSG(false, "Fallback format {} unsupported, device unsuitable!", - vk::to_string(attrib_format)); + ASSERT_MSG( + count == 3, + "Vertex attribute emulation is only supported for 3 component formats"); + DetermineEmulation(format, needs_cast); + needs_emulation = true; } } const u32 index = static_cast(format) * 4 + count - 1; attrib_table[index] = FormatTraits{ .requires_conversion = needs_cast, + .requires_emulation = needs_emulation, .native = attrib_format, }; } diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 91177744a..213ba28d3 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -28,9 +28,10 @@ struct FormatTraits { bool attachment_support = false; ///< True if the format supports being used as an attachment bool storage_support = false; ///< True if the format supports storage operations bool requires_conversion = - false; ///< True if the format requires conversion to the native format - vk::ImageUsageFlags usage{}; ///< Most supported usage for the native format - 
vk::ImageAspectFlags aspect; ///< Aspect flags of the format + false; ///< True if the format requires conversion to the native format + bool requires_emulation = false; ///< True if the format requires emulation + vk::ImageUsageFlags usage{}; ///< Most supported usage for the native format + vk::ImageAspectFlags aspect; ///< Aspect flags of the format vk::Format native = vk::Format::eUndefined; ///< Closest possible native format }; @@ -279,6 +280,9 @@ private: [[nodiscard]] FormatTraits DetermineTraits(VideoCore::PixelFormat pixel_format, vk::Format format); + /// Determines the best available vertex attribute format emulation + void DetermineEmulation(Pica::PipelineRegs::VertexAttributeFormat format, bool& needs_cast); + /// Creates the format compatibility table for the current device void CreateFormatTable(); void CreateCustomFormatTable(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 36ce0d9ab..fa2aa4428 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -35,17 +35,16 @@ u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { return 0; } -char MakeAttribPrefix(Pica::PipelineRegs::VertexAttributeFormat format) { +AttribLoadFlags MakeAttribLoadFlag(Pica::PipelineRegs::VertexAttributeFormat format) { switch (format) { - case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: - return '\0'; case Pica::PipelineRegs::VertexAttributeFormat::BYTE: case Pica::PipelineRegs::VertexAttributeFormat::SHORT: - return 'i'; + return AttribLoadFlags::Sint; case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: - return 'u'; + return AttribLoadFlags::Uint; + default: + return AttribLoadFlags::Float; } - return '\0'; } vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) { @@ -168,13 +167,20 @@ bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { std::array attributes; for 
(u32 i = 0; i < info.vertex_layout.attribute_count; i++) { const auto& attr = info.vertex_layout.attributes[i]; - const FormatTraits traits = instance.GetTraits(attr.type, attr.size); + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); attributes[i] = vk::VertexInputAttributeDescription{ .location = attr.location, .binding = attr.binding, .format = traits.native, .offset = attr.offset, }; + + // At the end there's always the fixed binding which takes up + // at least 16 bytes so we should always be able to alias. + if (traits.requires_emulation) { + const FormatTraits& comp_four_traits = instance.GetTraits(attr.type, 4); + attributes[i].format = comp_four_traits.native; + } } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { @@ -519,9 +525,14 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, for (u32 i = 0; i < layout.attribute_count; i++) { const VertexAttribute& attr = layout.attributes[i]; const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + const u32 location = attr.location.Value(); + AttribLoadFlags& flags = config.state.load_flags[location]; + if (traits.requires_conversion) { - const u32 location = attr.location.Value(); - config.state.attrib_prefix[location] = MakeAttribPrefix(attr.type); + flags = MakeAttribLoadFlag(attr.type); + } + if (traits.requires_emulation) { + flags |= AttribLoadFlags::ZeroW; } } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index f240cdd2c..f2c3f784c 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -209,6 +209,7 @@ void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, sanitize_mul = VideoCore::g_hw_shader_accurate_mul; num_outputs = 0; + load_flags.fill(AttribLoadFlags::Float); output_map.fill(16); for (int reg : Common::BitSet(regs.output_mask)) { @@ -1609,6 +1610,17 @@ void main() { return 
out; } +std::string_view MakeLoadPrefix(AttribLoadFlags flag) { + if (True(flag & AttribLoadFlags::Float)) { + return ""; + } else if (True(flag & AttribLoadFlags::Sint)) { + return "i"; + } else if (True(flag & AttribLoadFlags::Uint)) { + return "u"; + } + return ""; +} + std::optional GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config) { std::string out = "#extension GL_ARB_separate_shader_objects : enable\n"; @@ -1654,14 +1666,11 @@ layout (set = 0, binding = 0, std140) uniform vs_config { // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { if (used_regs[i]) { - if (!config.state.attrib_prefix[i]) { - out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i); - } else { - const char prefix = config.state.attrib_prefix[i]; - out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, - prefix); - out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); - } + const auto flags = config.state.load_flags[i]; + const std::string_view prefix = MakeLoadPrefix(flags); + out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, + prefix); + out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); } } out += '\n'; @@ -1738,6 +1747,11 @@ layout (set = 0, binding = 0, std140) uniform vs_config { for (u32 i = 0; i < config.state.num_outputs; ++i) { out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); } + for (std::size_t i = 0; i < used_regs.size(); ++i) { + if (used_regs[i] && True(config.state.load_flags[i] & AttribLoadFlags::ZeroW)) { + out += fmt::format("vs_in_reg{0}.w = 0;\n", i); + } + } out += "\n exec_shader();\nEmitVtx();\n}\n\n"; out += program_source; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 0973052bb..03e180e69 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ 
b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -143,6 +143,14 @@ struct PicaFSConfig : Common::HashableStruct { } }; +enum class AttribLoadFlags { + Float = 1 << 0, + Sint = 1 << 1, + Uint = 1 << 2, + ZeroW = 1 << 3, +}; +DECLARE_ENUM_FLAG_OPERATORS(AttribLoadFlags) + /** * This struct contains common information to identify a GL vertex/geometry shader generated from * PICA vertex/geometry shader. @@ -157,7 +165,8 @@ struct PicaShaderConfigCommon { bool sanitize_mul; u32 num_outputs; - std::array attrib_prefix; ///< Type prefix for casted attributes + // Load operations to apply to the input vertex data + std::array load_flags; // output_map[output register index] -> output attribute index std::array output_map;