From 12e69913c24e8dd9a6bc2b59e8eb520b99a429f6 Mon Sep 17 00:00:00 2001 From: GPUCode Date: Tue, 24 Jan 2023 23:31:05 +0200 Subject: [PATCH] renderer_vulkan: Rework attribute format handling * Centralize format support query to the instance similar to pixel formats. In addition, drop the component splitting. Favour scaled formats which don't require any shader casting to work and fall back to uint if necessary. Using scaled formats also has the benefit of reducing vertex shader permutations. --- .../renderer_vulkan/vk_instance.cpp | 115 +++++++++++++---- src/video_core/renderer_vulkan/vk_instance.h | 13 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 116 ++++-------------- .../renderer_vulkan/vk_renderpass_cache.cpp | 28 ++--- .../renderer_vulkan/vk_shader_gen.cpp | 65 +--------- .../renderer_vulkan/vk_shader_gen.h | 6 +- 6 files changed, 151 insertions(+), 192 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index 2596f3d47..85561f7d1 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -82,7 +82,7 @@ static VKAPI_ATTR VkBool32 VKAPI_CALL DebugReportCallback(VkDebugReportFlagsEXT return VK_FALSE; } -vk::Format ToVkFormat(VideoCore::PixelFormat format) { +vk::Format MakeFormat(VideoCore::PixelFormat format) { switch (format) { case VideoCore::PixelFormat::RGBA8: return vk::Format::eR8G8B8A8Unorm; @@ -104,11 +104,55 @@ vk::Format ToVkFormat(VideoCore::PixelFormat format) { LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format); return vk::Format::eUndefined; default: - // Use default case for the texture formats - return vk::Format::eR8G8B8A8Unorm; + return vk::Format::eR8G8B8A8Unorm; ///< Use default case for the texture formats } } +vk::Format MakeAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 count, + bool scaled = true) { + static constexpr std::array attrib_formats_scaled = { + vk::Format::eR8Sscaled, 
vk::Format::eR8G8Sscaled, + vk::Format::eR8G8B8Sscaled, vk::Format::eR8G8B8A8Sscaled, + vk::Format::eR8Uscaled, vk::Format::eR8G8Uscaled, + vk::Format::eR8G8B8Uscaled, vk::Format::eR8G8B8A8Uscaled, + vk::Format::eR16Sscaled, vk::Format::eR16G16Sscaled, + vk::Format::eR16G16B16Sscaled, vk::Format::eR16G16B16A16Sscaled, + vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32Sfloat, vk::Format::eR32G32B32A32Sfloat, + }; + static constexpr std::array attrib_formats_int = { + vk::Format::eR8Sint, vk::Format::eR8G8Sint, + vk::Format::eR8G8B8Sint, vk::Format::eR8G8B8A8Sint, + vk::Format::eR8Uint, vk::Format::eR8G8Uint, + vk::Format::eR8G8B8Uint, vk::Format::eR8G8B8A8Uint, + vk::Format::eR16Sint, vk::Format::eR16G16Sint, + vk::Format::eR16G16B16Sint, vk::Format::eR16G16B16A16Sint, + vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, + vk::Format::eR32G32B32Sfloat, vk::Format::eR32G32B32A32Sfloat, + }; + + const u32 index = static_cast(format); + return (scaled ? attrib_formats_scaled : attrib_formats_int)[index * 4 + count - 1]; +} + +vk::ImageAspectFlags MakeAspect(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return vk::ImageAspectFlagBits::eColor; + case VideoCore::SurfaceType::Depth: + return vk::ImageAspectFlagBits::eDepth; + case VideoCore::SurfaceType::DepthStencil: + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + default: + LOG_CRITICAL(Render_Vulkan, "Invalid surface type {}", type); + UNREACHABLE(); + } + + return vk::ImageAspectFlagBits::eColor; +} + [[nodiscard]] vk::DebugUtilsMessengerCreateInfoEXT MakeDebugUtilsMessengerInfo() { return vk::DebugUtilsMessengerCreateInfoEXT{ .messageSeverity = vk::DebugUtilsMessageSeverityFlagBitsEXT::eInfo | @@ -306,6 +350,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) CreateDevice(); CreateFormatTable(); + 
CreateAttribTable(); CollectTelemetryParameters(); } @@ -325,31 +370,24 @@ Instance::~Instance() { instance.destroy(); } -FormatTraits Instance::GetTraits(VideoCore::PixelFormat pixel_format) const { +const FormatTraits& Instance::GetTraits(VideoCore::PixelFormat pixel_format) const { if (pixel_format == VideoCore::PixelFormat::Invalid) [[unlikely]] { - return FormatTraits{}; + constexpr static FormatTraits null_traits{}; + return null_traits; } const u32 index = static_cast(pixel_format); return format_table[index]; } -vk::ImageAspectFlags MakeAspect(VideoCore::SurfaceType type) { - switch (type) { - case VideoCore::SurfaceType::Color: - case VideoCore::SurfaceType::Texture: - case VideoCore::SurfaceType::Fill: - return vk::ImageAspectFlagBits::eColor; - case VideoCore::SurfaceType::Depth: - return vk::ImageAspectFlagBits::eDepth; - case VideoCore::SurfaceType::DepthStencil: - return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; - default: - LOG_CRITICAL(Render_Vulkan, "Invalid surface type {}", type); - UNREACHABLE(); +const FormatTraits& Instance::GetTraits(Pica::PipelineRegs::VertexAttributeFormat format, + u32 count) const { + if (count == 0) [[unlikely]] { + ASSERT_MSG(false, "Unable to retrieve traits for invalid attribute component count"); } - return vk::ImageAspectFlagBits::eColor; + const u32 index = static_cast(format); + return attrib_table[index * 4 + count - 1]; } FormatTraits Instance::DetermineTraits(VideoCore::PixelFormat pixel_format, vk::Format format) { @@ -417,10 +455,11 @@ void Instance::CreateFormatTable() { VideoCore::PixelFormat::I4, VideoCore::PixelFormat::A4, VideoCore::PixelFormat::ETC1, VideoCore::PixelFormat::ETC1A4, VideoCore::PixelFormat::D16, VideoCore::PixelFormat::D24, - VideoCore::PixelFormat::D24S8}; + VideoCore::PixelFormat::D24S8, + }; for (const auto& pixel_format : pixel_formats) { - const auto format = ToVkFormat(pixel_format); + const auto format = MakeFormat(pixel_format); FormatTraits traits = 
DetermineTraits(pixel_format, format); const bool is_suitable = @@ -448,6 +487,40 @@ void Instance::CreateFormatTable() { } } +void Instance::CreateAttribTable() { + constexpr std::array attrib_formats = { + Pica::PipelineRegs::VertexAttributeFormat::BYTE, + Pica::PipelineRegs::VertexAttributeFormat::UBYTE, + Pica::PipelineRegs::VertexAttributeFormat::SHORT, + Pica::PipelineRegs::VertexAttributeFormat::FLOAT, + }; + + for (const auto& format : attrib_formats) { + for (u32 count = 1; count <= 4; count++) { + bool needs_cast{false}; + vk::Format attrib_format = MakeAttributeFormat(format, count); + vk::FormatProperties format_properties = + physical_device.getFormatProperties(attrib_format); + if (!(format_properties.bufferFeatures & vk::FormatFeatureFlagBits::eVertexBuffer)) { + needs_cast = true; + attrib_format = MakeAttributeFormat(format, count, false); + format_properties = physical_device.getFormatProperties(attrib_format); + if (!(format_properties.bufferFeatures & + vk::FormatFeatureFlagBits::eVertexBuffer)) { + ASSERT_MSG(false, "Fallback format {} unsupported, device unsuitable!", + vk::to_string(attrib_format)); + } + } + + const u32 index = static_cast(format) * 4 + count - 1; + attrib_table[index] = FormatTraits{ + .requires_conversion = needs_cast, + .native = attrib_format, + }; + } + } +} + bool Instance::CreateDevice() { const vk::StructureChain feature_chain = physical_device.getFeatures2< vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR, diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 0ed17f332..1add82724 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -4,10 +4,10 @@ #pragma once -#include #include #include #include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/regs_pipeline.h" #include "video_core/renderer_vulkan/vk_common.h" namespace Frontend { @@ -38,7 +38,11 @@ public: 
~Instance(); /// Returns the FormatTraits struct for the provided pixel format - FormatTraits GetTraits(VideoCore::PixelFormat pixel_format) const; + const FormatTraits& GetTraits(VideoCore::PixelFormat pixel_format) const; + + /// Returns the FormatTraits struct for the provided attribute format and count + const FormatTraits& GetTraits(Pica::PipelineRegs::VertexAttributeFormat format, + u32 count) const; /// Returns the Vulkan instance vk::Instance GetInstance() const { @@ -220,6 +224,9 @@ private: /// Creates the format compatibility table for the current device void CreateFormatTable(); + /// Creates the attribute format table for the current device + void CreateAttribTable(); + /// Creates the logical device opportunistically enabling extensions bool CreateDevice(); @@ -247,10 +254,10 @@ private: vk::Queue graphics_queue; std::vector physical_devices; std::array format_table; + std::array attrib_table; std::vector available_extensions; u32 present_queue_family_index{0}; u32 graphics_queue_family_index{0}; - bool triangle_fan_supported{true}; u32 min_vertex_stride_alignment{1}; bool timeline_semaphores{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 51d4ab32f..bd34f8bd6 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -28,43 +28,23 @@ u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: return sizeof(u8) * size; } - return 0; } -vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) { - static constexpr std::array attribute_formats = { - std::array{ - vk::Format::eR8Sint, - vk::Format::eR8G8Sint, - vk::Format::eR8G8B8Sint, - vk::Format::eR8G8B8A8Sint, - }, - std::array{ - vk::Format::eR8Uint, - vk::Format::eR8G8Uint, - vk::Format::eR8G8B8Uint, - vk::Format::eR8G8B8A8Uint, - }, - 
std::array{ - vk::Format::eR16Sint, - vk::Format::eR16G16Sint, - vk::Format::eR16G16B16Sint, - vk::Format::eR16G16B16A16Sint, - }, - std::array{ - vk::Format::eR32Sfloat, - vk::Format::eR32G32Sfloat, - vk::Format::eR32G32B32Sfloat, - vk::Format::eR32G32B32A32Sfloat, - }, - }; - - ASSERT(size <= 4); - return attribute_formats[static_cast(format)][size - 1]; +char MakeAttribPrefix(Pica::PipelineRegs::VertexAttributeFormat format) { + switch (format) { + case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: + return '\0'; + case Pica::PipelineRegs::VertexAttributeFormat::BYTE: + case Pica::PipelineRegs::VertexAttributeFormat::SHORT: + return 'i'; + case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: + return 'u'; + } + return '\0'; } -vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { +vk::ShaderStageFlagBits MakeShaderStage(std::size_t index) { switch (index) { case 0: return vk::ShaderStageFlagBits::eVertex; @@ -76,28 +56,9 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { LOG_CRITICAL(Render_Vulkan, "Invalid shader stage index!"); UNREACHABLE(); } - return vk::ShaderStageFlagBits::eVertex; } -[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, - const Instance& instance) { - static std::unordered_map format_support_cache; - - vk::PhysicalDevice physical_device = instance.GetPhysicalDevice(); - const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size); - auto [it, new_format] = format_support_cache.try_emplace(format, false); - if (new_format) { - LOG_INFO(Render_Vulkan, "Quering support for format {}", vk::to_string(format)); - const vk::FormatFeatureFlags features = - physical_device.getFormatProperties(format).bufferFeatures; - it->second = (features & vk::FormatFeatureFlagBits::eVertexBuffer) == - vk::FormatFeatureFlagBits::eVertexBuffer; - } - - return it->second; -}; - PipelineCache::Shader::Shader(const Instance& instance) : device{instance.GetDevice()} {} PipelineCache::Shader::Shader(const 
Instance& instance, vk::ShaderStageFlagBits stage, @@ -170,43 +131,22 @@ bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { }; } - u32 emulated_attrib_count = 0; - std::array attributes; + std::array attributes; for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) { - const VertexAttribute& attrib = info.vertex_layout.attributes[i]; - const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size); - const bool is_supported = IsAttribFormatSupported(attrib, instance); - ASSERT_MSG(is_supported || attrib.size == 3, "Failed attrib is_supported {} size {}", - is_supported, attrib.size); - + const auto& attr = info.vertex_layout.attributes[i]; + const FormatTraits traits = instance.GetTraits(attr.type, attr.size); attributes[i] = vk::VertexInputAttributeDescription{ - .location = attrib.location, - .binding = attrib.binding, - .format = is_supported ? format : ToVkAttributeFormat(attrib.type, 2), - .offset = attrib.offset, + .location = attr.location, + .binding = attr.binding, + .format = traits.native, + .offset = attr.offset, }; - - // When the requested 3-component vertex format is unsupported by the hardware - // is it emulated by breaking it into a vec2 + vec1. These are combined to a vec3 - // by the vertex shader. 
- if (!is_supported) { - const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++; - LOG_WARNING(Render_Vulkan, "\nEmulating attrib {} at location {}\n", attrib.location, - location); - attributes[location] = vk::VertexInputAttributeDescription{ - .location = location, - .binding = attrib.binding, - .format = ToVkAttributeFormat(attrib.type, 1), - .offset = attrib.offset + AttribBytes(attrib.type, 2), - }; - } } const vk::PipelineVertexInputStateCreateInfo vertex_input_info = { .vertexBindingDescriptionCount = info.vertex_layout.binding_count, .pVertexBindingDescriptions = bindings.data(), - .vertexAttributeDescriptionCount = - info.vertex_layout.attribute_count + emulated_attrib_count, + .vertexAttributeDescriptionCount = info.vertex_layout.attribute_count, .pVertexAttributeDescriptions = attributes.data(), }; @@ -320,7 +260,7 @@ bool PipelineCache::GraphicsPipeline::Build(bool fail_on_compile_required) { shader->WaitBuilt(); shader_stages[shader_count++] = vk::PipelineShaderStageCreateInfo{ - .stage = ToVkShaderStage(i), + .stage = MakeShaderStage(i), .module = shader->Handle(), .pName = "main", }; @@ -504,15 +444,13 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, PicaVSConfig config{regs.rasterizer, regs.vs, setup}; config.state.use_geometry_shader = instance.UseGeometryShaders(); - u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; for (u32 i = 0; i < layout.attribute_count; i++) { - const auto& attrib = layout.attributes[i]; - const u32 location = attrib.location.Value(); - const bool is_supported = IsAttribFormatSupported(attrib, instance); - ASSERT(is_supported || attrib.size == 3); - - config.state.attrib_types[location] = attrib.type.Value(); - config.state.emulated_attrib_locations[location] = is_supported ? 
0 : emulated_attrib_loc++; + const VertexAttribute& attr = layout.attributes[i]; + const FormatTraits& traits = instance.GetTraits(attr.type, attr.size); + if (traits.requires_conversion) { + const u32 location = attr.location.Value(); + config.state.attrib_prefix[location] = MakeAttribPrefix(attr.type); + } } auto [it, new_config] = programmable_vertex_map.try_emplace(config); diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 8af0f5cfd..981a411eb 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -41,25 +41,25 @@ VideoCore::PixelFormat ToFormatDepth(u32 index) { RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler) : instance{instance}, scheduler{scheduler} { - // Pre-create all needed renderpasses by the renderer + for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) { for (u32 depth = 0; depth <= MAX_DEPTH_FORMATS; depth++) { - const FormatTraits color_traits = instance.GetTraits(ToFormatColor(color)); - const FormatTraits depth_traits = instance.GetTraits(ToFormatDepth(depth)); - - const vk::Format color_format = color_traits.native; - const vk::Format depth_format = depth_traits.native; - - if (color_format == vk::Format::eUndefined && depth_format == vk::Format::eUndefined) { + const VideoCore::PixelFormat color_format = ToFormatColor(color); + const VideoCore::PixelFormat depth_format = ToFormatDepth(depth); + if (color_format == VideoCore::PixelFormat::Invalid && + depth_format == VideoCore::PixelFormat::Invalid) { continue; } - cached_renderpasses[color][depth][0] = - CreateRenderPass(color_format, depth_format, vk::AttachmentLoadOp::eLoad, - vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral); - cached_renderpasses[color][depth][1] = - CreateRenderPass(color_format, depth_format, vk::AttachmentLoadOp::eClear, - vk::ImageLayout::eGeneral, 
vk::ImageLayout::eGeneral); + const FormatTraits& color_traits = instance.GetTraits(color_format); + const FormatTraits& depth_traits = instance.GetTraits(depth_format); + + cached_renderpasses[color][depth][0] = CreateRenderPass( + color_traits.native, depth_traits.native, vk::AttachmentLoadOp::eLoad, + vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral); + cached_renderpasses[color][depth][1] = CreateRenderPass( + color_traits.native, depth_traits.native, vk::AttachmentLoadOp::eClear, + vk::ImageLayout::eGeneral, vk::ImageLayout::eGeneral); } } } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 221e5e97e..4a047fa50 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1643,67 +1643,12 @@ layout (set = 0, binding = 0, std140) uniform vs_config { // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { if (used_regs[i]) { - std::string_view prefix; - switch (config.state.attrib_types[i]) { - case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: - prefix = ""; - break; - case Pica::PipelineRegs::VertexAttributeFormat::BYTE: - case Pica::PipelineRegs::VertexAttributeFormat::SHORT: - prefix = "i"; - break; - case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: - prefix = "u"; - break; - default: - LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", - config.state.attrib_types[i]); - UNREACHABLE(); - } - - out += - fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix); - } - } - - // Some 3-component attributes might be emulated by breaking them to vec2 + scalar. 
- // Define them here and combine them below - for (std::size_t i = 0; i < used_regs.size(); ++i) { - if (const u32 location = config.state.emulated_attrib_locations[i]; - location != 0 && used_regs[i]) { - std::string_view type; - switch (config.state.attrib_types[i]) { - case Pica::PipelineRegs::VertexAttributeFormat::FLOAT: - type = "float"; - break; - case Pica::PipelineRegs::VertexAttributeFormat::BYTE: - case Pica::PipelineRegs::VertexAttributeFormat::SHORT: - type = "int"; - break; - case Pica::PipelineRegs::VertexAttributeFormat::UBYTE: - type = "uint"; - break; - default: - LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", - config.state.attrib_types[i]); - UNREACHABLE(); - } - - out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, - type, i); - } - } - - out += '\n'; - - // cast input registers to float to avoid computational errors - for (std::size_t i = 0; i < used_regs.size(); ++i) { - if (used_regs[i]) { - if (config.state.emulated_attrib_locations[i] != 0) { - out += fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), " - "float(vs_in_typed_reg{0}_part2), 0.f);\n", - i); + if (!config.state.attrib_prefix[i]) { + out += fmt::format("layout(location = {0}) in vec4 vs_in_reg{0};\n", i); } else { + const char prefix = config.state.attrib_prefix[i]; + out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, + prefix); out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i); } } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index c932227ba..be5091c87 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -155,10 +155,9 @@ struct PicaShaderConfigCommon { u64 swizzle_hash; u32 main_offset; bool sanitize_mul; - std::array attrib_types; - std::array emulated_attrib_locations; u32 num_outputs; + std::array attrib_prefix; ///< Type prefix for casted 
attributes // output_map[output register index] -> output attribute index std::array output_map; @@ -185,9 +184,6 @@ struct PicaVSConfig : Common::HashableStruct { Pica::Shader::ShaderSetup& setup) { state.Init(rasterizer, regs, setup); } - explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { - state = conf; - } }; struct PicaGSConfigCommonRaw {