diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index e66cbb166..41cdd4cdc 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -252,6 +252,7 @@ bool Instance::CreateDevice() { AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); + AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME); timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME); push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); @@ -322,6 +323,7 @@ bool Instance::CreateDevice() { .shaderStorageImageMultisample = available.shaderStorageImageMultisample, .shaderClipDistance = available.shaderClipDistance}}, vk::PhysicalDeviceDepthClipControlFeaturesEXT{.depthClipControl = true}, + vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true}, feature_chain.get(), feature_chain.get()}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 87943876e..917d79952 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -78,33 +78,20 @@ u32 AttribBytes(VertexAttribute attrib) { } vk::Format ToVkAttributeFormat(VertexAttribute attrib) { - switch (attrib.type) { - case AttribType::Float: - switch (attrib.size) { - case 1: - return vk::Format::eR32Sfloat; - case 2: - return vk::Format::eR32G32Sfloat; - case 3: - return vk::Format::eR32G32B32Sfloat; - case 4: - return vk::Format::eR32G32B32A32Sfloat; - } - case AttribType::Ubyte: - switch (attrib.size) { - case 4: - return vk::Format::eR8G8B8A8Uint; - default: - fmt::print("{}\n", attrib.size.Value()); - UNREACHABLE(); - } + constexpr std::array attribute_formats = { + std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat, + vk::Format::eR32G32B32A32Sfloat}, + std::array{vk::Format::eR32Sint, vk::Format::eR32G32Sint, vk::Format::eR32G32B32Sint, + vk::Format::eR32G32B32A32Sint}, + std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint, + vk::Format::eR16G16B16A16Sint}, + std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint, + vk::Format::eR8G8B8A8Sint}, + std::array{vk::Format::eR8Uint, vk::Format::eR8G8Uint, vk::Format::eR8G8B8Uint, + vk::Format::eR8G8B8A8Uint}}; - default: - LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute type {}", attrib.type.Value()); - UNREACHABLE(); - } - - return vk::Format::eR32Sfloat; + ASSERT(attrib.size <= 4); + return attribute_formats[static_cast(attrib.type.Value())][attrib.size.Value() - 1]; } vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) { @@ -197,8 +184,14 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) { } bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, - Pica::Shader::ShaderSetup& setup) { - const PicaVSConfig config{regs.vs, setup}; + Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout) { + PicaVSConfig config{regs.vs, setup}; + for (u32 i = 0; i < layout.attribute_count; i++) { + const auto& attrib = layout.attributes[i]; + config.state.attrib_types[attrib.location.Value()] = attrib.type.Value(); + } + auto [handle, result] = programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex, instance.GetDevice(), ShaderOptimization::Debug); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 8a7ec0484..d21c1ba1e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,8 +22,6 @@ constexpr u32 MAX_VERTEX_BINDINGS = 16; constexpr u32 MAX_DESCRIPTORS = 8; constexpr u32 MAX_DESCRIPTOR_SETS = 6; -enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 }; - /** * The pipeline state is tightly packed with bitfields to reduce * the overhead of hashing as much as possible @@ -154,7 +152,8 @@ public: void BindPipeline(const PipelineInfo& info); /// Binds a PICA decompiled vertex shader - bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup); + bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, + const VertexLayout& layout); /// Binds a passthrough vertex shader void UseTrivialVertexShader(); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index c59bdd5fc..3e396e473 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -328,7 +328,7 @@ RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_i void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max) { - auto [array_ptr, array_offset, _] = vertex_buffer.Map(vs_input_size, 4); + auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4); // The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU // how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base @@ -340,9 +340,8 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi const auto& vertex_attributes = regs.pipeline.vertex_attributes; PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE - VertexLayout layout{}; std::array enable_attributes{}; - std::array binding_offsets{}; + VertexLayout layout{}; u32 buffer_offset = array_offset; for (const auto& loader : vertex_attributes.attribute_loaders) { @@ -387,26 +386,32 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi const PAddr data_addr = base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; - const u32 data_size = loader.byte_count * vertex_num; + u32 data_size = loader.byte_count * vertex_num; res_cache.FlushRegion(data_addr, data_size, nullptr); std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size); // Create the binding associated with this loader - VertexBinding& binding = layout.bindings.at(layout.binding_count); + VertexBinding& binding = layout.bindings[layout.binding_count]; binding.binding.Assign(layout.binding_count); binding.fixed.Assign(0); binding.stride.Assign(loader.byte_count); // Keep track of the binding offsets so we can bind the vertex buffer later binding_offsets[layout.binding_count++] = buffer_offset; + data_size = Common::AlignUp(data_size, 16); array_ptr += data_size; buffer_offset += data_size; } - // Reserve the last binding for fixed attributes - u32 offset = 0; - bool has_fixed_binding = false; + // Reserve the last binding for fixed and default attributes + // Place the default attrib at offset zero for easy access + constexpr Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f); + u32 offset = sizeof(Common::Vec4f); + std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f)); + array_ptr += sizeof(Common::Vec4f); + + // Find all fixed attributes and assign them to the last binding for (std::size_t i = 0; i < 16; i++) { if (vertex_attributes.IsDefaultAttribute(i)) { const u32 reg = regs.vs.GetRegisterForAttribute(i); @@ -415,11 +420,10 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(), attr.w.ToFloat32()}; - // Copy the data to the end of the buffer const u32 data_size = sizeof(float) * static_cast(data.size()); std::memcpy(array_ptr, data.data(), data_size); - VertexAttribute& attribute = layout.attributes.at(layout.attribute_count++); + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; attribute.binding.Assign(layout.binding_count); attribute.location.Assign(reg); attribute.offset.Assign(offset); @@ -428,21 +432,36 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi offset += data_size; array_ptr += data_size; - has_fixed_binding = true; + enable_attributes[reg] = true; } } } - if (has_fixed_binding) { - VertexBinding& binding = layout.bindings.at(layout.binding_count); - binding.binding.Assign(layout.binding_count); - binding.fixed.Assign(1); - binding.stride.Assign(offset); - - binding_offsets[layout.binding_count++] = buffer_offset; - buffer_offset += offset; + // Loop one more time to find unused attributes and assign them to the default one + // This needs to happen because i = 2 might be assigned to location = 3 so the loop + // above would skip setting it + for (std::size_t i = 0; i < 16; i++) { + // If the attribute is just disabled, shove the default attribute to avoid + // errors if the shader ever decides to use it. The pipeline cache can discard + // this if needed since it has access to the usage mask from the code generator + if (!enable_attributes[i]) { + VertexAttribute& attribute = layout.attributes[layout.attribute_count++]; + attribute.binding.Assign(layout.binding_count); + attribute.location.Assign(i); + attribute.offset.Assign(0); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(4); + } } + // Define the fixed+default binding + VertexBinding& binding = layout.bindings[layout.binding_count]; + binding.binding.Assign(layout.binding_count); + binding.fixed.Assign(1); + binding.stride.Assign(offset); + binding_offsets[layout.binding_count++] = buffer_offset; + buffer_offset += offset; + pipeline_info.vertex_layout = layout; vertex_buffer.Commit(buffer_offset - array_offset); @@ -457,7 +476,8 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi bool RasterizerVulkan::SetupVertexShader() { MICROPROFILE_SCOPE(OpenGL_VS); - return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs); + return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs, + pipeline_info.vertex_layout); } bool RasterizerVulkan::SetupGeometryShader() { @@ -484,14 +504,6 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) { } } - if (!SetupVertexShader()) { - return false; - } - - if (!SetupGeometryShader()) { - return false; - } - return Draw(true, is_indexed); } @@ -506,6 +518,15 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { } SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max); + + if (!SetupVertexShader()) { + return false; + } + + if (!SetupGeometryShader()) { + return false; + } + pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology); pipeline_cache.BindPipeline(pipeline_info); @@ -848,6 +869,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { succeeded = AccelerateDrawBatchInternal(is_indexed); } else { pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List); + pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout(); pipeline_cache.UseTrivialVertexShader(); pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7a5d683b4..14ed6c95b 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -273,6 +273,7 @@ private: }; std::vector vertex_batch; + std::array binding_offsets{}; ImageAlloc default_texture; vk::Sampler default_sampler; @@ -289,8 +290,6 @@ private: bool dirty = true; } uniform_block_data = {}; - std::array hw_enabled_attributes{}; - std::array texture_samplers; SamplerInfo texture_cube_sampler; std::unordered_map samplers; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index 3550ec85e..79c9ced63 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -1625,7 +1625,24 @@ layout (set = 0, binding = 0, std140) uniform vs_config { // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { if (used_regs[i]) { - out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, i == 3 ? "" : ""); + std::string_view prefix; + switch (config.state.attrib_types[i]) { + case AttribType::Float: + prefix = ""; + break; + case AttribType::Byte: + case AttribType::Short: + prefix = "i"; + break; + case AttribType::Ubyte: + prefix = "u"; + break; + default: + LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]); + UNREACHABLE(); + } + + out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, prefix); } } out += '\n'; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 82bd7e850..336fa274e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -12,6 +12,8 @@ namespace Vulkan { +enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 }; + enum Attributes { ATTRIBUTE_POSITION, ATTRIBUTE_COLOR, @@ -147,6 +149,7 @@ struct PicaShaderConfigCommon { u64 swizzle_hash; u32 main_offset; bool sanitize_mul; + std::array attrib_types; u32 num_outputs; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index a03fec4df..e70767259 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include +#include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "video_core/renderer_vulkan/vk_instance.h" @@ -115,6 +116,11 @@ std::tuple StreamBuffer::Map(u32 size, u32 alignment) { const u32 current_bucket = scheduler.GetCurrentSlotIndex(); auto& bucket = buckets[current_bucket]; + + if (alignment > 0) { + bucket.offset = Common::AlignUp(bucket.offset, alignment); + } + if (bucket.offset + size > bucket_size) { UNREACHABLE(); }