diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index a5be75431..37679b6a6 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -180,7 +180,8 @@ void RasterizerAccelerated::ClearAll(bool flush) { cached_pages = {}; } -RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) { +RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray( + bool is_indexed, u32 stride_alignment) { const auto& regs = Pica::g_state.regs; const auto& vertex_attributes = regs.pipeline.vertex_attributes; @@ -211,7 +212,9 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray u32 vs_input_size = 0; for (const auto& loader : vertex_attributes.attribute_loaders) { if (loader.component_count != 0) { - vs_input_size += Common::AlignUp(loader.byte_count * vertex_num, 4); + const u32 aligned_stride = + Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment); + vs_input_size += Common::AlignUp(aligned_stride * vertex_num, 4); } } diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index ef4985328..e2270fd2d 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -122,7 +122,7 @@ protected: }; /// Retrieve the range and the size of the input vertex - VertexArrayInfo AnalyzeVertexArray(bool is_indexed); + VertexArrayInfo AnalyzeVertexArray(bool is_indexed, u32 stride_alignment = 1); protected: std::array cached_pages{}; diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index dee901b79..24be013fe 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -492,12 +492,19 @@ bool Instance::CreateDevice() { } #if __APPLE__ - const vk::StructureChain portability_chain = + const vk::StructureChain 
portability_features_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2, vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); const vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features = - portability_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); + portability_features_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); triangle_fan_supported = portability_features.triangleFans; + + const vk::StructureChain portability_properties_chain = + physical_device.getProperties2<vk::PhysicalDeviceProperties2, + vk::PhysicalDevicePortabilitySubsetPropertiesKHR>(); + const vk::PhysicalDevicePortabilitySubsetPropertiesKHR portability_properties = + portability_properties_chain.get<vk::PhysicalDevicePortabilitySubsetPropertiesKHR>(); + min_vertex_stride_alignment = portability_properties.minVertexInputBindingStrideAlignment; #endif try { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 0a9609984..d81714de6 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -195,6 +195,11 @@ public: return triangle_fan_supported; } + /// Returns the minimum vertex stride alignment + u32 GetMinVertexStrideAlignment() const { + return min_vertex_stride_alignment; + } + private: /// Returns the optimal supported usage for the requested format vk::FormatFeatureFlags GetFormatFeatures(vk::Format format); @@ -233,6 +238,7 @@ private: u32 graphics_queue_family_index{0}; bool triangle_fan_supported{true}; + u32 min_vertex_stride_alignment{1}; bool timeline_semaphores{}; bool extended_dynamic_state{}; bool push_descriptors{}; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 26c2ee643..1565aa9f7 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -216,6 +216,8 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi const auto& vertex_attributes = regs.pipeline.vertex_attributes; PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE + const u32 stride_alignment = instance.GetMinVertexStrideAlignment(); + VertexLayout& layout = 
pipeline_info.vertex_layout; layout.attribute_count = 0; layout.binding_count = 0; @@ -262,20 +264,32 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi base_address + loader.data_offset + (vs_input_index_min * loader.byte_count); const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1; const u32 data_size = loader.byte_count * vertex_num; - res_cache.FlushRegion(data_addr, data_size); - std::memcpy(array_ptr + buffer_offset, VideoCore::g_memory->GetPhysicalPointer(data_addr), - data_size); + + const u8* src_ptr = VideoCore::g_memory->GetPhysicalPointer(data_addr); + u8* dst_ptr = array_ptr + buffer_offset; + + // Align stride up if required by Vulkan implementation. + const u32 aligned_stride = + Common::AlignUp(static_cast<u32>(loader.byte_count), stride_alignment); + if (aligned_stride == loader.byte_count) { + std::memcpy(dst_ptr, src_ptr, data_size); + } else { + for (size_t vertex = 0; vertex < vertex_num; vertex++) { + std::memcpy(dst_ptr + vertex * aligned_stride, src_ptr + vertex * loader.byte_count, + loader.byte_count); + } + } // Create the binding associated with this loader VertexBinding& binding = layout.bindings[layout.binding_count]; binding.binding.Assign(layout.binding_count); binding.fixed.Assign(0); - binding.stride.Assign(loader.byte_count); + binding.stride.Assign(aligned_stride); // Keep track of the binding offsets so we can bind the vertex buffer later binding_offsets[layout.binding_count++] = array_offset + buffer_offset; - buffer_offset += Common::AlignUp(data_size, 4); + buffer_offset += Common::AlignUp(aligned_stride * vertex_num, 4); } binding_offsets[layout.binding_count] = array_offset + buffer_offset; @@ -392,7 +406,7 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) { const auto& regs = Pica::g_state.regs; const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = - AnalyzeVertexArray(is_indexed); + AnalyzeVertexArray(is_indexed, 
instance.GetMinVertexStrideAlignment()); if (regs.pipeline.triangle_topology == TriangleTopology::Fan && !instance.IsTriangleFanSupported()) {