video_core: Implement vulkan QuadStrip topology
This commit is contained in:
		| @@ -666,9 +666,10 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) { | ||||
|         BindHostIndexBuffer(); | ||||
|     } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) { | ||||
|         const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); | ||||
|         if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { | ||||
|             runtime.BindQuadArrayIndexBuffer(draw_state.vertex_buffer.first, | ||||
|                                              draw_state.vertex_buffer.count); | ||||
|         if (draw_state.topology == Maxwell::PrimitiveTopology::Quads || | ||||
|             draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { | ||||
|             runtime.BindQuadIndexBuffer(draw_state.topology, draw_state.vertex_buffer.first, | ||||
|                                         draw_state.vertex_buffer.count); | ||||
|         } | ||||
|     } | ||||
|     BindHostVertexBuffers(); | ||||
|   | ||||
| @@ -16,6 +16,7 @@ layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { | ||||
| layout (push_constant) uniform PushConstants { | ||||
|     uint base_vertex; | ||||
|     int index_shift; // 0: uint8, 1: uint16, 2: uint32 | ||||
|     int is_strip; // 0: quads 1: quadstrip | ||||
| }; | ||||
|  | ||||
| void main() { | ||||
| @@ -28,9 +29,10 @@ void main() { | ||||
|     int flipped_shift = 2 - index_shift; | ||||
|     int mask = (1 << flipped_shift) - 1; | ||||
|  | ||||
|     const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); | ||||
|     const int quads_swizzle[6] = int[](0, 1, 2, 0, 2, 3); | ||||
|     const int quad_strip_swizzle[6] = int[](0, 3, 1, 0, 2, 3); | ||||
|     for (uint vertex = 0; vertex < 6; ++vertex) { | ||||
|         int offset = primitive * 4 + quad_swizzle[vertex]; | ||||
|         int offset = (is_strip == 0 ? primitive * 4 + quads_swizzle[vertex] : primitive * 2 + quad_strip_swizzle[vertex]); | ||||
|         int int_offset = offset >> flipped_shift; | ||||
|         int bit_offset = (offset & mask) * index_size; | ||||
|         uint packed_input = input_indexes[int_offset]; | ||||
|   | ||||
| @@ -310,7 +310,9 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const Device& device, | ||||
|     case Maxwell::PrimitiveTopology::TriangleFan: | ||||
|         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; | ||||
|     case Maxwell::PrimitiveTopology::Quads: | ||||
|         // TODO(Rodrigo): Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT whenever it releases | ||||
|     case Maxwell::PrimitiveTopology::QuadStrip: | ||||
|         // TODO: Use VK_PRIMITIVE_TOPOLOGY_QUAD_LIST_EXT/VK_PRIMITIVE_TOPOLOGY_QUAD_STRIP_EXT | ||||
|         // whenever it releases | ||||
|         return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; | ||||
|     case Maxwell::PrimitiveTopology::Patches: | ||||
|         return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; | ||||
|   | ||||
| @@ -51,15 +51,6 @@ size_t BytesPerIndex(VkIndexType index_type) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Builds the six triangle-list indices that cover quad number `quad`. | ||||
| /// Each value is the quad's first vertex (`first + quad * 4`) plus one of the corner offsets | ||||
| /// 0, 1, 2, 0, 2, 3, converted to T with static_cast. | ||||
| template <typename T> | ||||
| std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) { | ||||
|     // Every quad consumes four consecutive vertices. | ||||
|     const u32 base = first + quad * 4; | ||||
|     return std::array<T, 6>{ | ||||
|         static_cast<T>(base + 0), static_cast<T>(base + 1), static_cast<T>(base + 2), | ||||
|         static_cast<T>(base + 0), static_cast<T>(base + 2), static_cast<T>(base + 3), | ||||
|     }; | ||||
| } | ||||
|  | ||||
| vk::Buffer CreateBuffer(const Device& device, u64 size) { | ||||
|     VkBufferUsageFlags flags = | ||||
|         VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | | ||||
| @@ -123,6 +114,187 @@ VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat | ||||
|     return *views.back().handle; | ||||
| } | ||||
|  | ||||
| /// Base class for index buffers holding a lookup table of triangle-list indices for quads. | ||||
| /// | ||||
| /// The table stores six indices per quad and is written four times back to back: copy `c` is | ||||
| /// generated by calling MakeAndUpdateIndices with `first == c` for every quad. BindBuffer then | ||||
| /// honours a draw's first vertex purely by choosing a byte offset into the table. | ||||
| /// Subclasses define how many quads a vertex count yields and which indices a quad expands to. | ||||
| class QuadIndexBuffer { | ||||
| public: | ||||
|     QuadIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                     Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||
|         : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||
|           staging_pool{staging_pool_} {} | ||||
|  | ||||
|     virtual ~QuadIndexBuffer() = default; | ||||
|  | ||||
|     /// Rebuilds the table so that it covers `num_indices_` vertices. | ||||
|     /// Does nothing when the current table already covers at least that many. | ||||
|     void UpdateBuffer(u32 num_indices_) { | ||||
|         if (num_indices_ <= num_indices) { | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // The buffer object is replaced below. | ||||
|         // NOTE(review): presumably Finish() drains work that may still read the old one - confirm. | ||||
|         scheduler.Finish(); | ||||
|  | ||||
|         num_indices = num_indices_; | ||||
|         index_type = IndexTypeFromNumElements(device, num_indices); | ||||
|  | ||||
|         const u32 num_quads = GetQuadsNum(num_indices); | ||||
|         const u32 num_triangle_indices = num_quads * 6; | ||||
|         const u32 num_first_offset_copies = 4; | ||||
|         const size_t bytes_per_index = BytesPerIndex(index_type); | ||||
|         const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies; | ||||
|         buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{ | ||||
|             .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|             .pNext = nullptr, | ||||
|             .flags = 0, | ||||
|             .size = size_bytes, | ||||
|             .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||
|             .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|             .queueFamilyIndexCount = 0, | ||||
|             .pQueueFamilyIndices = nullptr, | ||||
|         }); | ||||
|         if (device.HasDebuggingToolAttached()) { | ||||
|             buffer.SetObjectNameEXT("Quad LUT"); | ||||
|         } | ||||
|         memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal); | ||||
|  | ||||
|         const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload); | ||||
|         u8* staging_data = staging.mapped_span.data(); | ||||
|         const size_t quad_size = bytes_per_index * 6; | ||||
|  | ||||
|         // Layout: [copy 0: quad 0 .. quad N-1][copy 1: ...][copy 2: ...][copy 3: ...] | ||||
|         for (u32 first = 0; first < num_first_offset_copies; ++first) { | ||||
|             for (u32 quad = 0; quad < num_quads; ++quad) { | ||||
|                 MakeAndUpdateIndices(staging_data, quad_size, quad, first); | ||||
|                 staging_data += quad_size; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|         scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset, | ||||
|                           dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) { | ||||
|             const VkBufferCopy copy{ | ||||
|                 .srcOffset = src_offset, | ||||
|                 .dstOffset = 0, | ||||
|                 .size = size_bytes, | ||||
|             }; | ||||
|             // Makes the transfer write visible to index reads at the vertex input stage. | ||||
|             const VkBufferMemoryBarrier write_barrier{ | ||||
|                 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||
|                 .pNext = nullptr, | ||||
|                 .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|                 .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||
|                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|                 .buffer = dst_buffer, | ||||
|                 .offset = 0, | ||||
|                 .size = size_bytes, | ||||
|             }; | ||||
|             cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy); | ||||
|             cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||
|                                    VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     /// Binds the table as the index buffer, offset so that the first quad read starts at | ||||
|     /// vertex `first`. | ||||
|     void BindBuffer(u32 first) { | ||||
|         const VkIndexType index_type_ = index_type; | ||||
|         // first % 4 selects which of the four copies to read from. | ||||
|         const size_t quads_per_copy = GetQuadsNum(num_indices); | ||||
|         const size_t sub_first_offset = static_cast<size_t>(first % 4) * quads_per_copy; | ||||
|         // The remainder of 'first' is a whole number of four-vertex steps. Convert it to a quad | ||||
|         // position with the number of quads each extra four vertices add (1 for quad lists, | ||||
|         // 2 for quad strips). GetQuadsNum(first) must not be used here: it is a quad count, | ||||
|         // which for strips is one less than the position of the quad starting at that vertex. | ||||
|         const size_t quads_per_four_vertices = GetQuadsNum(8) - GetQuadsNum(4); | ||||
|         const size_t first_quad = static_cast<size_t>(first / 4) * quads_per_four_vertices; | ||||
|         const size_t offset = (sub_first_offset + first_quad) * 6ULL * BytesPerIndex(index_type); | ||||
|         scheduler.Record([buffer = *buffer, index_type_, offset](vk::CommandBuffer cmdbuf) { | ||||
|             cmdbuf.BindIndexBuffer(buffer, offset, index_type_); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
| protected: | ||||
|     /// Returns how many quads `num_indices` vertices produce for this topology. | ||||
|     virtual u32 GetQuadsNum(u32 num_indices) const = 0; | ||||
|  | ||||
|     /// Writes the six indices of quad `quad` for table copy `first` to `staging_data`. | ||||
|     /// `quad_size` is the byte size of those six indices in the current index type. | ||||
|     virtual void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, u32 first) = 0; | ||||
|  | ||||
|     const Device& device; | ||||
|     MemoryAllocator& memory_allocator; | ||||
|     Scheduler& scheduler; | ||||
|     StagingBufferPool& staging_pool; | ||||
|  | ||||
|     vk::Buffer buffer{}; | ||||
|     MemoryCommit memory_commit{}; | ||||
|     VkIndexType index_type{}; | ||||
|     u32 num_indices = 0; ///< Vertex count the current table was built for | ||||
| }; | ||||
|  | ||||
| /// Lookup-table index buffer for the Quads topology: every four consecutive vertices form one | ||||
| /// quad, which is expanded to the triangles (0, 1, 2) and (0, 2, 3). | ||||
| class QuadArrayIndexBuffer : public QuadIndexBuffer { | ||||
| public: | ||||
|     QuadArrayIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                          Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||
|         : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} | ||||
|  | ||||
|     ~QuadArrayIndexBuffer() override = default; | ||||
|  | ||||
| private: | ||||
|     /// Four vertices per quad; leftover vertices do not form a quad. | ||||
|     u32 GetQuadsNum(u32 num_indices_) const override { | ||||
|         return num_indices_ / 4; | ||||
|     } | ||||
|  | ||||
|     /// Returns the six triangle-list indices of quad `quad`, offset by `first`. | ||||
|     template <typename T> | ||||
|     static std::array<T, 6> MakeIndices(u32 quad, u32 first) { | ||||
|         std::array<T, 6> indices{0, 1, 2, 0, 2, 3}; | ||||
|         for (T& index : indices) { | ||||
|             index = static_cast<T>(first + index + quad * 4); | ||||
|         } | ||||
|         return indices; | ||||
|     } | ||||
|  | ||||
|     /// Writes the indices of quad `quad` to `staging_data` using the width of `index_type`. | ||||
|     void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, | ||||
|                               u32 first) override { | ||||
|         switch (index_type) { | ||||
|         case VK_INDEX_TYPE_UINT8_EXT: | ||||
|             std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT16: | ||||
|             std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT32: | ||||
|             std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         default: | ||||
|             ASSERT(false); | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| }; | ||||
|  | ||||
| /// Lookup-table index buffer for the QuadStrip topology: quad `q` uses the four vertices | ||||
| /// starting at `2 * q` and is expanded to the triangles (0, 3, 1) and (0, 2, 3). | ||||
| class QuadStripIndexBuffer : public QuadIndexBuffer { | ||||
| public: | ||||
|     QuadStripIndexBuffer(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                          Scheduler& scheduler_, StagingBufferPool& staging_pool_) | ||||
|         : QuadIndexBuffer(device_, memory_allocator_, scheduler_, staging_pool_) {} | ||||
|  | ||||
|     ~QuadStripIndexBuffer() override = default; | ||||
|  | ||||
| private: | ||||
|     /// The first quad needs four vertices and each further quad two more. | ||||
|     /// Fewer than four vertices yield no quad, which also avoids unsigned wrap-around. | ||||
|     u32 GetQuadsNum(u32 num_indices_) const override { | ||||
|         return num_indices_ >= 4 ? (num_indices_ - 2) / 2 : 0; | ||||
|     } | ||||
|  | ||||
|     /// Returns the six triangle-list indices of quad `quad`, offset by `first`. | ||||
|     template <typename T> | ||||
|     static std::array<T, 6> MakeIndices(u32 quad, u32 first) { | ||||
|         std::array<T, 6> indices{0, 3, 1, 0, 2, 3}; | ||||
|         for (T& index : indices) { | ||||
|             // Consecutive quads of a strip share two vertices, hence the stride of two. | ||||
|             index = static_cast<T>(first + index + quad * 2); | ||||
|         } | ||||
|         return indices; | ||||
|     } | ||||
|  | ||||
|     /// Writes the indices of quad `quad` to `staging_data` using the width of `index_type`. | ||||
|     void MakeAndUpdateIndices(u8* staging_data, size_t quad_size, u32 quad, | ||||
|                               u32 first) override { | ||||
|         switch (index_type) { | ||||
|         case VK_INDEX_TYPE_UINT8_EXT: | ||||
|             std::memcpy(staging_data, MakeIndices<u8>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT16: | ||||
|             std::memcpy(staging_data, MakeIndices<u16>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT32: | ||||
|             std::memcpy(staging_data, MakeIndices<u32>(quad, first).data(), quad_size); | ||||
|             break; | ||||
|         default: | ||||
|             ASSERT(false); | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| }; | ||||
|  | ||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& memory_allocator_, | ||||
|                                        Scheduler& scheduler_, StagingBufferPool& staging_pool_, | ||||
|                                        UpdateDescriptorQueue& update_descriptor_queue_, | ||||
| @@ -130,7 +302,12 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m | ||||
|     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, | ||||
|       staging_pool{staging_pool_}, update_descriptor_queue{update_descriptor_queue_}, | ||||
|       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) {} | ||||
|       quad_index_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue) { | ||||
|     quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_, | ||||
|                                                                      scheduler_, staging_pool_); | ||||
|     quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_, | ||||
|                                                                      scheduler_, staging_pool_); | ||||
| } | ||||
|  | ||||
| StagingBufferRef BufferCacheRuntime::UploadStagingBuffer(size_t size) { | ||||
|     return staging_pool.Request(size, MemoryUsage::Upload); | ||||
| @@ -245,10 +422,11 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat | ||||
|     VkIndexType vk_index_type = MaxwellToVK::IndexFormat(index_format); | ||||
|     VkDeviceSize vk_offset = offset; | ||||
|     VkBuffer vk_buffer = buffer; | ||||
|     if (topology == PrimitiveTopology::Quads) { | ||||
|     if (topology == PrimitiveTopology::Quads || topology == PrimitiveTopology::QuadStrip) { | ||||
|         vk_index_type = VK_INDEX_TYPE_UINT32; | ||||
|         std::tie(vk_buffer, vk_offset) = | ||||
|             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset); | ||||
|             quad_index_pass.Assemble(index_format, num_indices, base_vertex, buffer, offset, | ||||
|                                      topology == PrimitiveTopology::QuadStrip); | ||||
|     } else if (vk_index_type == VK_INDEX_TYPE_UINT8_EXT && !device.IsExtIndexTypeUint8Supported()) { | ||||
|         vk_index_type = VK_INDEX_TYPE_UINT16; | ||||
|         std::tie(vk_buffer, vk_offset) = uint8_pass.Assemble(num_indices, buffer, offset); | ||||
| @@ -263,7 +441,7 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat | ||||
|     }); | ||||
| } | ||||
|  | ||||
| void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { | ||||
| void BufferCacheRuntime::BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count) { | ||||
|     if (count == 0) { | ||||
|         ReserveNullBuffer(); | ||||
|         scheduler.Record([this](vk::CommandBuffer cmdbuf) { | ||||
| @@ -271,16 +449,14 @@ void BufferCacheRuntime::BindQuadArrayIndexBuffer(u32 first, u32 count) { | ||||
|         }); | ||||
|         return; | ||||
|     } | ||||
|     ReserveQuadArrayLUT(first + count, true); | ||||
|  | ||||
|     // The LUT has the indices 0, 1, 2, and 3 copied as an array | ||||
|     // To apply these 'first' offsets we can apply an offset based on the modulus. | ||||
|     const VkIndexType index_type = quad_array_lut_index_type; | ||||
|     const size_t sub_first_offset = static_cast<size_t>(first % 4) * (current_num_indices / 4); | ||||
|     const size_t offset = (sub_first_offset + first / 4) * 6ULL * BytesPerIndex(index_type); | ||||
|     scheduler.Record([buffer = *quad_array_lut, index_type, offset](vk::CommandBuffer cmdbuf) { | ||||
|         cmdbuf.BindIndexBuffer(buffer, offset, index_type); | ||||
|     }); | ||||
|     if (topology == PrimitiveTopology::Quads) { | ||||
|         quad_array_index_buffer->UpdateBuffer(first + count); | ||||
|         quad_array_index_buffer->BindBuffer(first); | ||||
|     } else if (topology == PrimitiveTopology::QuadStrip) { | ||||
|         quad_strip_index_buffer->UpdateBuffer(first + count); | ||||
|         quad_strip_index_buffer->BindBuffer(first); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void BufferCacheRuntime::BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, | ||||
| @@ -320,83 +496,6 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, VkBuffer buffer, | ||||
|     }); | ||||
| } | ||||
|  | ||||
| /// Grows the quad-array lookup table so that it covers `num_indices` vertices. | ||||
| /// Nothing happens when the current table already covers at least that many. | ||||
| /// @param num_indices   Vertex count the table has to cover. | ||||
| /// @param wait_for_idle When true, scheduler.Finish() is called before the table is replaced. | ||||
| void BufferCacheRuntime::ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle) { | ||||
|     if (current_num_indices >= num_indices) { | ||||
|         return; | ||||
|     } | ||||
|     if (wait_for_idle) { | ||||
|         scheduler.Finish(); | ||||
|     } | ||||
|     current_num_indices = num_indices; | ||||
|     quad_array_lut_index_type = IndexTypeFromNumElements(device, num_indices); | ||||
|  | ||||
|     static constexpr u32 INDICES_PER_QUAD = 6; | ||||
|     // One table copy per possible value of 'first % 4'. | ||||
|     static constexpr u32 FIRST_OFFSET_COPIES = 4; | ||||
|     const u32 quad_count = num_indices / 4; | ||||
|     const u32 triangle_index_count = quad_count * INDICES_PER_QUAD; | ||||
|     const size_t index_bytes = BytesPerIndex(quad_array_lut_index_type); | ||||
|     const size_t lut_bytes = triangle_index_count * index_bytes * FIRST_OFFSET_COPIES; | ||||
|  | ||||
|     const VkBufferCreateInfo lut_create_info{ | ||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, | ||||
|         .pNext = nullptr, | ||||
|         .flags = 0, | ||||
|         .size = lut_bytes, | ||||
|         .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, | ||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, | ||||
|         .queueFamilyIndexCount = 0, | ||||
|         .pQueueFamilyIndices = nullptr, | ||||
|     }; | ||||
|     quad_array_lut = device.GetLogical().CreateBuffer(lut_create_info); | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         quad_array_lut.SetObjectNameEXT("Quad LUT"); | ||||
|     } | ||||
|     quad_array_lut_commit = memory_allocator.Commit(quad_array_lut, MemoryUsage::DeviceLocal); | ||||
|  | ||||
|     const StagingBufferRef staging = staging_pool.Request(lut_bytes, MemoryUsage::Upload); | ||||
|     const size_t quad_bytes = index_bytes * INDICES_PER_QUAD; | ||||
|  | ||||
|     // Writes the six indices of one quad using the index width chosen above. | ||||
|     const auto write_quad = [&](u8* dst, u32 quad, u32 first) { | ||||
|         switch (quad_array_lut_index_type) { | ||||
|         case VK_INDEX_TYPE_UINT8_EXT: | ||||
|             std::memcpy(dst, MakeQuadIndices<u8>(quad, first).data(), quad_bytes); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT16: | ||||
|             std::memcpy(dst, MakeQuadIndices<u16>(quad, first).data(), quad_bytes); | ||||
|             break; | ||||
|         case VK_INDEX_TYPE_UINT32: | ||||
|             std::memcpy(dst, MakeQuadIndices<u32>(quad, first).data(), quad_bytes); | ||||
|             break; | ||||
|         default: | ||||
|             ASSERT(false); | ||||
|             break; | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     u8* cursor = staging.mapped_span.data(); | ||||
|     for (u32 first = 0; first < FIRST_OFFSET_COPIES; ++first) { | ||||
|         for (u32 quad = 0; quad < quad_count; ++quad) { | ||||
|             write_quad(cursor, quad, first); | ||||
|             cursor += quad_bytes; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([upload_buffer = staging.buffer, upload_offset = staging.offset, | ||||
|                       lut_buffer = *quad_array_lut, lut_bytes](vk::CommandBuffer cmdbuf) { | ||||
|         // Makes the transfer write visible to index reads at the vertex input stage. | ||||
|         const VkBufferMemoryBarrier index_read_barrier{ | ||||
|             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, | ||||
|             .pNext = nullptr, | ||||
|             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||
|             .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||
|             .buffer = lut_buffer, | ||||
|             .offset = 0, | ||||
|             .size = lut_bytes, | ||||
|         }; | ||||
|         const VkBufferCopy upload_region{ | ||||
|             .srcOffset = upload_offset, | ||||
|             .dstOffset = 0, | ||||
|             .size = lut_bytes, | ||||
|         }; | ||||
|         cmdbuf.CopyBuffer(upload_buffer, lut_buffer, upload_region); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, | ||||
|                                0, index_read_barrier); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| void BufferCacheRuntime::ReserveNullBuffer() { | ||||
|     if (null_buffer) { | ||||
|         return; | ||||
|   | ||||
| @@ -50,6 +50,9 @@ private: | ||||
|     std::vector<BufferView> views; | ||||
| }; | ||||
|  | ||||
| class QuadArrayIndexBuffer; | ||||
| class QuadStripIndexBuffer; | ||||
|  | ||||
| class BufferCacheRuntime { | ||||
|     friend Buffer; | ||||
|  | ||||
| @@ -86,7 +89,7 @@ public: | ||||
|     void BindIndexBuffer(PrimitiveTopology topology, IndexFormat index_format, u32 num_indices, | ||||
|                          u32 base_vertex, VkBuffer buffer, u32 offset, u32 size); | ||||
|  | ||||
|     void BindQuadArrayIndexBuffer(u32 first, u32 count); | ||||
|     void BindQuadIndexBuffer(PrimitiveTopology topology, u32 first, u32 count); | ||||
|  | ||||
|     void BindVertexBuffer(u32 index, VkBuffer buffer, u32 offset, u32 size, u32 stride); | ||||
|  | ||||
| @@ -118,8 +121,6 @@ private: | ||||
|         update_descriptor_queue.AddBuffer(buffer, offset, size); | ||||
|     } | ||||
|  | ||||
|     void ReserveQuadArrayLUT(u32 num_indices, bool wait_for_idle); | ||||
|  | ||||
|     void ReserveNullBuffer(); | ||||
|  | ||||
|     const Device& device; | ||||
| @@ -128,10 +129,8 @@ private: | ||||
|     StagingBufferPool& staging_pool; | ||||
|     UpdateDescriptorQueue& update_descriptor_queue; | ||||
|  | ||||
|     vk::Buffer quad_array_lut; | ||||
|     MemoryCommit quad_array_lut_commit; | ||||
|     VkIndexType quad_array_lut_index_type{}; | ||||
|     u32 current_num_indices = 0; | ||||
|     std::shared_ptr<QuadArrayIndexBuffer> quad_array_index_buffer; | ||||
|     std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer; | ||||
|  | ||||
|     vk::Buffer null_buffer; | ||||
|     MemoryCommit null_buffer_commit; | ||||
|   | ||||
| @@ -245,7 +245,7 @@ QuadIndexedPass::QuadIndexedPass(const Device& device_, Scheduler& scheduler_, | ||||
|                                  UpdateDescriptorQueue& update_descriptor_queue_) | ||||
|     : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, | ||||
|                   INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, | ||||
|                   COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 2>, VULKAN_QUAD_INDEXED_COMP_SPV), | ||||
|                   COMPUTE_PUSH_CONSTANT_RANGE<sizeof(u32) * 3>, VULKAN_QUAD_INDEXED_COMP_SPV), | ||||
|       scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_}, | ||||
|       update_descriptor_queue{update_descriptor_queue_} {} | ||||
|  | ||||
| @@ -253,7 +253,7 @@ QuadIndexedPass::~QuadIndexedPass() = default; | ||||
|  | ||||
| std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||
|     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | ||||
|     VkBuffer src_buffer, u32 src_offset) { | ||||
|     VkBuffer src_buffer, u32 src_offset, bool is_strip) { | ||||
|     const u32 index_shift = [index_format] { | ||||
|         switch (index_format) { | ||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | ||||
| @@ -267,7 +267,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||
|         return 2; | ||||
|     }(); | ||||
|     const u32 input_size = num_vertices << index_shift; | ||||
|     const u32 num_tri_vertices = (num_vertices / 4) * 6; | ||||
|     const u32 num_tri_vertices = (is_strip ? (num_vertices - 2) / 2 : num_vertices / 4) * 6; | ||||
|  | ||||
|     const std::size_t staging_size = num_tri_vertices * sizeof(u32); | ||||
|     const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal); | ||||
| @@ -278,8 +278,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||
|     const void* const descriptor_data{update_descriptor_queue.UpdateData()}; | ||||
|  | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, | ||||
|                       index_shift](vk::CommandBuffer cmdbuf) { | ||||
|     scheduler.Record([this, descriptor_data, num_tri_vertices, base_vertex, index_shift, | ||||
|                       is_strip](vk::CommandBuffer cmdbuf) { | ||||
|         static constexpr u32 DISPATCH_SIZE = 1024; | ||||
|         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||
|             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||
| @@ -287,7 +287,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( | ||||
|             .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, | ||||
|             .dstAccessMask = VK_ACCESS_INDEX_READ_BIT, | ||||
|         }; | ||||
|         const std::array<u32, 2> push_constants{base_vertex, index_shift}; | ||||
|         const std::array<u32, 3> push_constants{base_vertex, index_shift, is_strip ? 1u : 0u}; | ||||
|         const VkDescriptorSet set = descriptor_allocator.Commit(); | ||||
|         device.GetLogical().UpdateDescriptorSet(set, *descriptor_template, descriptor_data); | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); | ||||
|   | ||||
| @@ -74,7 +74,7 @@ public: | ||||
|  | ||||
|     std::pair<VkBuffer, VkDeviceSize> Assemble( | ||||
|         Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, | ||||
|         u32 base_vertex, VkBuffer src_buffer, u32 src_offset); | ||||
|         u32 base_vertex, VkBuffer src_buffer, u32 src_offset, bool is_strip); | ||||
|  | ||||
| private: | ||||
|     Scheduler& scheduler; | ||||
|   | ||||
| @@ -138,12 +138,16 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, | ||||
|         .first_index = is_indexed ? draw_state.index_buffer.first : 0, | ||||
|         .is_indexed = is_indexed, | ||||
|     }; | ||||
|     // 6 triangle vertices per quad, base vertex is part of the index | ||||
|     // See BindQuadIndexBuffer for more details | ||||
|     if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) { | ||||
|         // 6 triangle vertices per quad, base vertex is part of the index | ||||
|         // See BindQuadArrayIndexBuffer for more details | ||||
|         params.num_vertices = (params.num_vertices / 4) * 6; | ||||
|         params.base_vertex = 0; | ||||
|         params.is_indexed = true; | ||||
|     } else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) { | ||||
|         params.num_vertices = (params.num_vertices - 2) / 2 * 6; | ||||
|         params.base_vertex = 0; | ||||
|         params.is_indexed = true; | ||||
|     } | ||||
|     return params; | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user