renderer_vulkan: Optimize stream buffer usage
* Use VMA for allocating the memory. In addition, place the upload/download buffers in host (CPU) memory; they don't need to be device local.
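The gist of the change: with VMA_MEMORY_USAGE_AUTO plus host-access flags, the allocator picks a suitable memory type by itself, so the hand-rolled memory-type search in the stream buffer can be dropped, and upload/download buffers naturally end up in host memory. A minimal sketch of that pattern, assuming a valid VmaAllocator; the MappedBuffer/MakeHostBuffer names are illustrative only and not part of this patch:

// Sketch: create a persistently-mapped host buffer with VMA and let it choose the heap.
#include <vk_mem_alloc.h>

struct MappedBuffer {
    VkBuffer buffer{};
    VmaAllocation allocation{};
    void* mapped{}; // valid because of VMA_ALLOCATION_CREATE_MAPPED_BIT
};

MappedBuffer MakeHostBuffer(VmaAllocator allocator, VkDeviceSize size,
                            VkBufferUsageFlags usage, bool readback) {
    const VkBufferCreateInfo buffer_info{
        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .size = size,
        .usage = usage,
    };
    const VmaAllocationCreateInfo alloc_info{
        // Sequential-write for uploads, random access (cached memory) for readback.
        .flags = (readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
                           : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT) |
                 VMA_ALLOCATION_CREATE_MAPPED_BIT,
        .usage = VMA_MEMORY_USAGE_AUTO, // VMA picks host-visible memory; no manual type search
    };
    MappedBuffer result{};
    VmaAllocationInfo info{};
    // Error handling omitted in this sketch; vmaCreateBuffer returns a VkResult.
    vmaCreateBuffer(allocator, &buffer_info, &alloc_info, &result.buffer, &result.allocation,
                    &info);
    result.mapped = info.pMappedData;
    return result;
}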
@@ -145,7 +145,6 @@ RasterizerVulkan::~RasterizerVulkan() {
     scheduler.Finish();
     const vk::Device device = instance.GetDevice();
 
-    device.destroySampler(default_sampler);
     device.destroyBufferView(texture_lf_view);
     device.destroyBufferView(texture_rg_view);
     device.destroyBufferView(texture_rgba_view);
@@ -363,8 +362,8 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
         return false;
     }
 
-    // Vertex data analysis and setup might involve rasterizer cache memory flushes
-    // so perform it early to avoid invalidating our state in the middle of the draw
+    // Vertex data setup might involve scheduler flushes so perform it
+    // early to avoid invalidating our state in the middle of the draw.
     vertex_info = AnalyzeVertexArray(is_indexed, instance.GetMinVertexStrideAlignment());
     SetupVertexArray();
 
@@ -413,14 +412,15 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
 void RasterizerVulkan::SetupIndexArray() {
     const bool index_u8 = regs.pipeline.index_array.format == 0;
     const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported();
-    const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16;
     const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2);
+    const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16;
 
     const u8* index_data =
         memory.GetPhysicalPointer(regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
                                   regs.pipeline.index_array.offset);
 
     auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
 
     if (index_u8 && !native_u8) {
         u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
         for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
@@ -140,7 +140,6 @@ private:
    std::array<u32, 16> binding_offsets{};
    std::array<bool, 16> enable_attributes{};
    std::array<vk::Buffer, 16> vertex_buffers;
-   vk::Sampler default_sampler;
    Surface null_surface;
    Surface null_storage_surface;
    PipelineInfo pipeline_info;
@@ -10,60 +10,40 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"
 
+#include <vk_mem_alloc.h>
+
 namespace Vulkan {
 
 namespace {
 
+VmaAllocationCreateFlags MakeVMAFlags(BufferType type) {
+    switch (type) {
+    case BufferType::Upload:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
+    case BufferType::Download:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
+    case BufferType::Stream:
+        return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
+               VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
+    }
+}
+
 constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
 constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
 
-/// Find a memory type with the passed requirements
-std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
-                                  vk::MemoryPropertyFlags wanted,
-                                  u32 filter = std::numeric_limits<u32>::max()) {
-    for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
-        const auto flags = properties.memoryTypes[i].propertyFlags;
-        if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
-            return i;
-        }
-    }
-    return std::nullopt;
-}
-
-/// Get the preferred host visible memory type.
-u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, bool readback,
-                  u32 filter = std::numeric_limits<u32>::max()) {
-    // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
-    // Otherwise search for a host visible allocation.
-    const vk::MemoryPropertyFlags HOST_MEMORY =
-        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
-    const vk::MemoryPropertyFlags DYNAMIC_MEMORY =
-        HOST_MEMORY | (readback ? vk::MemoryPropertyFlagBits::eHostCached
-                                : vk::MemoryPropertyFlagBits::eDeviceLocal);
-
-    std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
-    if (!preferred_type) {
-        preferred_type = FindMemoryType(properties, HOST_MEMORY);
-        ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
-    }
-    return preferred_type.value_or(0);
-}
-
 } // Anonymous namespace
 
 StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
-                           vk::BufferUsageFlags usage_, u64 size, bool readback_)
-    : instance{instance_}, scheduler{scheduler_}, usage{usage_}, readback{readback_} {
+                           vk::BufferUsageFlags usage_, u64 size, BufferType type_)
+    : instance{instance_}, scheduler{scheduler_},
+      stream_buffer_size{size}, usage{usage_}, type{type_} {
     CreateBuffers(size);
     ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
     ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
 }
 
 StreamBuffer::~StreamBuffer() {
-    const vk::Device device = instance.GetDevice();
-    device.unmapMemory(memory);
-    device.destroyBuffer(buffer);
-    device.freeMemory(memory);
+    vmaDestroyBuffer(instance.GetAllocator(), buffer, allocation);
 }
 
 std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
@@ -109,31 +89,35 @@ void StreamBuffer::Commit(u64 size) {
 }
 
 void StreamBuffer::CreateBuffers(u64 prefered_size) {
-    const vk::Device device = instance.GetDevice();
-    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
-    const u32 preferred_type = GetMemoryType(memory_properties, readback);
-    const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
-
-    // Substract from the preferred heap size some bytes to avoid getting out of memory.
-    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
-    // As per DXVK's example, using `heap_size / 2`
-    const VkDeviceSize allocable_size = heap_size / 2;
-    buffer = device.createBuffer({
-        .size = std::min(prefered_size, allocable_size),
+    const vk::BufferCreateInfo buffer_info = {
+        .size = prefered_size,
         .usage = usage,
-    });
+    };
 
-    const auto requirements = device.getBufferMemoryRequirements(buffer);
-    const u32 required_flags = requirements.memoryTypeBits;
-    stream_buffer_size = static_cast<u64>(requirements.size);
-
-    memory = device.allocateMemory({
-        .allocationSize = requirements.size,
-        .memoryTypeIndex = GetMemoryType(memory_properties, required_flags),
-    });
+    const VmaAllocationCreateInfo alloc_create_info = {
+        .flags = MakeVMAFlags(type) | VMA_ALLOCATION_CREATE_MAPPED_BIT,
+        .usage = VMA_MEMORY_USAGE_AUTO,
+    };
 
-    device.bindBufferMemory(buffer, memory, 0);
-    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
+    VkBuffer unsafe_buffer{};
+    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
+    VmaAllocationInfo alloc_info{};
+    vmaCreateBuffer(instance.GetAllocator(), &unsafe_buffer_info, &alloc_create_info,
+                    &unsafe_buffer, &allocation, &alloc_info);
+    buffer = vk::Buffer{unsafe_buffer};
+
+    VkMemoryPropertyFlags memory_flags{};
+    vmaGetAllocationMemoryProperties(instance.GetAllocator(), allocation, &memory_flags);
+    if (type == BufferType::Stream) {
+        ASSERT_MSG(memory_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+                   "Stream buffer must be host visible!");
+        if (!(memory_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
+            LOG_WARNING(Render_Vulkan,
+                        "Unable to use device local memory for stream buffer. It will be slower!");
+        }
+    }
+
+    mapped = reinterpret_cast<u8*>(alloc_info.pMappedData);
 }
 
 void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
@@ -10,8 +10,16 @@
 #include <vector>
 #include "video_core/renderer_vulkan/vk_common.h"
 
+VK_DEFINE_HANDLE(VmaAllocation)
+
 namespace Vulkan {
 
+enum class BufferType : u32 {
+    Upload = 0,
+    Download = 1,
+    Stream = 2,
+};
+
 class Instance;
 class Scheduler;
 
@@ -20,7 +28,8 @@ class StreamBuffer final {
 
 public:
     explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
-                          vk::BufferUsageFlags usage, u64 size, bool readback = false);
+                          vk::BufferUsageFlags usage, u64 size,
+                          BufferType type = BufferType::Stream);
     ~StreamBuffer();
 
     /**
@@ -60,11 +69,11 @@ private:
    Scheduler& scheduler; ///< Command scheduler.
 
    vk::Buffer buffer; ///< Mapped buffer.
-   vk::DeviceMemory memory; ///< Memory allocation.
+   VmaAllocation allocation; ///< VMA allocation
    u8* mapped{}; ///< Pointer to the mapped memory
    u64 stream_buffer_size{}; ///< Stream buffer size.
    vk::BufferUsageFlags usage{};
-   bool readback{}; ///< Flag indicating if the buffer should use cached memory
+   BufferType type;
 
    u64 offset{}; ///< Buffer iterator.
    u64 mapped_size{}; ///< Size reserved for the current copy.
@@ -106,7 +106,7 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
     return depth_offset;
 }
 
-constexpr u64 UPLOAD_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr u64 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
 constexpr u64 DOWNLOAD_BUFFER_SIZE = 16 * 1024 * 1024;
 
 } // Anonymous namespace
@@ -115,9 +115,10 @@ TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
                                RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
     : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
       blit_helper{instance, scheduler, desc_manager, renderpass_cache},
-      upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
+      upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE,
+                    BufferType::Upload},
       download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
-                      DOWNLOAD_BUFFER_SIZE, true} {
+                      DOWNLOAD_BUFFER_SIZE, BufferType::Download} {
 
     auto Register = [this](VideoCore::PixelFormat dest,
                            std::unique_ptr<FormatReinterpreterBase>&& obj) {