Revert "vulkan: automatically use larger staging buffer sizes when possible"

This commit is contained in:
liamwhite 2022-10-07 04:49:08 -04:00 committed by GitHub
parent 61883d8820
commit 20cf09471a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 27 additions and 60 deletions

View File

@ -26,39 +26,20 @@ using namespace Common::Literals;
constexpr VkDeviceSize MAX_ALIGNMENT = 256; constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Maximum size to put elements in the stream buffer // Maximum size to put elements in the stream buffer
constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
// Stream buffer size in bytes
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
// Size in bytes of one region: the stream buffer split evenly into
// StagingBufferPool::NUM_SYNCS regions
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
constexpr VkMemoryPropertyFlags HOST_FLAGS = constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS; constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
// A heap qualifies for streaming when the stream/staging buffer size is strictly
// less than two thirds of the heap's size.
static bool IsStreamHeap(VkMemoryHeap heap, size_t staging_buffer_size) noexcept { bool IsStreamHeap(VkMemoryHeap heap) noexcept {
return staging_buffer_size < (heap.size * 2) / 3; return STREAM_BUFFER_SIZE < (heap.size * 2) / 3;
}
// Reports whether any memory type that is both DEVICE_LOCAL and HOST_VISIBLE is
// backed by a heap of at least 7168 MiB. Returns false when no such type exists.
static bool HasLargeDeviceLocalHostVisibleMemory(const VkPhysicalDeviceMemoryProperties& props) {
const auto flags{VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT};
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
const auto& memory_type{props.memoryTypes[type_index]};
// Both bits must be set; a type with only one of them is skipped
if ((memory_type.propertyFlags & flags) != flags) {
// Memory must be device local and host visible
continue;
}
// Look up the heap that backs this memory type and check its total size
const auto& heap{props.memoryHeaps[memory_type.heapIndex]};
if (heap.size >= 7168_MiB) {
// This is the right type of memory
return true;
}
}
return false;
} }
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
VkMemoryPropertyFlags flags, VkMemoryPropertyFlags flags) noexcept {
size_t staging_buffer_size) noexcept {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) { for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
if (((type_mask >> type_index) & 1) == 0) { if (((type_mask >> type_index) & 1) == 0) {
// Memory type is incompatible // Memory type is incompatible
@ -69,7 +50,7 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
// Memory type doesn't have the flags we want // Memory type doesn't have the flags we want
continue; continue;
} }
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex], staging_buffer_size)) { if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
// Memory heap is not suitable for streaming // Memory heap is not suitable for streaming
continue; continue;
} }
@ -80,17 +61,17 @@ std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& p
} }
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask, u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
bool try_device_local, size_t staging_buffer_size) { bool try_device_local) {
std::optional<u32> type; std::optional<u32> type;
if (try_device_local) { if (try_device_local) {
// Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this // Try to find a DEVICE_LOCAL_BIT type, Nvidia and AMD have a dedicated heap for this
type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS, staging_buffer_size); type = FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS);
if (type) { if (type) {
return *type; return *type;
} }
} }
// Otherwise try without the DEVICE_LOCAL_BIT // Otherwise try without the DEVICE_LOCAL_BIT
type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS, staging_buffer_size); type = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS);
if (type) { if (type) {
return *type; return *type;
} }
@ -98,32 +79,20 @@ u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
} }
// Maps a stream-buffer offset to the index of the region that contains it,
// using integer division by the region size.
size_t Region(size_t iterator, size_t region_size) noexcept { size_t Region(size_t iterator) noexcept {
return iterator / region_size; return iterator / REGION_SIZE;
}
} // Anonymous namespace } // Anonymous namespace
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_) Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
const auto memory_properties{device.GetPhysical().GetMemoryProperties().memoryProperties};
if (HasLargeDeviceLocalHostVisibleMemory(memory_properties)) {
// Possible on many integrated and newer discrete cards
staging_buffer_size = 1_GiB;
} else {
// Well-supported default size used by most Vulkan PC games
staging_buffer_size = 256_MiB;
}
region_size = staging_buffer_size / StagingBufferPool::NUM_SYNCS;
const vk::Device& dev = device.GetLogical(); const vk::Device& dev = device.GetLogical();
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{ stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,
.flags = 0, .flags = 0,
.size = staging_buffer_size, .size = STREAM_BUFFER_SIZE,
.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE, .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
@ -148,18 +117,19 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.image = nullptr, .image = nullptr,
.buffer = *stream_buffer, .buffer = *stream_buffer,
}; };
const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
VkMemoryAllocateInfo stream_memory_info{ VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr, .pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size, .allocationSize = requirements.size,
.memoryTypeIndex = FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true, .memoryTypeIndex =
staging_buffer_size), FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
}; };
stream_memory = dev.TryAllocateMemory(stream_memory_info); stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) { if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory"); LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
stream_memory_info.memoryTypeIndex = FindMemoryTypeIndex( stream_memory_info.memoryTypeIndex =
memory_properties, requirements.memoryTypeBits, false, staging_buffer_size); FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
stream_memory = dev.AllocateMemory(stream_memory_info); stream_memory = dev.AllocateMemory(stream_memory_info);
} }
@ -167,7 +137,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
stream_memory.SetObjectNameEXT("Stream Buffer Memory"); stream_memory.SetObjectNameEXT("Stream Buffer Memory");
} }
stream_buffer.BindMemory(*stream_memory, 0); stream_buffer.BindMemory(*stream_memory, 0);
stream_pointer = stream_memory.Map(0, staging_buffer_size); stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
} }
StagingBufferPool::~StagingBufferPool() = default; StagingBufferPool::~StagingBufferPool() = default;
@ -188,25 +158,25 @@ void StagingBufferPool::TickFrame() {
} }
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
if (AreRegionsActive(Region(free_iterator, region_size) + 1, if (AreRegionsActive(Region(free_iterator) + 1,
std::min(Region(iterator + size, region_size) + 1, NUM_SYNCS))) { std::min(Region(iterator + size) + 1, NUM_SYNCS))) {
// Avoid waiting for the previous usages to be free // Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload); return GetStagingBuffer(size, MemoryUsage::Upload);
} }
const u64 current_tick = scheduler.CurrentTick(); const u64 current_tick = scheduler.CurrentTick();
std::fill(sync_ticks.begin() + Region(used_iterator, region_size), std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
sync_ticks.begin() + Region(iterator, region_size), current_tick); current_tick);
used_iterator = iterator; used_iterator = iterator;
free_iterator = std::max(free_iterator, iterator + size); free_iterator = std::max(free_iterator, iterator + size);
if (iterator + size >= staging_buffer_size) { if (iterator + size >= STREAM_BUFFER_SIZE) {
std::fill(sync_ticks.begin() + Region(used_iterator, region_size), std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
sync_ticks.begin() + NUM_SYNCS, current_tick); current_tick);
used_iterator = 0; used_iterator = 0;
iterator = 0; iterator = 0;
free_iterator = size; free_iterator = size;
if (AreRegionsActive(0, Region(size, region_size) + 1)) { if (AreRegionsActive(0, Region(size) + 1)) {
// Avoid waiting for the previous usages to be free // Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload); return GetStagingBuffer(size, MemoryUsage::Upload);
} }

View File

@ -93,9 +93,6 @@ private:
size_t free_iterator = 0; size_t free_iterator = 0;
std::array<u64, NUM_SYNCS> sync_ticks{}; std::array<u64, NUM_SYNCS> sync_ticks{};
size_t staging_buffer_size = 0;
size_t region_size = 0;
StagingBuffersCache device_local_cache; StagingBuffersCache device_local_cache;
StagingBuffersCache upload_cache; StagingBuffersCache upload_cache;
StagingBuffersCache download_cache; StagingBuffersCache download_cache;