renderer_vulkan: Revert some stream buffer changes
* The previous design was much less prone to errors, so switch back to it. Also make 16-byte alignment the standard.
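For orientation, the design being restored slices one staging allocation into BUCKET_COUNT equal buckets, each carrying its own GPU tick, write cursor, and invalidation flag; Map() allocates linearly inside the current bucket and rotates to the next one on overflow. Below is a minimal, self-contained sketch of that scheme, not the emulator's code: the Scheduler tick machinery and the VMA-backed staging memory are stand-ins (a plain byte vector, with synchronisation reduced to comments).

    // Sketch of the bucketed stream buffer this commit reverts to (assumptions
    // noted inline; real synchronisation via Scheduler::Wait is elided).
    #include <array>
    #include <cstdint>
    #include <tuple>
    #include <utility>
    #include <vector>

    using u8 = std::uint8_t;
    using u32 = std::uint32_t;

    constexpr u32 BUCKET_COUNT = 4;

    constexpr u32 AlignUp(u32 value, u32 align) {
        return (value + align - 1) & ~(align - 1);
    }

    class StreamBufferSketch {
    public:
        explicit StreamBufferSketch(u32 size)
            : memory(size), bucket_size{size / BUCKET_COUNT} {}

        // Alignment is no longer a parameter: every request is padded to 16 bytes
        std::tuple<u8*, u32, bool> Map(u32 size) {
            size = AlignUp(size, 16);
            Bucket& bucket = buckets[bucket_index];
            if (bucket.cursor + size > bucket_size) {
                MoveNextBucket(); // The real code also stamps bucket.gpu_tick first
                return Map(size);
            }
            const bool invalidate = std::exchange(bucket.invalid, false);
            const u32 offset = bucket_index * bucket_size + bucket.cursor;
            return std::make_tuple(memory.data() + offset, offset, invalidate);
        }

        void Commit(u32 size) {
            buckets[bucket_index].cursor += AlignUp(size, 16);
        }

    private:
        struct Bucket {
            bool invalid = false;
            u32 cursor = 0;
        };

        void MoveNextBucket() {
            bucket_index = (bucket_index + 1) % BUCKET_COUNT;
            // The real code waits on the incoming bucket's gpu_tick before reuse
            buckets[bucket_index] = Bucket{.invalid = true, .cursor = 0};
        }

        std::vector<u8> memory;
        std::array<Bucket, BUCKET_COUNT> buckets{};
        u32 bucket_size = 0;
        u32 bucket_index = 0;
    };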
@@ -21,7 +21,7 @@
 namespace Vulkan {
 
-constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
+constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
 constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
 constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
 constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024;
 
@@ -177,7 +177,7 @@ void RasterizerVulkan::SyncFixedState() {
 
 void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
                                         u32 vs_input_index_max) {
-    auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
+    auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size);
 
     /**
      * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
@@ -402,7 +402,7 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
                                       regs.pipeline.index_array.offset);
 
     // Upload index buffer data to the GPU
-    auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size, 4);
+    auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
     std::memcpy(index_ptr, index_data, index_buffer_size);
     index_buffer.Commit(index_buffer_size);
 
@@ -744,7 +744,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
     const u32 vertex_size = vertices * sizeof(HardwareVertex);
 
     // Copy vertex data
-    auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size, sizeof(HardwareVertex));
+    auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size);
     std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
     vertex_buffer.Commit(vertex_size);
 
@@ -1266,7 +1266,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
     }
 
     std::size_t bytes_used = 0;
-    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
+    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size);
 
     // Sync the lighting luts
     if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@@ -1332,7 +1332,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
     }
 
     std::size_t bytes_used = 0;
-    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));
+    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size);
 
     // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
     auto SyncProcTexValueLUT =
@@ -1434,8 +1434,7 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
 
     u32 used_bytes = 0;
     const u32 uniform_size = static_cast<u32>(uniform_size_aligned_vs + uniform_size_aligned_fs);
-    auto [uniforms, offset, invalidate] =
-        uniform_buffer.Map(uniform_size, static_cast<u32>(uniform_buffer_alignment));
+    auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size);
 
     if (sync_vs) {
         Pica::Shader::VSUniformData vs_uniforms;
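Every call site in the hunks above now follows the same shape: map, memcpy, commit, with the old per-site alignment arguments (4 for index data, sizeof(Common::Vec4f) == 16 for the LUT uploads) subsumed by the fixed 16-byte granularity. A usage example against the StreamBufferSketch near the top (names illustrative only):

    #include <cstring>

    int main() {
        StreamBufferSketch stream_buffer{64 * 1024 * 1024};
        const char source_data[] = "vertex bytes";
        auto [ptr, offset, invalidate] = stream_buffer.Map(sizeof(source_data));
        std::memcpy(ptr, source_data, sizeof(source_data));
        stream_buffer.Commit(sizeof(source_data));
        // With the power-of-two sizes used here, offsets come back 16-byte aligned
        return (offset % 16 == 0 && !invalidate) ? 0 : 1;
    }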
@@ -80,15 +80,16 @@ StagingBuffer::~StagingBuffer() {
     vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
 }
 
-StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback)
-    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, total_size{size},
-      bucket_size{size / BUCKET_COUNT}, readback{readback} {}
+StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
+                           bool readback)
+    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback},
+      total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {}
 
 StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
                            vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
                            bool readback)
-    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, usage{usage},
-      total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
+    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback},
+      usage{usage}, total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
     const vk::BufferCreateInfo buffer_info = {
         .size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
 
@@ -128,57 +129,51 @@ StreamBuffer::~StreamBuffer() {
     }
 }
 
-std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
-    ASSERT(size <= total_size && alignment <= total_size);
+std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
+    ASSERT(size <= total_size);
+    size = Common::AlignUp(size, 16);
 
-    if (alignment > 0) {
-        buffer_offset = Common::AlignUp(buffer_offset, alignment);
-    }
+    Bucket& bucket = buckets[bucket_index];
 
-    bool invalidate = false;
-    const u32 new_offset = buffer_offset + size;
-    if (u32 new_index = new_offset / bucket_size; new_index != bucket_index) {
-        if (new_index >= BUCKET_COUNT) {
-            if (readback) {
-                Invalidate();
-            } else {
-                Flush();
-            }
-            buffer_offset = 0;
-            flush_offset = 0;
-            new_index = 0;
-            invalidate = true;
-        }
-        ticks[bucket_index] = scheduler.CurrentTick();
-        scheduler.Wait(ticks[new_index]);
-        bucket_index = new_index;
+    // If we reach bucket boundaries move over to the next one
+    if (bucket.cursor + size > bucket_size) {
+        bucket.gpu_tick = scheduler.CurrentTick();
+        MoveNextBucket();
+        return Map(size);
     }
 
-    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
-
+    const bool invalidate = std::exchange(bucket.invalid, false);
+    const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
+    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
 
     return std::make_tuple(mapped, buffer_offset, invalidate);
 }
 
 void StreamBuffer::Commit(u32 size) {
-    buffer_offset += size;
+    size = Common::AlignUp(size, 16);
+    buckets[bucket_index].cursor += size;
 }
 
 void StreamBuffer::Flush() {
     if (readback) {
+        LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer");
         return;
     }
 
-    const u32 flush_size = buffer_offset - flush_offset;
-    ASSERT(flush_size <= total_size);
-    ASSERT(flush_offset + flush_size <= total_size);
+    Bucket& bucket = buckets[bucket_index];
+    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
+    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
+    ASSERT(flush_size <= bucket_size);
+    ASSERT(flush_start + flush_size <= total_size);
 
     if (flush_size > 0) [[likely]] {
+        // Ensure all staging writes are visible to the host memory domain
         VmaAllocator allocator = instance.GetAllocator();
-        vmaFlushAllocation(allocator, staging.allocation, flush_offset, flush_size);
+        vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
         if (gpu_buffer) {
-            scheduler.Record([this, flush_offset = flush_offset,
-                              flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
+            scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer,
+                                                             vk::CommandBuffer upload_cmdbuf) {
                 const vk::BufferCopy copy_region = {
-                    .srcOffset = flush_offset, .dstOffset = flush_offset, .size = flush_size};
+                    .srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
 
                 upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
 
@@ -188,15 +183,15 @@ void StreamBuffer::Flush() {
                     .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                     .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
                     .buffer = gpu_buffer,
-                    .offset = flush_offset,
+                    .offset = flush_start,
                     .size = flush_size};
 
-                upload_cmdbuf.pipelineBarrier(
-                    vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage),
-                    vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
+                upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage),
+                                              vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
+                                              {});
             });
         }
-        flush_offset = buffer_offset;
+        bucket.flush_cursor += flush_size;
     }
 }
 
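A point worth calling out in the two Flush() hunks above: the flush window is now bucket-local. The dirty range is [flush_cursor, cursor) within the current bucket, translated to an absolute staging offset before being handed to vmaFlushAllocation and the recorded copy/barrier. A worked example with this commit's sizes (cursor values hypothetical):

    #include <cstdint>

    using u32 = std::uint32_t;

    constexpr u32 bucket_size = 64 * 1024 * 1024 / 4; // 16 MiB per bucket
    constexpr u32 bucket_index = 1;
    constexpr u32 flush_cursor = 0;  // nothing flushed from this bucket yet
    constexpr u32 cursor = 4096;     // 4 KiB committed since the last flush

    constexpr u32 flush_start = bucket_index * bucket_size + flush_cursor;
    constexpr u32 flush_size = cursor - flush_cursor;
    static_assert(flush_start == 16 * 1024 * 1024); // bucket 1 starts at 16 MiB
    static_assert(flush_size == 4096);              // only the dirty 4 KiB moves

Afterwards flush_cursor catches up to cursor, so a second Flush() with no new commits records nothing.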
@@ -205,15 +200,33 @@ void StreamBuffer::Invalidate() {
         return;
     }
 
-    const u32 flush_size = buffer_offset - flush_offset;
-    ASSERT(flush_size <= total_size);
-    ASSERT(flush_offset + flush_size <= total_size);
+    Bucket& bucket = buckets[bucket_index];
+    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
+    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
+    ASSERT(flush_size <= bucket_size);
 
     if (flush_size > 0) [[likely]] {
+        // Ensure the staging memory can be read by the host
         VmaAllocator allocator = instance.GetAllocator();
-        vmaInvalidateAllocation(allocator, staging.allocation, flush_offset, flush_size);
-        flush_offset = buffer_offset;
+        vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
+        bucket.flush_cursor += flush_size;
     }
 }
 
+void StreamBuffer::MoveNextBucket() {
+    // Flush and Invalidate are bucket local operations for simplicity so perform them here
+    if (readback) {
+        Invalidate();
+    } else {
+        Flush();
+    }
+
+    bucket_index = (bucket_index + 1) % BUCKET_COUNT;
+    Bucket& next_bucket = buckets[bucket_index];
+    scheduler.Wait(next_bucket.gpu_tick);
+    next_bucket.cursor = 0;
+    next_bucket.flush_cursor = 0;
+    next_bucket.invalid = true;
+}
+
 } // namespace Vulkan
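MoveNextBucket() is where the cross-frame synchronisation happens: flush (or invalidate, for readback buffers) the outgoing bucket, advance the index modulo BUCKET_COUNT, then wait on the incoming bucket's recorded tick so the CPU never overwrites memory the GPU may still be reading. A hypothetical tick timeline with BUCKET_COUNT = 4:

    fill bucket 0 -> gpu_tick[0] = 10, rotate to 1, Wait(gpu_tick[1] = 0): no-op
    fill bucket 1 -> gpu_tick[1] = 11, rotate to 2, Wait(0): no-op
    fill bucket 2 -> gpu_tick[2] = 12, rotate to 3, Wait(0): no-op
    fill bucket 3 -> gpu_tick[3] = 13, rotate to 0, Wait(10): stalls only if the
                     GPU has not yet executed past tick 10

In other words, the CPU can run up to three buckets ahead of the GPU before it ever blocks.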
@@ -29,11 +29,11 @@ struct StagingBuffer {
 
 class StreamBuffer {
     static constexpr u32 MAX_BUFFER_VIEWS = 3;
-    static constexpr u32 BUCKET_COUNT = 8;
+    static constexpr u32 BUCKET_COUNT = 4;
 
 public:
     /// Staging only constructor
-    StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false);
+    StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
+                 bool readback = false);
     /// Staging + GPU streaming constructor
     StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
                  vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
@@ -44,7 +44,7 @@ public:
     StreamBuffer& operator=(const StreamBuffer&) = delete;
 
     /// Maps aligned staging memory of size bytes
-    std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
+    std::tuple<u8*, u32, bool> Map(u32 size);
 
     /// Commits size bytes from the currently mapped staging memory
     void Commit(u32 size = 0);
@@ -71,6 +71,17 @@ public:
         return views[index];
     }
 
+private:
+    /// Moves to the next bucket
+    void MoveNextBucket();
+
+    struct Bucket {
+        bool invalid = false;
+        u32 gpu_tick = 0;
+        u32 cursor = 0;
+        u32 flush_cursor = 0;
+    };
+
 private:
     const Instance& instance;
     Scheduler& scheduler;
@@ -79,14 +90,12 @@ private:
     VmaAllocation allocation{};
     vk::BufferUsageFlagBits usage;
     std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
+    std::array<Bucket, BUCKET_COUNT> buckets;
     std::size_t view_count = 0;
     u32 total_size = 0;
     u32 bucket_size = 0;
-    u32 buffer_offset = 0;
-    u32 flush_offset = 0;
     u32 bucket_index = 0;
     bool readback = false;
-    std::array<u64, BUCKET_COUNT> ticks{};
 };
 
 } // namespace Vulkan
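The header side shows the consolidation the commit message alludes to: the loose buffer_offset and flush_offset members and the parallel ticks array are replaced by one Bucket record per slice, and BUCKET_COUNT halves from 8 to 4 (fewer, larger slices of the same total size). The invalid flag is how bucket reuse reaches callers: Map() surrenders it once via std::exchange, so exactly one mapping after a rotation observes invalidate == true and knows any cached GPU-side contents are stale. A small illustration of that handoff, detached from the real class:

    #include <utility>

    struct Bucket {
        bool invalid = true; // freshly rotated-into bucket
    };

    int main() {
        Bucket bucket;
        const bool first = std::exchange(bucket.invalid, false);  // true
        const bool second = std::exchange(bucket.invalid, false); // false
        return (first && !second) ? 0 : 1;
    }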
@@ -111,7 +111,7 @@ TextureRuntime::~TextureRuntime() {
 StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
     // Depth uploads require 4 byte alignment, doesn't hurt to do it for everyone
     auto& buffer = upload ? upload_buffer : download_buffer;
-    auto [data, offset, invalidate] = buffer.Map(size, 4);
+    auto [data, offset, invalidate] = buffer.Map(size);
 
     return StagingData{.buffer = buffer.GetStagingHandle(),
                        .size = size,
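The comment in this hunk still holds under the new scheme, just implicitly: rounding every mapping up to 16 bytes covers the 4-byte depth-upload requirement.

    static_assert(16 % 4 == 0); // the standard 16-byte alignment implies 4-byte alignment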