renderer_vulkan: Rewrite data streaming
* Most GPUs nowadays provide a device-local/host-visible memory heap, which avoids copies between staging and local memory. This is especially beneficial for mobile GPUs and APUs, the primary targets of this backend.
* Ports the old yuzu stream buffer, with some changes to suit our needs, and gets rid of the buffer flush methods.
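The first bullet is the heart of the change: rather than writing into a host-visible staging buffer and recording a copy into a device-local one, the stream buffer is now allocated directly from a memory type that is both host visible and device local whenever the driver exposes one. A minimal sketch of that selection logic, mirroring the FindMemoryType/GetMemoryType helpers added to vk_stream_buffer.cpp further down (helper names here are illustrative, not the committed ones):

#include <cstdint>
#include <optional>
#include <vulkan/vulkan.hpp>

// Return the first memory type whose property flags contain all wanted bits.
std::optional<uint32_t> FindType(const vk::PhysicalDeviceMemoryProperties& props,
                                 vk::MemoryPropertyFlags wanted) {
    for (uint32_t i = 0; i < props.memoryTypeCount; ++i) {
        if ((props.memoryTypes[i].propertyFlags & wanted) == wanted) {
            return i;
        }
    }
    return std::nullopt;
}

uint32_t PickStreamingMemoryType(vk::PhysicalDevice physical_device) {
    const auto props = physical_device.getMemoryProperties();
    const vk::MemoryPropertyFlags host_memory =
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
    // Prefer the device local/host visible heap the commit message refers to;
    // on mobile GPUs and APUs effectively all memory qualifies.
    if (const auto type = FindType(props, host_memory | vk::MemoryPropertyFlagBits::eDeviceLocal)) {
        return *type;
    }
    // Fall back to a plain host visible, coherent type otherwise.
    return FindType(props, host_memory).value();
}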
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.

 #include <limits>
+#include "common/alignment.h"
 #include "core/memory.h"
 #include "video_core/pica_state.h"
 #include "video_core/rasterizer_accelerated.h"
@@ -210,7 +211,7 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray
     u32 vs_input_size = 0;
     for (const auto& loader : vertex_attributes.attribute_loaders) {
         if (loader.component_count != 0) {
-            vs_input_size += loader.byte_count * vertex_num;
+            vs_input_size += Common::AlignUp(loader.byte_count * vertex_num, 4);
         }
     }

@@ -98,13 +98,12 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window, Frontend::EmuWindow*
     : RendererBase{window, secondary_window},
       telemetry_session{Core::System::GetInstance().TelemetrySession()},
       instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance,
-                                                                               renderpass_cache,
-                                                                               *this},
+                                                                               renderpass_cache},
       renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
       runtime{instance, scheduler, renderpass_cache, desc_manager}, swapchain{instance, scheduler,
                                                                               renderpass_cache},
-      vertex_buffer{
-          instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
+      vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer,
+                    VERTEX_BUFFER_SIZE},
       rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
     Report();
     window.mailbox = nullptr;
@@ -601,7 +600,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
     const auto& texcoords = screen_info.display_texcoords;

     u32 size = sizeof(ScreenRectVertex) * 4;
-    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
+    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);

     const std::array vertices = {
         ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
@@ -633,7 +632,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
                                      vk::ShaderStageFlagBits::eVertex,
                                      0, sizeof(info), &info);

-        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
+        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
         render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
     });
 }
@@ -643,7 +642,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
     const auto& texcoords = screen_info.display_texcoords;

     u32 size = sizeof(ScreenRectVertex) * 4;
-    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
+    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);

     const std::array vertices = {
         ScreenRectVertex{x, y, texcoords.bottom, texcoords.right},
@@ -672,7 +671,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
                                      vk::ShaderStageFlagBits::eVertex,
                                      0, sizeof(info), &info);

-        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
+        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
         render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
     });
 }
@@ -683,7 +682,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
     const auto& texcoords = screen_info_l.display_texcoords;

     u32 size = sizeof(ScreenRectVertex) * 4;
-    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
+    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);

     const std::array vertices = {ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
                                  ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right},
@@ -712,7 +711,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
                                      vk::ShaderStageFlagBits::eVertex,
                                      0, sizeof(info), &info);

-        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
+        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
         render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
     });
 }
@@ -723,7 +722,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
     const auto& texcoords = screen_info_l.display_texcoords;

     u32 size = sizeof(ScreenRectVertex) * 4;
-    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
+    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);

     const std::array<ScreenRectVertex, 4> vertices = {{
         ScreenRectVertex(x, y, texcoords.bottom, texcoords.right),
@@ -754,7 +753,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
                                      vk::ShaderStageFlagBits::eVertex,
                                      0, sizeof(info), &info);

-        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
+        render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
         render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
     });
 }
@@ -967,12 +966,6 @@ void RendererVulkan::SwapBuffers() {
     }
 }

-void RendererVulkan::FlushBuffers() {
-    vertex_buffer.Flush();
-    rasterizer.FlushBuffers();
-    runtime.FlushBuffers();
-}
-
 void RendererVulkan::Report() const {
     const std::string vendor_name{instance.GetVendorName()};
     const std::string model_name{instance.GetModelName()};
@@ -78,7 +78,6 @@ public:
     void PrepareVideoDumping() override {}
     void CleanupVideoDumping() override {}
     void Sync() override;
-    void FlushBuffers();

 private:
     void ReloadSampler();
@@ -238,6 +238,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)

     physical_device = physical_devices[physical_device_index];
     properties = physical_device.getProperties();
+    limits = properties.limits;

     LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}",
              properties.deviceName);
@@ -177,7 +177,12 @@ public:

     /// Returns the minimum required alignment for uniforms
     vk::DeviceSize UniformMinAlignment() const {
-        return properties.limits.minUniformBufferOffsetAlignment;
+        return limits.minUniformBufferOffsetAlignment;
     }

+    /// Returns the maximum supported elements in a texel buffer
+    u32 MaxTexelBufferElements() const {
+        return limits.maxTexelBufferElements;
+    }
+
 private:
@@ -204,6 +209,7 @@ private:
     vk::SurfaceKHR surface;
     vk::PhysicalDeviceProperties properties;
     vk::PhysicalDeviceFeatures features;
+    vk::PhysicalDeviceLimits limits;
     vk::DriverIdKHR driver_id;
     vk::DebugUtilsMessengerEXT debug_messenger;
     std::string vendor_name;
@@ -17,23 +17,16 @@
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/video_core.h"

-#include <vk_mem_alloc.h>
-
 namespace Vulkan {

-constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
-constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
-constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
-constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024;
+constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
+constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;

-constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {
-    vk::Format::eR32G32Sfloat,
-};
+constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
+                                              vk::BufferUsageFlagBits::eIndexBuffer |
+                                              vk::BufferUsageFlagBits::eUniformBuffer;

-constexpr std::array TEXTURE_BUFFER_FORMATS = {
-    vk::Format::eR32G32Sfloat,
-    vk::Format::eR32G32B32A32Sfloat,
-};
+constexpr vk::BufferUsageFlags TEX_BUFFER_USAGE = vk::BufferUsageFlagBits::eUniformTexelBuffer;

 constexpr VideoCore::SurfaceParams NULL_PARAMS = {
     .width = 1,
@@ -55,6 +48,13 @@ struct DrawParams {
     bool is_indexed;
 };

+[[nodiscard]] u64 TextureBufferSize(const Instance& instance) {
+    // Use the smallest texel size from the texel views
+    // which corresponds to eR32G32Sfloat
+    const u64 max_size = instance.MaxTexelBufferElements() * 8;
+    return std::min(max_size, TEXTURE_BUFFER_SIZE);
+}
+
 RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
                                    Scheduler& scheduler, DescriptorManager& desc_manager,
                                    TextureRuntime& runtime, RenderpassCache& renderpass_cache)
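For scale on the TextureBufferSize clamp above: the Vulkan spec only guarantees maxTexelBufferElements >= 65536, and the smallest texel view used here (eR32G32Sfloat) is 8 bytes per element, so on a minimal implementation the texel buffers shrink from the 2 MiB default to 65536 * 8 = 512 KiB.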
@@ -63,24 +63,17 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
       pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
       null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
       null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
-      vertex_buffer{
-          instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
-      uniform_buffer{
-          instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}},
-      index_buffer{
-          instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}},
-      texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
-                     vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_FORMATS},
-      texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
-                        vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
+      stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE},
+      texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
+      texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {

-    vertex_buffers.fill(vertex_buffer.GetHandle());
+    vertex_buffers.fill(stream_buffer.Handle());

     uniform_buffer_alignment = instance.UniformMinAlignment();
     uniform_size_aligned_vs =
-        Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
+        Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
     uniform_size_aligned_fs =
-        Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
+        Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);

     // Define vertex layout for software shaders
     MakeSoftwareVertexLayout();
@@ -96,15 +89,31 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan

     default_sampler = CreateSampler(default_sampler_info);

+    const vk::Device device = instance.GetDevice();
+    texture_lf_view = device.createBufferView({
+        .buffer = texture_lf_buffer.Handle(),
+        .format = vk::Format::eR32G32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+    texture_rg_view = device.createBufferView({
+        .buffer = texture_buffer.Handle(),
+        .format = vk::Format::eR32G32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+    texture_rgba_view = device.createBufferView({
+        .buffer = texture_buffer.Handle(),
+        .format = vk::Format::eR32G32B32A32Sfloat,
+        .offset = 0,
+        .range = VK_WHOLE_SIZE,
+    });
+
     // Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize
     // all descriptor sets even the ones we don't use. Use default_texture for this
     const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData);
     const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData);
-    pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size);
-    pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size);
-    pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView());
-    pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0));
-    pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1));
+    pipeline_cache.BindBuffer(0, stream_buffer.Handle(), 0, vs_uniform_size);
+    pipeline_cache.BindBuffer(1, stream_buffer.Handle(), vs_uniform_size, fs_uniform_size);
+    pipeline_cache.BindTexelBuffer(2, texture_lf_view);
+    pipeline_cache.BindTexelBuffer(3, texture_rg_view);
+    pipeline_cache.BindTexelBuffer(4, texture_rgba_view);

     for (u32 i = 0; i < 4; i++) {
         pipeline_cache.BindTexture(i, null_surface.GetImageView());
@@ -122,8 +131,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan

 RasterizerVulkan::~RasterizerVulkan() {
     scheduler.Finish();
-
-    vk::Device device = instance.GetDevice();
+    const vk::Device device = instance.GetDevice();

     for (auto& [key, sampler] : samplers) {
         device.destroySampler(sampler);
@@ -134,6 +142,9 @@ RasterizerVulkan::~RasterizerVulkan() {
     }

     device.destroySampler(default_sampler);
+    device.destroyBufferView(texture_lf_view);
+    device.destroyBufferView(texture_rg_view);
+    device.destroyBufferView(texture_rgba_view);
 }

 void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
@@ -189,7 +200,7 @@ void RasterizerVulkan::SyncFixedState() {

 void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
                                         u32 vs_input_index_max) {
-    auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size);
+    auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);

     /**
      * The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
@@ -262,11 +273,11 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi

         // Keep track of the binding offsets so we can bind the vertex buffer later
         binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
-        buffer_offset += Common::AlignUp(data_size, 16);
+        buffer_offset += Common::AlignUp(data_size, 4);
     }

     binding_offsets[layout.binding_count] = array_offset + buffer_offset;
-    vertex_buffer.Commit(buffer_offset);
+    stream_buffer.Commit(buffer_offset);

     // Assign the rest of the attributes to the last binding
     SetupFixedAttribs();
@@ -283,7 +294,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
     const auto& vertex_attributes = regs.pipeline.vertex_attributes;
     VertexLayout& layout = pipeline_info.vertex_layout;

-    auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
+    auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0);

     // Reserve the last binding for fixed and default attributes
     // Place the default attrib at offset zero for easy access
@@ -336,7 +347,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
     binding.fixed.Assign(1);
     binding.stride.Assign(offset);

-    vertex_buffer.Commit(offset);
+    stream_buffer.Commit(offset);
 }

 MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
@@ -430,7 +441,7 @@ void RasterizerVulkan::SetupIndexArray() {
                                              regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
                                              regs.pipeline.index_array.offset);

-    auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
+    auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
     if (index_u8 && !native_u8) {
         u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
         for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
@@ -440,11 +451,11 @@ void RasterizerVulkan::SetupIndexArray() {
         std::memcpy(index_ptr, index_data, index_buffer_size);
     }

-    index_buffer.Commit(index_buffer_size);
+    stream_buffer.Commit(index_buffer_size);

     scheduler.Record([this, index_offset = index_offset,
                       index_type = index_type](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-        render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type);
+        render_cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
     });
 }

@@ -759,13 +770,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         const u32 vertex_size = vertices * sizeof(HardwareVertex);

         // Copy vertex data
-        auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size);
+        auto [array_ptr, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
         std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
-        vertex_buffer.Commit(vertex_size);
+        stream_buffer.Commit(vertex_size);

         scheduler.Record([this, vertices, base_vertex,
                           offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-            render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
+            render_cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
             render_cmdbuf.draw(vertices, 1, base_vertex, 0);
         });
     }
@@ -787,11 +798,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
                                depth_surface);
     }

-    static int submit_threshold = 50;
+    static int submit_threshold = 20;
     submit_threshold--;
     if (!submit_threshold) {
-        submit_threshold = 50;
-        scheduler.DispatchWork();
+        submit_threshold = 20;
+        scheduler.Flush();
     }

     return succeeded;
@@ -1152,14 +1163,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
     return device.createFramebuffer(framebuffer_info);
 }

-void RasterizerVulkan::FlushBuffers() {
-    vertex_buffer.Flush();
-    uniform_buffer.Flush();
-    index_buffer.Flush();
-    texture_buffer.Flush();
-    texture_lf_buffer.Flush();
-}
-
 void RasterizerVulkan::SyncClipEnabled() {
     uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
 }
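With the stream buffers now allocated from host-visible, host-coherent memory and written by the CPU in place, there is no staging copy left to flush, which is why this method and its counterparts in RendererVulkan and TextureRuntime are deleted outright rather than reimplemented.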
@@ -1294,7 +1297,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
     }

     std::size_t bytes_used = 0;
-    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size);
+    auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));

     // Sync the lighting luts
     if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@@ -1360,7 +1363,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
     }

     std::size_t bytes_used = 0;
-    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size);
+    auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));

     // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
     auto SyncProcTexValueLUT =
@@ -1460,16 +1463,16 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
         return;
     }

-    u32 used_bytes = 0;
-    const u32 uniform_size = static_cast<u32>(uniform_size_aligned_vs + uniform_size_aligned_fs);
-    auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size);
+    const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
+    auto [uniforms, offset, invalidate] = stream_buffer.Map(uniform_size, uniform_buffer_alignment);

+    u32 used_bytes = 0;
     if (sync_vs) {
         Pica::Shader::VSUniformData vs_uniforms;
         vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
         std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));

-        pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes,
+        pipeline_cache.BindBuffer(0, stream_buffer.Handle(), offset + used_bytes,
                                   sizeof(vs_uniforms));
         used_bytes += static_cast<u32>(uniform_size_aligned_vs);
     }
@@ -1478,13 +1481,13 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
         std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
                     sizeof(Pica::Shader::UniformData));

-        pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes,
+        pipeline_cache.BindBuffer(1, stream_buffer.Handle(), offset + used_bytes,
                                   sizeof(uniform_block_data.data));
         uniform_block_data.dirty = false;
         used_bytes += static_cast<u32>(uniform_size_aligned_fs);
     }

-    uniform_buffer.Commit(used_bytes);
+    stream_buffer.Commit(used_bytes);
 }

 } // namespace Vulkan
@@ -7,6 +7,7 @@
 #include "core/hw/gpu.h"
 #include "video_core/rasterizer_accelerated.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
+#include "video_core/renderer_vulkan/vk_stream_buffer.h"
 #include "video_core/renderer_vulkan/vk_texture_runtime.h"

@@ -101,9 +102,6 @@ public:
     /// Sync fixed function pipeline state
     void SyncFixedState();

-    /// Flushes all rasterizer owned buffers
-    void FlushBuffers();
-
 private:
     void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;

@@ -201,16 +199,17 @@ private:
     SamplerInfo texture_cube_sampler;
     std::unordered_map<SamplerInfo, vk::Sampler> samplers;
     std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;

-    StreamBuffer vertex_buffer;
-    StreamBuffer uniform_buffer;
-    StreamBuffer index_buffer;
-    StreamBuffer texture_buffer;
-    StreamBuffer texture_lf_buffer;
     PipelineInfo pipeline_info;
-    std::size_t uniform_buffer_alignment;
-    std::size_t uniform_size_aligned_vs;
-    std::size_t uniform_size_aligned_fs;
+    StreamBuffer stream_buffer;     ///< Vertex+Index+Uniform buffer
+    StreamBuffer texture_buffer;    ///< Texture buffer
+    StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer
+    vk::BufferView texture_lf_view;
+    vk::BufferView texture_rg_view;
+    vk::BufferView texture_rgba_view;
+    u64 uniform_buffer_alignment;
+    u64 uniform_size_aligned_vs;
+    u64 uniform_size_aligned_fs;
 };

 } // namespace Vulkan
@@ -26,10 +26,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf,
     last = nullptr;
 }

-Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
-                     RendererVulkan& renderer)
-    : instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer},
-      master_semaphore{instance}, command_pool{instance, master_semaphore}, stop_requested{false},
+Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache)
+    : instance{instance}, renderpass_cache{renderpass_cache}, master_semaphore{instance},
+      command_pool{instance, master_semaphore}, stop_requested{false},
       use_worker_thread{Settings::values.async_command_recording} {
     AllocateWorkerCommandBuffers();
     if (use_worker_thread) {
@@ -133,10 +132,9 @@ void Scheduler::AllocateWorkerCommandBuffers() {

 MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255));
 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
-    const auto handle = master_semaphore.Handle();
+    const vk::Semaphore handle = master_semaphore.Handle();
     const u64 signal_value = master_semaphore.NextTick();
     state = StateFlags::AllDirty;
-    renderer.FlushBuffers();

     renderpass_cache.ExitRenderpass();
     Record([signal_semaphore, wait_semaphore, handle, signal_value,
@@ -28,14 +28,12 @@ DECLARE_ENUM_FLAG_OPERATORS(StateFlags)

 class Instance;
 class RenderpassCache;
-class RendererVulkan;

 /// The scheduler abstracts command buffer and fence management with an interface that's able to do
 /// OpenGL-like operations on Vulkan command buffers.
 class Scheduler {
 public:
-    explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
-                       RendererVulkan& renderer);
+    explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache);
     ~Scheduler();

     /// Sends the current execution context to the GPU.
@@ -198,7 +196,6 @@ private:
 private:
     const Instance& instance;
     RenderpassCache& renderpass_cache;
-    RendererVulkan& renderer;
     MasterSemaphore master_semaphore;
     CommandPool command_pool;
     std::unique_ptr<CommandChunk> chunk;
@@ -1,243 +1,155 @@
 // Copyright 2022 Citra Emulator Project
+// Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

 #include <algorithm>
 #include <limits>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"

-#include <vk_mem_alloc.h>
-
 namespace Vulkan {

-[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) {
-    switch (usage) {
-    case vk::BufferUsageFlagBits::eVertexBuffer:
-        return vk::AccessFlagBits::eVertexAttributeRead;
-    case vk::BufferUsageFlagBits::eIndexBuffer:
-        return vk::AccessFlagBits::eIndexRead;
-    case vk::BufferUsageFlagBits::eUniformBuffer:
-        return vk::AccessFlagBits::eUniformRead;
-    case vk::BufferUsageFlagBits::eUniformTexelBuffer:
-        return vk::AccessFlagBits::eShaderRead;
-    default:
-        LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
-        UNREACHABLE();
-    }
-    return vk::AccessFlagBits::eNone;
-}
-
-[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) {
-    switch (usage) {
-    case vk::BufferUsageFlagBits::eVertexBuffer:
-        return vk::PipelineStageFlagBits::eVertexInput;
-    case vk::BufferUsageFlagBits::eIndexBuffer:
-        return vk::PipelineStageFlagBits::eVertexInput;
-    case vk::BufferUsageFlagBits::eUniformBuffer:
-        return vk::PipelineStageFlagBits::eVertexShader |
-               vk::PipelineStageFlagBits::eFragmentShader;
-    case vk::BufferUsageFlagBits::eUniformTexelBuffer:
-        return vk::PipelineStageFlagBits::eFragmentShader;
-    default:
-        LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
-        UNREACHABLE();
-    }
-    return vk::PipelineStageFlagBits::eNone;
-}
-
-StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
-    : instance{instance} {
-    const vk::BufferUsageFlags usage =
-        readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
-    const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
-
-    const VmaAllocationCreateFlags flags =
-        readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
-                 : VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
-    const VmaAllocationCreateInfo alloc_create_info = {
-        .flags = flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
-    };
-
-    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
-    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
-    VmaAllocationInfo alloc_info;
-    VmaAllocator allocator = instance.GetAllocator();
-
-    vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
-                    &alloc_info);
-
-    buffer = vk::Buffer{unsafe_buffer};
-    mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
-}
-
-StagingBuffer::~StagingBuffer() {
-    vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
-}
-
-StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback)
-    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, total_size{size},
-      bucket_size{size / BUCKET_COUNT}, readback{readback} {}
-
-StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
-                           vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
-                           bool readback)
-    : instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, usage{usage},
-      total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
-
-    const vk::BufferCreateInfo buffer_info = {
-        .size = total_size,
-        .usage = usage | vk::BufferUsageFlagBits::eTransferDst,
-    };
-
-    const VmaAllocationCreateInfo alloc_create_info = {
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
-    };
-
-    VkBuffer unsafe_buffer = VK_NULL_HANDLE;
-    VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
-    VmaAllocationInfo alloc_info;
-    VmaAllocator allocator = instance.GetAllocator();
-
-    vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
-                    &alloc_info);
-
-    gpu_buffer = vk::Buffer{unsafe_buffer};
-
-    ASSERT(view_formats.size() < MAX_BUFFER_VIEWS);
-
-    vk::Device device = instance.GetDevice();
-    for (std::size_t i = 0; i < view_formats.size(); i++) {
-        const vk::BufferViewCreateInfo view_info = {
-            .buffer = gpu_buffer,
-            .format = view_formats[i],
-            .offset = 0,
-            .range = total_size,
-        };
-
-        views[i] = device.createBufferView(view_info);
-    }
-
-    view_count = view_formats.size();
-}
+namespace {
+
+constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
+constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
+
+/// Find a memory type with the passed requirements
+std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
+                                  vk::MemoryPropertyFlags wanted,
+                                  u32 filter = std::numeric_limits<u32>::max()) {
+    for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
+        const auto flags = properties.memoryTypes[i].propertyFlags;
+        if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
+            return i;
+        }
+    }
+    return std::nullopt;
+}
+
+/// Get the preferred host visible memory type.
+u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, bool readback,
+                  u32 filter = std::numeric_limits<u32>::max()) {
+    // Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
+    // Otherwise search for a host visible allocation.
+    const vk::MemoryPropertyFlags HOST_MEMORY =
+        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
+    const vk::MemoryPropertyFlags DYNAMIC_MEMORY =
+        HOST_MEMORY | (readback ? vk::MemoryPropertyFlagBits::eHostCached
+                                : vk::MemoryPropertyFlagBits::eDeviceLocal);
+
+    std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
+    if (!preferred_type) {
+        preferred_type = FindMemoryType(properties, HOST_MEMORY);
+        ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
+    }
+    return preferred_type.value_or(0);
+}
+
+} // Anonymous namespace
+
+StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
+                           vk::BufferUsageFlags usage_, u64 size, bool readback_)
+    : instance{instance_}, scheduler{scheduler_}, usage{usage_}, readback{readback_} {
+    CreateBuffers(size);
+    ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
+    ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
+}

 StreamBuffer::~StreamBuffer() {
-    if (gpu_buffer) {
-        vk::Device device = instance.GetDevice();
-        vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(gpu_buffer), allocation);
-        for (std::size_t i = 0; i < view_count; i++) {
-            device.destroyBufferView(views[i]);
-        }
-    }
+    const vk::Device device = instance.GetDevice();
+    device.unmapMemory(memory);
+    device.destroyBuffer(buffer);
+    device.freeMemory(memory);
 }

-std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
-    ASSERT(size <= total_size);
-    size = Common::AlignUp(size, 16);
-
-    Bucket& bucket = buckets[bucket_index];
-
-    if (bucket.cursor + size > bucket_size) {
-        bucket.gpu_tick = scheduler.CurrentTick();
-        MoveNextBucket();
-        return Map(size);
-    }
-
-    const bool invalidate = std::exchange(bucket.invalid, false);
-    const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
-    u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
-
-    return std::make_tuple(mapped, buffer_offset, invalidate);
-}
-
-void StreamBuffer::Commit(u32 size) {
-    size = Common::AlignUp(size, 16);
-    buckets[bucket_index].cursor += size;
-}
-
-void StreamBuffer::Flush() {
-    if (readback) {
-        LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer");
-        return;
-    }
-
-    Bucket& bucket = buckets[bucket_index];
-    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
-    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
-    ASSERT(flush_size <= bucket_size);
-    ASSERT(flush_start + flush_size <= total_size);
-
-    // Ensure all staging writes are visible to the host memory domain
-    if (flush_size > 0) [[likely]] {
-        VmaAllocator allocator = instance.GetAllocator();
-        vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
-        if (gpu_buffer) {
-            scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer,
-                                                             vk::CommandBuffer upload_cmdbuf) {
-                const vk::BufferCopy copy_region = {
-                    .srcOffset = flush_start,
-                    .dstOffset = flush_start,
-                    .size = flush_size,
-                };
-
-                upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
-
-                const vk::BufferMemoryBarrier buffer_barrier = {
-                    .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
-                    .dstAccessMask = MakeAccessFlags(usage),
-                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .buffer = gpu_buffer,
-                    .offset = flush_start,
-                    .size = flush_size,
-                };
-
-                upload_cmdbuf.pipelineBarrier(
-                    vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage),
-                    vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
-            });
-        }
-        bucket.flush_cursor += flush_size;
-    }
-}
-
-void StreamBuffer::Invalidate() {
-    if (!readback) {
-        return;
-    }
-
-    Bucket& bucket = buckets[bucket_index];
-    const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
-    const u32 flush_size = bucket.cursor - bucket.flush_cursor;
-    ASSERT(flush_size <= bucket_size);
-
-    if (flush_size > 0) [[likely]] {
-        // Ensure the staging memory can be read by the host
-        VmaAllocator allocator = instance.GetAllocator();
-        vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
-        bucket.flush_cursor += flush_size;
-    }
-}
-
-void StreamBuffer::MoveNextBucket() {
-    // Flush and Invalidate are bucket local operations for simplicity so perform them here
-    if (readback) {
-        Invalidate();
-    } else {
-        Flush();
-    }
-
-    bucket_index = (bucket_index + 1) % BUCKET_COUNT;
-    Bucket& next_bucket = buckets[bucket_index];
-    scheduler.Wait(next_bucket.gpu_tick);
-    next_bucket.cursor = 0;
-    next_bucket.flush_cursor = 0;
-    next_bucket.invalid = true;
-}
+std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
+    ASSERT(size <= stream_buffer_size);
+    mapped_size = size;
+
+    if (alignment > 0) {
+        offset = Common::AlignUp(offset, alignment);
+    }
+
+    WaitPendingOperations(offset);
+
+    bool invalidate{false};
+    if (offset + size > stream_buffer_size) {
+        // The buffer would overflow, save the amount of used watches and reset the state.
+        invalidate = true;
+        invalidation_mark = current_watch_cursor;
+        current_watch_cursor = 0;
+        offset = 0;
+
+        // Swap watches and reset waiting cursors.
+        std::swap(previous_watches, current_watches);
+        wait_cursor = 0;
+        wait_bound = 0;
+    }
+
+    return std::make_tuple(mapped + offset, offset, invalidate);
+}
+
+void StreamBuffer::Commit(u64 size) {
+    ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
+               size);
+
+    offset += size;
+
+    if (current_watch_cursor + 1 >= current_watches.size()) {
+        // Ensure that there are enough watches.
+        ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
+    }
+    auto& watch = current_watches[current_watch_cursor++];
+    watch.upper_bound = offset;
+    watch.tick = scheduler.CurrentTick();
+}
+
+void StreamBuffer::CreateBuffers(u64 prefered_size) {
+    const vk::Device device = instance.GetDevice();
+    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
+    const u32 preferred_type = GetMemoryType(memory_properties, readback);
+    const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
+
+    // Substract from the preferred heap size some bytes to avoid getting out of memory.
+    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
+    // As per DXVK's example, using `heap_size / 2`
+    const VkDeviceSize allocable_size = heap_size / 2;
+    buffer = device.createBuffer({
+        .size = std::min(prefered_size, allocable_size),
+        .usage = usage,
+    });
+
+    const auto requirements = device.getBufferMemoryRequirements(buffer);
+    const u32 required_flags = requirements.memoryTypeBits;
+    stream_buffer_size = static_cast<u64>(requirements.size);
+
+    memory = device.allocateMemory({
+        .allocationSize = requirements.size,
+        .memoryTypeIndex = GetMemoryType(memory_properties, readback, required_flags),
+    });
+
+    device.bindBufferMemory(buffer, memory, 0);
+    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
+}
+
+void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
+    watches.resize(watches.size() + grow_size);
+}
+
+void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
+    if (!invalidation_mark) {
+        return;
+    }
+    while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
+        auto& watch = previous_watches[wait_cursor];
+        wait_bound = watch.upper_bound;
+        scheduler.Wait(watch.tick);
+        ++wait_cursor;
+    }
+}

 } // namespace Vulkan
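Taken together, Map/Commit implement yuzu's tick-watch scheme: Commit records the scheduler's current tick against the buffer's high-water offset, and a later Map that wraps back over that region waits on the recorded tick before handing out the pointer. A sketch of the caller-side pattern the rasterizer hunks above switch to (types and the Record signature are taken from the diff; the helper itself is hypothetical):

#include <cstring>

void UploadVertices(StreamBuffer& stream_buffer, Scheduler& scheduler,
                    std::span<const HardwareVertex> batch) {
    const u64 size = batch.size_bytes();
    // Map aligns the internal offset, waits on any watch still covering the
    // region, and reports whether the buffer wrapped around (invalidate).
    auto [ptr, offset, invalidate] = stream_buffer.Map(size, sizeof(HardwareVertex));
    std::memcpy(ptr, batch.data(), size);
    // Commit advances the offset and records a watch at the current tick so a
    // future Map can wait for the GPU to finish reading this region.
    stream_buffer.Commit(size);
    scheduler.Record([&stream_buffer, offset, count = batch.size()](
                         vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
        render_cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
        render_cmdbuf.draw(static_cast<u32>(count), 1, 0, 0);
    });
}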
@@ -1,101 +1,81 @@
 // Copyright 2022 Citra Emulator Project
+// Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.

 #pragma once

-#include <array>
-#include <map>
+#include <optional>
 #include <span>
-#include "common/assert.h"
+#include <tuple>
+#include <vector>
 #include "video_core/renderer_vulkan/vk_common.h"

-VK_DEFINE_HANDLE(VmaAllocation)
-
 namespace Vulkan {

 class Instance;
 class Scheduler;

-struct StagingBuffer {
-    StagingBuffer(const Instance& instance, u32 size, bool readback);
-    ~StagingBuffer();
-
-    const Instance& instance;
-    vk::Buffer buffer{};
-    VmaAllocation allocation{};
-    std::span<std::byte> mapped{};
-};
-
-class StreamBuffer {
-    static constexpr u32 MAX_BUFFER_VIEWS = 3;
-    static constexpr u32 BUCKET_COUNT = 2;
+class StreamBuffer final {
+    static constexpr std::size_t MAX_BUFFER_VIEWS = 3;

 public:
-    /// Staging only constructor
-    StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false);
-    /// Staging + GPU streaming constructor
-    StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
-                 vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
-                 bool readback = false);
+    explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
+                          vk::BufferUsageFlags usage, u64 size, bool readback = false);
     ~StreamBuffer();

-    StreamBuffer(const StreamBuffer&) = delete;
-    StreamBuffer& operator=(const StreamBuffer&) = delete;
+    /**
+     * Reserves a region of memory from the stream buffer.
+     * @param size Size to reserve.
+     * @returns A pair of a raw memory pointer (with offset added), and the buffer offset
+     */
+    std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);

-    /// Maps aligned staging memory of size bytes
-    std::tuple<u8*, u32, bool> Map(u32 size);
+    /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
+    void Commit(u64 size);

-    /// Commits size bytes from the currently mapped staging memory
-    void Commit(u32 size = 0);
-
-    /// Flushes staging memory to the GPU buffer
-    void Flush();
-
-    /// Invalidates staging memory for reading
-    void Invalidate();
-
-    /// Returns the GPU buffer handle
-    [[nodiscard]] vk::Buffer GetHandle() const {
-        return gpu_buffer;
+    vk::Buffer Handle() const noexcept {
+        return buffer;
     }

-    /// Returns the staging buffer handle
-    [[nodiscard]] vk::Buffer GetStagingHandle() const {
-        return staging.buffer;
-    }
-
-    /// Returns an immutable reference to the requested buffer view
-    [[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const {
-        ASSERT(index < view_count);
-        return views[index];
+    u64 Address() const noexcept {
+        return 0;
     }

 private:
-    /// Moves to the next bucket
-    void MoveNextBucket();
-
-    struct Bucket {
-        bool invalid = false;
-        u32 gpu_tick = 0;
-        u32 cursor = 0;
-        u32 flush_cursor = 0;
+    struct Watch {
+        u64 tick{};
+        u64 upper_bound{};
     };

+    /// Creates Vulkan buffer handles committing the required the required memory.
+    void CreateBuffers(u64 prefered_size);
+
+    /// Increases the amount of watches available.
+    void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
+
+    void WaitPendingOperations(u64 requested_upper_bound);
+
 private:
-    const Instance& instance;
-    Scheduler& scheduler;
-    StagingBuffer staging;
-    vk::Buffer gpu_buffer{};
-    VmaAllocation allocation{};
-    vk::BufferUsageFlagBits usage;
-    std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
-    std::array<Bucket, BUCKET_COUNT> buckets;
-    std::size_t view_count = 0;
-    u32 total_size = 0;
-    u32 bucket_size = 0;
-    u32 bucket_index = 0;
-    bool readback = false;
+    const Instance& instance; ///< Vulkan instance.
+    Scheduler& scheduler;     ///< Command scheduler.
+
+    vk::Buffer buffer;        ///< Mapped buffer.
+    vk::DeviceMemory memory;  ///< Memory allocation.
+    u8* mapped{};             ///< Pointer to the mapped memory
+    u64 stream_buffer_size{}; ///< Stream buffer size.
+    vk::BufferUsageFlags usage{};
+    bool readback{};          ///< Flag indicating if the buffer should use cached memory
+
+    u64 offset{};      ///< Buffer iterator.
+    u64 mapped_size{}; ///< Size reserved for the current copy.
+
+    std::vector<Watch> current_watches;           ///< Watches recorded in the current iteration.
+    std::size_t current_watch_cursor{};           ///< Count of watches, reset on invalidation.
+    std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
+
+    std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
+    std::size_t wait_cursor{};           ///< Last watch being waited for completion.
+    u64 wait_bound{};                    ///< Highest offset being watched for completion.
 };

 } // namespace Vulkan
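One header detail worth noting: the readback flag feeds into GetMemoryType above, swapping eDeviceLocal for eHostCached, because CPU reads from uncached write-combined memory are extremely slow; this is why the download buffer in the vk_texture_runtime.cpp hunk below passes true.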
@@ -103,16 +103,16 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
     return depth_offset;
 }

-constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
-constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u64 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
+constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;

 TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
                                RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
     : instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
       desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
-      upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler,
-                                                                              DOWNLOAD_BUFFER_SIZE,
-                                                                              true} {
+      upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
+      download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
+                      DOWNLOAD_BUFFER_SIZE, true} {

     auto Register = [this](VideoCore::PixelFormat dest,
                            std::unique_ptr<FormatReinterpreterBase>&& obj) {
@@ -153,25 +153,20 @@ TextureRuntime::~TextureRuntime() {

 StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
     auto& buffer = upload ? upload_buffer : download_buffer;
-    auto [data, offset, invalidate] = buffer.Map(size);
+    auto [data, offset, invalidate] = buffer.Map(size, 4);

     return StagingData{
-        .buffer = buffer.GetStagingHandle(),
+        .buffer = buffer.Handle(),
         .size = size,
         .mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
         .buffer_offset = offset,
     };
 }

-void TextureRuntime::FlushBuffers() {
-    upload_buffer.Flush();
-}
-
 MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
 void TextureRuntime::Finish() {
     MICROPROFILE_SCOPE(Vulkan_Finish);
     scheduler.Finish();
-    download_buffer.Invalidate();
 }

 ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
@@ -415,7 +410,8 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
     };

     if (clear.texture_rect == surface.GetScaledRect()) {
-        scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
+        scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf,
+                                                vk::CommandBuffer) {
             const vk::ImageSubresourceRange range = {
                 .aspectMask = params.aspect,
                 .baseMipLevel = clear.texture_level,
@@ -458,20 +454,25 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
                 },
             };

-            render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
+            render_cmdbuf.pipelineBarrier(params.pipeline_flags,
+                                          vk::PipelineStageFlagBits::eTransfer,
                                           vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);

-            const bool is_color = static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
+            const bool is_color =
+                static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
             if (is_color) {
-                render_cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
+                render_cmdbuf.clearColorImage(params.src_image,
+                                              vk::ImageLayout::eTransferDstOptimal,
                                               MakeClearColorValue(value), range);
             } else {
-                render_cmdbuf.clearDepthStencilImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
+                render_cmdbuf.clearDepthStencilImage(params.src_image,
+                                                     vk::ImageLayout::eTransferDstOptimal,
                                                      MakeClearDepthStencilValue(value), range);
             }

-            render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
-                                          vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
+            render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
+                                          params.pipeline_flags, vk::DependencyFlagBits::eByRegion,
+                                          {}, {}, post_barrier);
         });
         return true;
     }
@@ -528,34 +529,34 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface,
         .src_image = surface.alloc.image,
     };

-    scheduler.Record(
-        [params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-            const vk::ImageMemoryBarrier pre_barrier = {
-                .srcAccessMask = params.src_access,
-                .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
-                .oldLayout = vk::ImageLayout::eGeneral,
-                .newLayout = vk::ImageLayout::eGeneral,
-                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                .image = params.src_image,
-                .subresourceRange{
-                    .aspectMask = params.aspect,
-                    .baseMipLevel = level,
-                    .levelCount = 1,
-                    .baseArrayLayer = 0,
-                    .layerCount = VK_REMAINING_ARRAY_LAYERS,
-                },
-            };
-
-            render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
-                                          vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
-        });
+    scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
+                                                           vk::CommandBuffer) {
+        const vk::ImageMemoryBarrier pre_barrier = {
+            .srcAccessMask = params.src_access,
+            .dstAccessMask = vk::AccessFlagBits::eTransferWrite,
+            .oldLayout = vk::ImageLayout::eGeneral,
+            .newLayout = vk::ImageLayout::eGeneral,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = params.src_image,
+            .subresourceRange{
+                .aspectMask = params.aspect,
+                .baseMipLevel = level,
+                .levelCount = 1,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+
+        render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
+                                      vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
+    });

     renderpass_cache.EnterRenderpass(clear_info);
     renderpass_cache.ExitRenderpass();

-    scheduler.Record([params, level = clear.texture_level]
-                     (vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
+    scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
+                                                           vk::CommandBuffer) {
         const vk::ImageMemoryBarrier post_barrier = {
             .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
             .dstAccessMask = params.src_access,
@@ -15,13 +15,15 @@
 #include "video_core/renderer_vulkan/vk_layout_tracker.h"
 #include "video_core/renderer_vulkan/vk_stream_buffer.h"

+VK_DEFINE_HANDLE(VmaAllocation)
+
 namespace Vulkan {

 struct StagingData {
     vk::Buffer buffer;
     u32 size = 0;
     std::span<std::byte> mapped{};
-    u32 buffer_offset = 0;
+    u64 buffer_offset = 0;
 };

 struct ImageAlloc {
@@ -127,9 +129,6 @@ public:
     /// Generates mipmaps for all the available levels of the texture
     void GenerateMipmaps(Surface& surface, u32 max_level);

-    /// Flushes staging buffers
-    void FlushBuffers();
-
     /// Returns all source formats that support reinterpretation to the dest format
     [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
         VideoCore::PixelFormat dest_format) const;