renderer_vulkan: Rewrite data streaming
* Most GPUs nowadays provide a device-local, host-visible memory heap, which is useful for avoiding copies between staging and device-local memory and is especially beneficial for mobile GPUs and APUs, which are the main targets of this backend. * This commit ports the old yuzu stream buffer, with some changes to suit our needs, and removes the buffer flush methods.
This commit is contained in:
@@ -3,6 +3,7 @@
|
|||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <limits>
|
#include <limits>
|
||||||
|
#include "common/alignment.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/pica_state.h"
|
#include "video_core/pica_state.h"
|
||||||
#include "video_core/rasterizer_accelerated.h"
|
#include "video_core/rasterizer_accelerated.h"
|
||||||
@@ -210,7 +211,7 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray
|
|||||||
u32 vs_input_size = 0;
|
u32 vs_input_size = 0;
|
||||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||||
if (loader.component_count != 0) {
|
if (loader.component_count != 0) {
|
||||||
vs_input_size += loader.byte_count * vertex_num;
|
vs_input_size += Common::AlignUp(loader.byte_count * vertex_num, 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -98,13 +98,12 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window, Frontend::EmuWindow*
|
|||||||
: RendererBase{window, secondary_window},
|
: RendererBase{window, secondary_window},
|
||||||
telemetry_session{Core::System::GetInstance().TelemetrySession()},
|
telemetry_session{Core::System::GetInstance().TelemetrySession()},
|
||||||
instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance,
|
instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance,
|
||||||
renderpass_cache,
|
renderpass_cache},
|
||||||
*this},
|
|
||||||
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
|
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
|
||||||
runtime{instance, scheduler, renderpass_cache, desc_manager}, swapchain{instance, scheduler,
|
runtime{instance, scheduler, renderpass_cache, desc_manager}, swapchain{instance, scheduler,
|
||||||
renderpass_cache},
|
renderpass_cache},
|
||||||
vertex_buffer{
|
vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer,
|
||||||
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
|
VERTEX_BUFFER_SIZE},
|
||||||
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
|
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
|
||||||
Report();
|
Report();
|
||||||
window.mailbox = nullptr;
|
window.mailbox = nullptr;
|
||||||
@@ -601,7 +600,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
|
|||||||
const auto& texcoords = screen_info.display_texcoords;
|
const auto& texcoords = screen_info.display_texcoords;
|
||||||
|
|
||||||
u32 size = sizeof(ScreenRectVertex) * 4;
|
u32 size = sizeof(ScreenRectVertex) * 4;
|
||||||
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
|
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
|
||||||
|
|
||||||
const std::array vertices = {
|
const std::array vertices = {
|
||||||
ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
|
ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
|
||||||
@@ -633,7 +632,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
|
|||||||
vk::ShaderStageFlagBits::eVertex,
|
vk::ShaderStageFlagBits::eVertex,
|
||||||
0, sizeof(info), &info);
|
0, sizeof(info), &info);
|
||||||
|
|
||||||
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
|
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
|
||||||
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -643,7 +642,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
|
|||||||
const auto& texcoords = screen_info.display_texcoords;
|
const auto& texcoords = screen_info.display_texcoords;
|
||||||
|
|
||||||
u32 size = sizeof(ScreenRectVertex) * 4;
|
u32 size = sizeof(ScreenRectVertex) * 4;
|
||||||
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
|
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
|
||||||
|
|
||||||
const std::array vertices = {
|
const std::array vertices = {
|
||||||
ScreenRectVertex{x, y, texcoords.bottom, texcoords.right},
|
ScreenRectVertex{x, y, texcoords.bottom, texcoords.right},
|
||||||
@@ -672,7 +671,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
|
|||||||
vk::ShaderStageFlagBits::eVertex,
|
vk::ShaderStageFlagBits::eVertex,
|
||||||
0, sizeof(info), &info);
|
0, sizeof(info), &info);
|
||||||
|
|
||||||
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
|
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
|
||||||
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -683,7 +682,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
|
|||||||
const auto& texcoords = screen_info_l.display_texcoords;
|
const auto& texcoords = screen_info_l.display_texcoords;
|
||||||
|
|
||||||
u32 size = sizeof(ScreenRectVertex) * 4;
|
u32 size = sizeof(ScreenRectVertex) * 4;
|
||||||
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
|
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
|
||||||
|
|
||||||
const std::array vertices = {ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
|
const std::array vertices = {ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
|
||||||
ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right},
|
ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right},
|
||||||
@@ -712,7 +711,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
|
|||||||
vk::ShaderStageFlagBits::eVertex,
|
vk::ShaderStageFlagBits::eVertex,
|
||||||
0, sizeof(info), &info);
|
0, sizeof(info), &info);
|
||||||
|
|
||||||
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
|
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
|
||||||
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -723,7 +722,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
|
|||||||
const auto& texcoords = screen_info_l.display_texcoords;
|
const auto& texcoords = screen_info_l.display_texcoords;
|
||||||
|
|
||||||
u32 size = sizeof(ScreenRectVertex) * 4;
|
u32 size = sizeof(ScreenRectVertex) * 4;
|
||||||
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
|
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
|
||||||
|
|
||||||
const std::array<ScreenRectVertex, 4> vertices = {{
|
const std::array<ScreenRectVertex, 4> vertices = {{
|
||||||
ScreenRectVertex(x, y, texcoords.bottom, texcoords.right),
|
ScreenRectVertex(x, y, texcoords.bottom, texcoords.right),
|
||||||
@@ -754,7 +753,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
|
|||||||
vk::ShaderStageFlagBits::eVertex,
|
vk::ShaderStageFlagBits::eVertex,
|
||||||
0, sizeof(info), &info);
|
0, sizeof(info), &info);
|
||||||
|
|
||||||
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
|
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
|
||||||
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -967,12 +966,6 @@ void RendererVulkan::SwapBuffers() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererVulkan::FlushBuffers() {
|
|
||||||
vertex_buffer.Flush();
|
|
||||||
rasterizer.FlushBuffers();
|
|
||||||
runtime.FlushBuffers();
|
|
||||||
}
|
|
||||||
|
|
||||||
void RendererVulkan::Report() const {
|
void RendererVulkan::Report() const {
|
||||||
const std::string vendor_name{instance.GetVendorName()};
|
const std::string vendor_name{instance.GetVendorName()};
|
||||||
const std::string model_name{instance.GetModelName()};
|
const std::string model_name{instance.GetModelName()};
|
||||||
|
@@ -78,7 +78,6 @@ public:
|
|||||||
void PrepareVideoDumping() override {}
|
void PrepareVideoDumping() override {}
|
||||||
void CleanupVideoDumping() override {}
|
void CleanupVideoDumping() override {}
|
||||||
void Sync() override;
|
void Sync() override;
|
||||||
void FlushBuffers();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void ReloadSampler();
|
void ReloadSampler();
|
||||||
|
@@ -238,6 +238,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)
|
|||||||
|
|
||||||
physical_device = physical_devices[physical_device_index];
|
physical_device = physical_devices[physical_device_index];
|
||||||
properties = physical_device.getProperties();
|
properties = physical_device.getProperties();
|
||||||
|
limits = properties.limits;
|
||||||
|
|
||||||
LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}",
|
LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}",
|
||||||
properties.deviceName);
|
properties.deviceName);
|
||||||
|
@@ -177,7 +177,12 @@ public:
|
|||||||
|
|
||||||
/// Returns the minimum required alignment for uniforms
|
/// Returns the minimum required alignment for uniforms
|
||||||
vk::DeviceSize UniformMinAlignment() const {
|
vk::DeviceSize UniformMinAlignment() const {
|
||||||
return properties.limits.minUniformBufferOffsetAlignment;
|
return limits.minUniformBufferOffsetAlignment;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the maximum supported elements in a texel buffer
|
||||||
|
u32 MaxTexelBufferElements() const {
|
||||||
|
return limits.maxTexelBufferElements;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -204,6 +209,7 @@ private:
|
|||||||
vk::SurfaceKHR surface;
|
vk::SurfaceKHR surface;
|
||||||
vk::PhysicalDeviceProperties properties;
|
vk::PhysicalDeviceProperties properties;
|
||||||
vk::PhysicalDeviceFeatures features;
|
vk::PhysicalDeviceFeatures features;
|
||||||
|
vk::PhysicalDeviceLimits limits;
|
||||||
vk::DriverIdKHR driver_id;
|
vk::DriverIdKHR driver_id;
|
||||||
vk::DebugUtilsMessengerEXT debug_messenger;
|
vk::DebugUtilsMessengerEXT debug_messenger;
|
||||||
std::string vendor_name;
|
std::string vendor_name;
|
||||||
|
@@ -17,23 +17,16 @@
|
|||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/video_core.h"
|
#include "video_core/video_core.h"
|
||||||
|
|
||||||
#include <vk_mem_alloc.h>
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
|
constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
|
||||||
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
|
|
||||||
constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024;
|
|
||||||
|
|
||||||
constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {
|
constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
|
||||||
vk::Format::eR32G32Sfloat,
|
vk::BufferUsageFlagBits::eIndexBuffer |
|
||||||
};
|
vk::BufferUsageFlagBits::eUniformBuffer;
|
||||||
|
|
||||||
constexpr std::array TEXTURE_BUFFER_FORMATS = {
|
constexpr vk::BufferUsageFlags TEX_BUFFER_USAGE = vk::BufferUsageFlagBits::eUniformTexelBuffer;
|
||||||
vk::Format::eR32G32Sfloat,
|
|
||||||
vk::Format::eR32G32B32A32Sfloat,
|
|
||||||
};
|
|
||||||
|
|
||||||
constexpr VideoCore::SurfaceParams NULL_PARAMS = {
|
constexpr VideoCore::SurfaceParams NULL_PARAMS = {
|
||||||
.width = 1,
|
.width = 1,
|
||||||
@@ -55,6 +48,13 @@ struct DrawParams {
|
|||||||
bool is_indexed;
|
bool is_indexed;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Computes the size in bytes to request for a texel stream buffer.
 *
 * The device's texel buffer element limit is converted to bytes using the smallest texel
 * size and then capped at TEXTURE_BUFFER_SIZE.
 *
 * @param instance Instance queried for MaxTexelBufferElements().
 * @return The smaller of the device limit (in bytes) and TEXTURE_BUFFER_SIZE.
 */
[[nodiscard]] u64 TextureBufferSize(const Instance& instance) {
    // Use the smallest texel size from the texel views
    // which corresponds to eR32G32Sfloat
    constexpr u64 MIN_TEXEL_SIZE = 8;

    // Widen before multiplying: MaxTexelBufferElements() returns u32, so the product would
    // otherwise be computed in 32 bits and wrap for limits >= 2^29 (a limit of exactly 2^29
    // would produce a size of 0).
    const u64 max_size = static_cast<u64>(instance.MaxTexelBufferElements()) * MIN_TEXEL_SIZE;
    return std::min(max_size, TEXTURE_BUFFER_SIZE);
}
|
||||||
|
|
||||||
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
|
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
|
||||||
Scheduler& scheduler, DescriptorManager& desc_manager,
|
Scheduler& scheduler, DescriptorManager& desc_manager,
|
||||||
TextureRuntime& runtime, RenderpassCache& renderpass_cache)
|
TextureRuntime& runtime, RenderpassCache& renderpass_cache)
|
||||||
@@ -63,24 +63,17 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
|||||||
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
|
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
|
||||||
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
|
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
|
||||||
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
|
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
|
||||||
vertex_buffer{
|
stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE},
|
||||||
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
|
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
|
||||||
uniform_buffer{
|
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
|
||||||
instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}},
|
|
||||||
index_buffer{
|
|
||||||
instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}},
|
|
||||||
texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
|
|
||||||
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_FORMATS},
|
|
||||||
texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
|
|
||||||
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
|
|
||||||
|
|
||||||
vertex_buffers.fill(vertex_buffer.GetHandle());
|
vertex_buffers.fill(stream_buffer.Handle());
|
||||||
|
|
||||||
uniform_buffer_alignment = instance.UniformMinAlignment();
|
uniform_buffer_alignment = instance.UniformMinAlignment();
|
||||||
uniform_size_aligned_vs =
|
uniform_size_aligned_vs =
|
||||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
|
Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
|
||||||
uniform_size_aligned_fs =
|
uniform_size_aligned_fs =
|
||||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
||||||
|
|
||||||
// Define vertex layout for software shaders
|
// Define vertex layout for software shaders
|
||||||
MakeSoftwareVertexLayout();
|
MakeSoftwareVertexLayout();
|
||||||
@@ -96,15 +89,31 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
|||||||
|
|
||||||
default_sampler = CreateSampler(default_sampler_info);
|
default_sampler = CreateSampler(default_sampler_info);
|
||||||
|
|
||||||
|
const vk::Device device = instance.GetDevice();
|
||||||
|
texture_lf_view = device.createBufferView({
|
||||||
|
.buffer = texture_lf_buffer.Handle(),
|
||||||
|
.format = vk::Format::eR32G32Sfloat,
|
||||||
|
.offset = 0,
|
||||||
|
.range = VK_WHOLE_SIZE,
|
||||||
|
});
|
||||||
|
texture_rg_view = device.createBufferView({
|
||||||
|
.buffer = texture_buffer.Handle(),
|
||||||
|
.format = vk::Format::eR32G32Sfloat,
|
||||||
|
.offset = 0,
|
||||||
|
.range = VK_WHOLE_SIZE,
|
||||||
|
});
|
||||||
|
texture_rgba_view = device.createBufferView({
|
||||||
|
.buffer = texture_buffer.Handle(),
|
||||||
|
.format = vk::Format::eR32G32B32A32Sfloat,
|
||||||
|
.offset = 0,
|
||||||
|
.range = VK_WHOLE_SIZE,
|
||||||
|
});
|
||||||
|
|
||||||
// Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize
|
// Since we don't have access to VK_EXT_descriptor_indexing we need to intiallize
|
||||||
// all descriptor sets even the ones we don't use. Use default_texture for this
|
// all descriptor sets even the ones we don't use. Use default_texture for this
|
||||||
const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData);
|
pipeline_cache.BindTexelBuffer(2, texture_lf_view);
|
||||||
const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData);
|
pipeline_cache.BindTexelBuffer(3, texture_rg_view);
|
||||||
pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size);
|
pipeline_cache.BindTexelBuffer(4, texture_rgba_view);
|
||||||
pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size);
|
|
||||||
pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView());
|
|
||||||
pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0));
|
|
||||||
pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1));
|
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
pipeline_cache.BindTexture(i, null_surface.GetImageView());
|
pipeline_cache.BindTexture(i, null_surface.GetImageView());
|
||||||
@@ -122,8 +131,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
|||||||
|
|
||||||
RasterizerVulkan::~RasterizerVulkan() {
|
RasterizerVulkan::~RasterizerVulkan() {
|
||||||
scheduler.Finish();
|
scheduler.Finish();
|
||||||
|
const vk::Device device = instance.GetDevice();
|
||||||
vk::Device device = instance.GetDevice();
|
|
||||||
|
|
||||||
for (auto& [key, sampler] : samplers) {
|
for (auto& [key, sampler] : samplers) {
|
||||||
device.destroySampler(sampler);
|
device.destroySampler(sampler);
|
||||||
@@ -134,6 +142,9 @@ RasterizerVulkan::~RasterizerVulkan() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
device.destroySampler(default_sampler);
|
device.destroySampler(default_sampler);
|
||||||
|
device.destroyBufferView(texture_lf_view);
|
||||||
|
device.destroyBufferView(texture_rg_view);
|
||||||
|
device.destroyBufferView(texture_rgba_view);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
|
void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
@@ -189,7 +200,7 @@ void RasterizerVulkan::SyncFixedState() {
|
|||||||
|
|
||||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||||
u32 vs_input_index_max) {
|
u32 vs_input_index_max) {
|
||||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size);
|
auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||||
@@ -262,11 +273,11 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
|||||||
|
|
||||||
// Keep track of the binding offsets so we can bind the vertex buffer later
|
// Keep track of the binding offsets so we can bind the vertex buffer later
|
||||||
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
|
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
|
||||||
buffer_offset += Common::AlignUp(data_size, 16);
|
buffer_offset += Common::AlignUp(data_size, 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
|
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
|
||||||
vertex_buffer.Commit(buffer_offset);
|
stream_buffer.Commit(buffer_offset);
|
||||||
|
|
||||||
// Assign the rest of the attributes to the last binding
|
// Assign the rest of the attributes to the last binding
|
||||||
SetupFixedAttribs();
|
SetupFixedAttribs();
|
||||||
@@ -283,7 +294,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
|
|||||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||||
VertexLayout& layout = pipeline_info.vertex_layout;
|
VertexLayout& layout = pipeline_info.vertex_layout;
|
||||||
|
|
||||||
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
|
auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0);
|
||||||
|
|
||||||
// Reserve the last binding for fixed and default attributes
|
// Reserve the last binding for fixed and default attributes
|
||||||
// Place the default attrib at offset zero for easy access
|
// Place the default attrib at offset zero for easy access
|
||||||
@@ -336,7 +347,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
|
|||||||
binding.fixed.Assign(1);
|
binding.fixed.Assign(1);
|
||||||
binding.stride.Assign(offset);
|
binding.stride.Assign(offset);
|
||||||
|
|
||||||
vertex_buffer.Commit(offset);
|
stream_buffer.Commit(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
|
||||||
@@ -430,7 +441,7 @@ void RasterizerVulkan::SetupIndexArray() {
|
|||||||
regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
|
regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
|
||||||
regs.pipeline.index_array.offset);
|
regs.pipeline.index_array.offset);
|
||||||
|
|
||||||
auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
|
auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
|
||||||
if (index_u8 && !native_u8) {
|
if (index_u8 && !native_u8) {
|
||||||
u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
|
u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
|
||||||
for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
|
for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
|
||||||
@@ -440,11 +451,11 @@ void RasterizerVulkan::SetupIndexArray() {
|
|||||||
std::memcpy(index_ptr, index_data, index_buffer_size);
|
std::memcpy(index_ptr, index_data, index_buffer_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
index_buffer.Commit(index_buffer_size);
|
stream_buffer.Commit(index_buffer_size);
|
||||||
|
|
||||||
scheduler.Record([this, index_offset = index_offset,
|
scheduler.Record([this, index_offset = index_offset,
|
||||||
index_type = index_type](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
index_type = index_type](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||||
render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type);
|
render_cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -759,13 +770,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
|||||||
const u32 vertex_size = vertices * sizeof(HardwareVertex);
|
const u32 vertex_size = vertices * sizeof(HardwareVertex);
|
||||||
|
|
||||||
// Copy vertex data
|
// Copy vertex data
|
||||||
auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size);
|
auto [array_ptr, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
|
||||||
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
|
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
|
||||||
vertex_buffer.Commit(vertex_size);
|
stream_buffer.Commit(vertex_size);
|
||||||
|
|
||||||
scheduler.Record([this, vertices, base_vertex,
|
scheduler.Record([this, vertices, base_vertex,
|
||||||
offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
||||||
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
|
render_cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
|
||||||
render_cmdbuf.draw(vertices, 1, base_vertex, 0);
|
render_cmdbuf.draw(vertices, 1, base_vertex, 0);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -787,11 +798,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
|||||||
depth_surface);
|
depth_surface);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int submit_threshold = 50;
|
static int submit_threshold = 20;
|
||||||
submit_threshold--;
|
submit_threshold--;
|
||||||
if (!submit_threshold) {
|
if (!submit_threshold) {
|
||||||
submit_threshold = 50;
|
submit_threshold = 20;
|
||||||
scheduler.DispatchWork();
|
scheduler.Flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
return succeeded;
|
return succeeded;
|
||||||
@@ -1152,14 +1163,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
|
|||||||
return device.createFramebuffer(framebuffer_info);
|
return device.createFramebuffer(framebuffer_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushBuffers() {
|
|
||||||
vertex_buffer.Flush();
|
|
||||||
uniform_buffer.Flush();
|
|
||||||
index_buffer.Flush();
|
|
||||||
texture_buffer.Flush();
|
|
||||||
texture_lf_buffer.Flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerVulkan::SyncClipEnabled() {
|
void RasterizerVulkan::SyncClipEnabled() {
|
||||||
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
|
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
|
||||||
}
|
}
|
||||||
@@ -1294,7 +1297,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::size_t bytes_used = 0;
|
std::size_t bytes_used = 0;
|
||||||
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size);
|
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
|
||||||
|
|
||||||
// Sync the lighting luts
|
// Sync the lighting luts
|
||||||
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
|
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
|
||||||
@@ -1360,7 +1363,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::size_t bytes_used = 0;
|
std::size_t bytes_used = 0;
|
||||||
auto [buffer, offset, invalidate] = texture_buffer.Map(max_size);
|
auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));
|
||||||
|
|
||||||
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
|
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
|
||||||
auto SyncProcTexValueLUT =
|
auto SyncProcTexValueLUT =
|
||||||
@@ -1460,16 +1463,16 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 used_bytes = 0;
|
const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
|
||||||
const u32 uniform_size = static_cast<u32>(uniform_size_aligned_vs + uniform_size_aligned_fs);
|
auto [uniforms, offset, invalidate] = stream_buffer.Map(uniform_size, uniform_buffer_alignment);
|
||||||
auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size);
|
|
||||||
|
|
||||||
|
u32 used_bytes = 0;
|
||||||
if (sync_vs) {
|
if (sync_vs) {
|
||||||
Pica::Shader::VSUniformData vs_uniforms;
|
Pica::Shader::VSUniformData vs_uniforms;
|
||||||
vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
|
vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
|
||||||
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
||||||
|
|
||||||
pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes,
|
pipeline_cache.BindBuffer(0, stream_buffer.Handle(), offset + used_bytes,
|
||||||
sizeof(vs_uniforms));
|
sizeof(vs_uniforms));
|
||||||
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
|
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
|
||||||
}
|
}
|
||||||
@@ -1478,13 +1481,13 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
|
|||||||
std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
|
std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
|
||||||
sizeof(Pica::Shader::UniformData));
|
sizeof(Pica::Shader::UniformData));
|
||||||
|
|
||||||
pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes,
|
pipeline_cache.BindBuffer(1, stream_buffer.Handle(), offset + used_bytes,
|
||||||
sizeof(uniform_block_data.data));
|
sizeof(uniform_block_data.data));
|
||||||
uniform_block_data.dirty = false;
|
uniform_block_data.dirty = false;
|
||||||
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
|
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
uniform_buffer.Commit(used_bytes);
|
stream_buffer.Commit(used_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@@ -7,6 +7,7 @@
|
|||||||
#include "core/hw/gpu.h"
|
#include "core/hw/gpu.h"
|
||||||
#include "video_core/rasterizer_accelerated.h"
|
#include "video_core/rasterizer_accelerated.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||||
|
|
||||||
@@ -101,9 +102,6 @@ public:
|
|||||||
/// Sync fixed function pipeline state
|
/// Sync fixed function pipeline state
|
||||||
void SyncFixedState();
|
void SyncFixedState();
|
||||||
|
|
||||||
/// Flushes all rasterizer owned buffers
|
|
||||||
void FlushBuffers();
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
|
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
|
||||||
|
|
||||||
@@ -201,16 +199,17 @@ private:
|
|||||||
SamplerInfo texture_cube_sampler;
|
SamplerInfo texture_cube_sampler;
|
||||||
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
|
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
|
||||||
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
|
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
|
||||||
|
|
||||||
StreamBuffer vertex_buffer;
|
|
||||||
StreamBuffer uniform_buffer;
|
|
||||||
StreamBuffer index_buffer;
|
|
||||||
StreamBuffer texture_buffer;
|
|
||||||
StreamBuffer texture_lf_buffer;
|
|
||||||
PipelineInfo pipeline_info;
|
PipelineInfo pipeline_info;
|
||||||
std::size_t uniform_buffer_alignment;
|
|
||||||
std::size_t uniform_size_aligned_vs;
|
StreamBuffer stream_buffer; ///< Vertex+Index+Uniform buffer
|
||||||
std::size_t uniform_size_aligned_fs;
|
StreamBuffer texture_buffer; ///< Texture buffer
|
||||||
|
StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer
|
||||||
|
vk::BufferView texture_lf_view;
|
||||||
|
vk::BufferView texture_rg_view;
|
||||||
|
vk::BufferView texture_rgba_view;
|
||||||
|
u64 uniform_buffer_alignment;
|
||||||
|
u64 uniform_size_aligned_vs;
|
||||||
|
u64 uniform_size_aligned_fs;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@@ -26,10 +26,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf,
|
|||||||
last = nullptr;
|
last = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
|
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache)
|
||||||
RendererVulkan& renderer)
|
: instance{instance}, renderpass_cache{renderpass_cache}, master_semaphore{instance},
|
||||||
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer},
|
command_pool{instance, master_semaphore}, stop_requested{false},
|
||||||
master_semaphore{instance}, command_pool{instance, master_semaphore}, stop_requested{false},
|
|
||||||
use_worker_thread{Settings::values.async_command_recording} {
|
use_worker_thread{Settings::values.async_command_recording} {
|
||||||
AllocateWorkerCommandBuffers();
|
AllocateWorkerCommandBuffers();
|
||||||
if (use_worker_thread) {
|
if (use_worker_thread) {
|
||||||
@@ -133,10 +132,9 @@ void Scheduler::AllocateWorkerCommandBuffers() {
|
|||||||
|
|
||||||
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255));
|
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Exectution", MP_RGB(255, 192, 255));
|
||||||
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
|
||||||
const auto handle = master_semaphore.Handle();
|
const vk::Semaphore handle = master_semaphore.Handle();
|
||||||
const u64 signal_value = master_semaphore.NextTick();
|
const u64 signal_value = master_semaphore.NextTick();
|
||||||
state = StateFlags::AllDirty;
|
state = StateFlags::AllDirty;
|
||||||
renderer.FlushBuffers();
|
|
||||||
|
|
||||||
renderpass_cache.ExitRenderpass();
|
renderpass_cache.ExitRenderpass();
|
||||||
Record([signal_semaphore, wait_semaphore, handle, signal_value,
|
Record([signal_semaphore, wait_semaphore, handle, signal_value,
|
||||||
|
@@ -28,14 +28,12 @@ DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
|
|||||||
|
|
||||||
class Instance;
|
class Instance;
|
||||||
class RenderpassCache;
|
class RenderpassCache;
|
||||||
class RendererVulkan;
|
|
||||||
|
|
||||||
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
|
||||||
/// OpenGL-like operations on Vulkan command buffers.
|
/// OpenGL-like operations on Vulkan command buffers.
|
||||||
class Scheduler {
|
class Scheduler {
|
||||||
public:
|
public:
|
||||||
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
|
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache);
|
||||||
RendererVulkan& renderer);
|
|
||||||
~Scheduler();
|
~Scheduler();
|
||||||
|
|
||||||
/// Sends the current execution context to the GPU.
|
/// Sends the current execution context to the GPU.
|
||||||
@@ -198,7 +196,6 @@ private:
|
|||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
RenderpassCache& renderpass_cache;
|
RenderpassCache& renderpass_cache;
|
||||||
RendererVulkan& renderer;
|
|
||||||
MasterSemaphore master_semaphore;
|
MasterSemaphore master_semaphore;
|
||||||
CommandPool command_pool;
|
CommandPool command_pool;
|
||||||
std::unique_ptr<CommandChunk> chunk;
|
std::unique_ptr<CommandChunk> chunk;
|
||||||
|
@@ -1,243 +1,155 @@
|
|||||||
// Copyright 2022 Citra Emulator Project
|
// Copyright 2019 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <limits>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/logging/log.h"
|
|
||||||
#include "common/microprofile.h"
|
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
|
||||||
#include <vk_mem_alloc.h>
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) {
|
namespace {
|
||||||
switch (usage) {
|
|
||||||
case vk::BufferUsageFlagBits::eVertexBuffer:
|
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
|
||||||
return vk::AccessFlagBits::eVertexAttributeRead;
|
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
|
||||||
case vk::BufferUsageFlagBits::eIndexBuffer:
|
|
||||||
return vk::AccessFlagBits::eIndexRead;
|
/// Find a memory type with the passed requirements
|
||||||
case vk::BufferUsageFlagBits::eUniformBuffer:
|
std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
|
||||||
return vk::AccessFlagBits::eUniformRead;
|
vk::MemoryPropertyFlags wanted,
|
||||||
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
|
u32 filter = std::numeric_limits<u32>::max()) {
|
||||||
return vk::AccessFlagBits::eShaderRead;
|
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
|
||||||
default:
|
const auto flags = properties.memoryTypes[i].propertyFlags;
|
||||||
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
|
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
|
||||||
UNREACHABLE();
|
return i;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return vk::AccessFlagBits::eNone;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) {
|
/// Get the preferred host visible memory type.
|
||||||
switch (usage) {
|
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, bool readback,
|
||||||
case vk::BufferUsageFlagBits::eVertexBuffer:
|
u32 filter = std::numeric_limits<u32>::max()) {
|
||||||
return vk::PipelineStageFlagBits::eVertexInput;
|
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
|
||||||
case vk::BufferUsageFlagBits::eIndexBuffer:
|
// Otherwise search for a host visible allocation.
|
||||||
return vk::PipelineStageFlagBits::eVertexInput;
|
const vk::MemoryPropertyFlags HOST_MEMORY =
|
||||||
case vk::BufferUsageFlagBits::eUniformBuffer:
|
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
|
||||||
return vk::PipelineStageFlagBits::eVertexShader |
|
const vk::MemoryPropertyFlags DYNAMIC_MEMORY =
|
||||||
vk::PipelineStageFlagBits::eFragmentShader;
|
HOST_MEMORY | (readback ? vk::MemoryPropertyFlagBits::eHostCached
|
||||||
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
|
: vk::MemoryPropertyFlagBits::eDeviceLocal);
|
||||||
return vk::PipelineStageFlagBits::eFragmentShader;
|
|
||||||
default:
|
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
|
||||||
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
|
if (!preferred_type) {
|
||||||
UNREACHABLE();
|
preferred_type = FindMemoryType(properties, HOST_MEMORY);
|
||||||
|
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
|
||||||
}
|
}
|
||||||
return vk::PipelineStageFlagBits::eNone;
|
return preferred_type.value_or(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
|
} // Anonymous namespace
|
||||||
: instance{instance} {
|
|
||||||
const vk::BufferUsageFlags usage =
|
|
||||||
readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
|
|
||||||
const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
|
|
||||||
|
|
||||||
const VmaAllocationCreateFlags flags =
|
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
|
||||||
readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
|
vk::BufferUsageFlags usage_, u64 size, bool readback_)
|
||||||
: VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
|
: instance{instance_}, scheduler{scheduler_}, usage{usage_}, readback{readback_} {
|
||||||
const VmaAllocationCreateInfo alloc_create_info = {
|
CreateBuffers(size);
|
||||||
.flags = flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
|
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
|
||||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
|
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
|
||||||
};
|
|
||||||
|
|
||||||
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
|
|
||||||
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
|
|
||||||
VmaAllocationInfo alloc_info;
|
|
||||||
VmaAllocator allocator = instance.GetAllocator();
|
|
||||||
|
|
||||||
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
|
|
||||||
&alloc_info);
|
|
||||||
|
|
||||||
buffer = vk::Buffer{unsafe_buffer};
|
|
||||||
mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
|
|
||||||
}
|
|
||||||
|
|
||||||
StagingBuffer::~StagingBuffer() {
|
|
||||||
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
|
|
||||||
}
|
|
||||||
|
|
||||||
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback)
|
|
||||||
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, total_size{size},
|
|
||||||
bucket_size{size / BUCKET_COUNT}, readback{readback} {}
|
|
||||||
|
|
||||||
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
|
|
||||||
vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
|
|
||||||
bool readback)
|
|
||||||
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, usage{usage},
|
|
||||||
total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
|
|
||||||
|
|
||||||
const vk::BufferCreateInfo buffer_info = {
|
|
||||||
.size = total_size,
|
|
||||||
.usage = usage | vk::BufferUsageFlagBits::eTransferDst,
|
|
||||||
};
|
|
||||||
|
|
||||||
const VmaAllocationCreateInfo alloc_create_info = {
|
|
||||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
|
||||||
};
|
|
||||||
|
|
||||||
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
|
|
||||||
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
|
|
||||||
VmaAllocationInfo alloc_info;
|
|
||||||
VmaAllocator allocator = instance.GetAllocator();
|
|
||||||
|
|
||||||
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
|
|
||||||
&alloc_info);
|
|
||||||
|
|
||||||
gpu_buffer = vk::Buffer{unsafe_buffer};
|
|
||||||
|
|
||||||
ASSERT(view_formats.size() < MAX_BUFFER_VIEWS);
|
|
||||||
|
|
||||||
vk::Device device = instance.GetDevice();
|
|
||||||
for (std::size_t i = 0; i < view_formats.size(); i++) {
|
|
||||||
const vk::BufferViewCreateInfo view_info = {
|
|
||||||
.buffer = gpu_buffer,
|
|
||||||
.format = view_formats[i],
|
|
||||||
.offset = 0,
|
|
||||||
.range = total_size,
|
|
||||||
};
|
|
||||||
|
|
||||||
views[i] = device.createBufferView(view_info);
|
|
||||||
}
|
|
||||||
|
|
||||||
view_count = view_formats.size();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
StreamBuffer::~StreamBuffer() {
|
StreamBuffer::~StreamBuffer() {
|
||||||
if (gpu_buffer) {
|
const vk::Device device = instance.GetDevice();
|
||||||
vk::Device device = instance.GetDevice();
|
device.unmapMemory(memory);
|
||||||
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(gpu_buffer), allocation);
|
device.destroyBuffer(buffer);
|
||||||
for (std::size_t i = 0; i < view_count; i++) {
|
device.freeMemory(memory);
|
||||||
device.destroyBufferView(views[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
|
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
|
||||||
ASSERT(size <= total_size);
|
ASSERT(size <= stream_buffer_size);
|
||||||
size = Common::AlignUp(size, 16);
|
mapped_size = size;
|
||||||
|
|
||||||
Bucket& bucket = buckets[bucket_index];
|
if (alignment > 0) {
|
||||||
|
offset = Common::AlignUp(offset, alignment);
|
||||||
if (bucket.cursor + size > bucket_size) {
|
|
||||||
bucket.gpu_tick = scheduler.CurrentTick();
|
|
||||||
MoveNextBucket();
|
|
||||||
return Map(size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool invalidate = std::exchange(bucket.invalid, false);
|
WaitPendingOperations(offset);
|
||||||
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
|
|
||||||
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
|
|
||||||
|
|
||||||
return std::make_tuple(mapped, buffer_offset, invalidate);
|
bool invalidate{false};
|
||||||
|
if (offset + size > stream_buffer_size) {
|
||||||
|
// The buffer would overflow, save the amount of used watches and reset the state.
|
||||||
|
invalidate = true;
|
||||||
|
invalidation_mark = current_watch_cursor;
|
||||||
|
current_watch_cursor = 0;
|
||||||
|
offset = 0;
|
||||||
|
|
||||||
|
// Swap watches and reset waiting cursors.
|
||||||
|
std::swap(previous_watches, current_watches);
|
||||||
|
wait_cursor = 0;
|
||||||
|
wait_bound = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::make_tuple(mapped + offset, offset, invalidate);
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamBuffer::Commit(u32 size) {
|
void StreamBuffer::Commit(u64 size) {
|
||||||
size = Common::AlignUp(size, 16);
|
ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
|
||||||
buckets[bucket_index].cursor += size;
|
size);
|
||||||
|
|
||||||
|
offset += size;
|
||||||
|
|
||||||
|
if (current_watch_cursor + 1 >= current_watches.size()) {
|
||||||
|
// Ensure that there are enough watches.
|
||||||
|
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
|
||||||
|
}
|
||||||
|
auto& watch = current_watches[current_watch_cursor++];
|
||||||
|
watch.upper_bound = offset;
|
||||||
|
watch.tick = scheduler.CurrentTick();
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamBuffer::Flush() {
|
/**
 * Creates the Vulkan buffer, allocates memory for it, binds the two together and maps the
 * whole allocation, storing the results in `buffer`, `memory`, `mapped` and
 * `stream_buffer_size`.
 * @param prefered_size Requested buffer size in bytes; clamped to half of the preferred heap.
 */
void StreamBuffer::CreateBuffers(u64 prefered_size) {
    const vk::Device device = instance.GetDevice();
    const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
    const u32 preferred_type = GetMemoryType(memory_properties, readback);
    const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;

    // Only use part of the preferred heap to avoid running out of memory.
    // As per DXVK's example, using `heap_size / 2`
    const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
    const VkDeviceSize allocable_size = heap_size / 2;
    buffer = device.createBuffer({
        .size = std::min(prefered_size, allocable_size),
        .usage = usage,
    });

    const auto requirements = device.getBufferMemoryRequirements(buffer);
    const u32 required_flags = requirements.memoryTypeBits;
    // The driver may round the size up, so track what was actually required.
    stream_buffer_size = static_cast<u64>(requirements.size);

    memory = device.allocateMemory({
        .allocationSize = requirements.size,
        // GetMemoryType takes (properties, readback, filter). The type bits must go in the
        // filter slot; as the second argument they would be converted to `readback = true`.
        .memoryTypeIndex = GetMemoryType(memory_properties, readback, required_flags),
    });

    device.bindBufferMemory(buffer, memory, 0);
    mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
}
|
||||||
|
|
||||||
|
/// Grows `watches` by `grow_size` additional value-initialized entries.
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
    const std::size_t grown_count = watches.size() + grow_size;
    watches.resize(grown_count);
}
|
||||||
|
|
||||||
|
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
|
||||||
|
if (!invalidation_mark) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
|
||||||
Bucket& bucket = buckets[bucket_index];
|
auto& watch = previous_watches[wait_cursor];
|
||||||
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
|
wait_bound = watch.upper_bound;
|
||||||
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
|
scheduler.Wait(watch.tick);
|
||||||
ASSERT(flush_size <= bucket_size);
|
++wait_cursor;
|
||||||
ASSERT(flush_start + flush_size <= total_size);
|
|
||||||
|
|
||||||
// Ensure all staging writes are visible to the host memory domain
|
|
||||||
if (flush_size > 0) [[likely]] {
|
|
||||||
VmaAllocator allocator = instance.GetAllocator();
|
|
||||||
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
|
|
||||||
if (gpu_buffer) {
|
|
||||||
scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer,
|
|
||||||
vk::CommandBuffer upload_cmdbuf) {
|
|
||||||
const vk::BufferCopy copy_region = {
|
|
||||||
.srcOffset = flush_start,
|
|
||||||
.dstOffset = flush_start,
|
|
||||||
.size = flush_size,
|
|
||||||
};
|
|
||||||
|
|
||||||
upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
|
|
||||||
|
|
||||||
const vk::BufferMemoryBarrier buffer_barrier = {
|
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
|
||||||
.dstAccessMask = MakeAccessFlags(usage),
|
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
|
||||||
.buffer = gpu_buffer,
|
|
||||||
.offset = flush_start,
|
|
||||||
.size = flush_size,
|
|
||||||
};
|
|
||||||
|
|
||||||
upload_cmdbuf.pipelineBarrier(
|
|
||||||
vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage),
|
|
||||||
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
bucket.flush_cursor += flush_size;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamBuffer::Invalidate() {
|
|
||||||
if (!readback) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
Bucket& bucket = buckets[bucket_index];
|
|
||||||
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
|
|
||||||
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
|
|
||||||
ASSERT(flush_size <= bucket_size);
|
|
||||||
|
|
||||||
if (flush_size > 0) [[likely]] {
|
|
||||||
// Ensure the staging memory can be read by the host
|
|
||||||
VmaAllocator allocator = instance.GetAllocator();
|
|
||||||
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
|
|
||||||
bucket.flush_cursor += flush_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void StreamBuffer::MoveNextBucket() {
|
|
||||||
// Flush and Invalidate are bucket local operations for simplicity so perform them here
|
|
||||||
if (readback) {
|
|
||||||
Invalidate();
|
|
||||||
} else {
|
|
||||||
Flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
bucket_index = (bucket_index + 1) % BUCKET_COUNT;
|
|
||||||
Bucket& next_bucket = buckets[bucket_index];
|
|
||||||
scheduler.Wait(next_bucket.gpu_tick);
|
|
||||||
next_bucket.cursor = 0;
|
|
||||||
next_bucket.flush_cursor = 0;
|
|
||||||
next_bucket.invalid = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@@ -1,101 +1,81 @@
|
|||||||
// Copyright 2022 Citra Emulator Project
|
// Copyright 2019 yuzu Emulator Project
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <optional>
|
||||||
#include <map>
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include "common/assert.h"
|
#include <tuple>
|
||||||
|
#include <vector>
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
VK_DEFINE_HANDLE(VmaAllocation)
|
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
class Instance;
|
class Instance;
|
||||||
class Scheduler;
|
class Scheduler;
|
||||||
|
|
||||||
struct StagingBuffer {
|
class StreamBuffer final {
|
||||||
StagingBuffer(const Instance& instance, u32 size, bool readback);
|
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
|
||||||
~StagingBuffer();
|
|
||||||
|
|
||||||
const Instance& instance;
|
|
||||||
vk::Buffer buffer{};
|
|
||||||
VmaAllocation allocation{};
|
|
||||||
std::span<std::byte> mapped{};
|
|
||||||
};
|
|
||||||
|
|
||||||
class StreamBuffer {
|
|
||||||
static constexpr u32 MAX_BUFFER_VIEWS = 3;
|
|
||||||
static constexpr u32 BUCKET_COUNT = 2;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
/// Staging only constructor
|
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
|
||||||
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false);
|
vk::BufferUsageFlags usage, u64 size, bool readback = false);
|
||||||
/// Staging + GPU streaming constructor
|
|
||||||
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
|
|
||||||
vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
|
|
||||||
bool readback = false);
|
|
||||||
~StreamBuffer();
|
~StreamBuffer();
|
||||||
|
|
||||||
StreamBuffer(const StreamBuffer&) = delete;
|
/**
|
||||||
StreamBuffer& operator=(const StreamBuffer&) = delete;
|
* Reserves a region of memory from the stream buffer.
|
||||||
|
* @param size Size to reserve.
* @param alignment Alignment applied to the buffer offset before reserving; 0 leaves it as is.
|
||||||
|
* @returns A tuple of a raw memory pointer (with offset added), the buffer offset, and a
*          flag that is true when the buffer wrapped around to the start on this call
|
||||||
|
*/
|
||||||
|
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
|
||||||
|
|
||||||
/// Maps aligned staging memory of size bytes
|
/// Commits "size" bytes of the last mapped region and records a watch at the current tick.
|
||||||
std::tuple<u8*, u32, bool> Map(u32 size);
|
void Commit(u64 size);
|
||||||
|
|
||||||
/// Commits size bytes from the currently mapped staging memory
|
vk::Buffer Handle() const noexcept {
|
||||||
void Commit(u32 size = 0);
|
return buffer;
|
||||||
|
|
||||||
/// Flushes staging memory to the GPU buffer
|
|
||||||
void Flush();
|
|
||||||
|
|
||||||
/// Invalidates staging memory for reading
|
|
||||||
void Invalidate();
|
|
||||||
|
|
||||||
/// Returns the GPU buffer handle
|
|
||||||
[[nodiscard]] vk::Buffer GetHandle() const {
|
|
||||||
return gpu_buffer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the staging buffer handle
|
u64 Address() const noexcept {
|
||||||
[[nodiscard]] vk::Buffer GetStagingHandle() const {
|
return 0;
|
||||||
return staging.buffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns an immutable reference to the requested buffer view
|
|
||||||
[[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const {
|
|
||||||
ASSERT(index < view_count);
|
|
||||||
return views[index];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Moves to the next bucket
|
struct Watch {
|
||||||
void MoveNextBucket();
|
u64 tick{};
|
||||||
|
u64 upper_bound{};
|
||||||
struct Bucket {
|
|
||||||
bool invalid = false;
|
|
||||||
u32 gpu_tick = 0;
|
|
||||||
u32 cursor = 0;
|
|
||||||
u32 flush_cursor = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Creates Vulkan buffer handles, committing the required memory.
|
||||||
|
void CreateBuffers(u64 prefered_size);
|
||||||
|
|
||||||
|
/// Increases the amount of watches available.
|
||||||
|
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
|
||||||
|
|
||||||
|
void WaitPendingOperations(u64 requested_upper_bound);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance; ///< Vulkan instance.
|
||||||
Scheduler& scheduler;
|
Scheduler& scheduler; ///< Command scheduler.
|
||||||
StagingBuffer staging;
|
|
||||||
vk::Buffer gpu_buffer{};
|
vk::Buffer buffer; ///< Mapped buffer.
|
||||||
VmaAllocation allocation{};
|
vk::DeviceMemory memory; ///< Memory allocation.
|
||||||
vk::BufferUsageFlagBits usage;
|
u8* mapped{}; ///< Pointer to the mapped memory
|
||||||
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
|
u64 stream_buffer_size{}; ///< Stream buffer size.
|
||||||
std::array<Bucket, BUCKET_COUNT> buckets;
|
vk::BufferUsageFlags usage{};
|
||||||
std::size_t view_count = 0;
|
bool readback{}; ///< Flag indicating if the buffer should use cached memory
|
||||||
u32 total_size = 0;
|
|
||||||
u32 bucket_size = 0;
|
u64 offset{}; ///< Buffer iterator.
|
||||||
u32 bucket_index = 0;
|
u64 mapped_size{}; ///< Size reserved for the current copy.
|
||||||
bool readback = false;
|
|
||||||
|
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
|
||||||
|
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
|
||||||
|
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
|
||||||
|
|
||||||
|
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
|
||||||
|
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
|
||||||
|
u64 wait_bound{}; ///< Highest offset being watched for completion.
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
@@ -103,16 +103,16 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
|
|||||||
return depth_offset;
|
return depth_offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
|
constexpr u64 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||||
|
|
||||||
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
|
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
|
||||||
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
|
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
|
||||||
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
|
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
|
||||||
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
|
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
|
||||||
upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler,
|
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
|
||||||
DOWNLOAD_BUFFER_SIZE,
|
download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
|
||||||
true} {
|
DOWNLOAD_BUFFER_SIZE, true} {
|
||||||
|
|
||||||
auto Register = [this](VideoCore::PixelFormat dest,
|
auto Register = [this](VideoCore::PixelFormat dest,
|
||||||
std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||||
@@ -153,25 +153,20 @@ TextureRuntime::~TextureRuntime() {
|
|||||||
|
|
||||||
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||||
auto& buffer = upload ? upload_buffer : download_buffer;
|
auto& buffer = upload ? upload_buffer : download_buffer;
|
||||||
auto [data, offset, invalidate] = buffer.Map(size);
|
auto [data, offset, invalidate] = buffer.Map(size, 4);
|
||||||
|
|
||||||
return StagingData{
|
return StagingData{
|
||||||
.buffer = buffer.GetStagingHandle(),
|
.buffer = buffer.Handle(),
|
||||||
.size = size,
|
.size = size,
|
||||||
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
||||||
.buffer_offset = offset,
|
.buffer_offset = offset,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureRuntime::FlushBuffers() {
|
|
||||||
upload_buffer.Flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
|
MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
|
||||||
void TextureRuntime::Finish() {
|
void TextureRuntime::Finish() {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Finish);
|
MICROPROFILE_SCOPE(Vulkan_Finish);
|
||||||
scheduler.Finish();
|
scheduler.Finish();
|
||||||
download_buffer.Invalidate();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||||
@@ -415,7 +410,8 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
|
|||||||
};
|
};
|
||||||
|
|
||||||
if (clear.texture_rect == surface.GetScaledRect()) {
|
if (clear.texture_rect == surface.GetScaledRect()) {
|
||||||
scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf,
|
||||||
|
vk::CommandBuffer) {
|
||||||
const vk::ImageSubresourceRange range = {
|
const vk::ImageSubresourceRange range = {
|
||||||
.aspectMask = params.aspect,
|
.aspectMask = params.aspect,
|
||||||
.baseMipLevel = clear.texture_level,
|
.baseMipLevel = clear.texture_level,
|
||||||
@@ -458,20 +454,25 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
|
render_cmdbuf.pipelineBarrier(params.pipeline_flags,
|
||||||
|
vk::PipelineStageFlagBits::eTransfer,
|
||||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
|
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
|
||||||
|
|
||||||
const bool is_color = static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
|
const bool is_color =
|
||||||
|
static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
|
||||||
if (is_color) {
|
if (is_color) {
|
||||||
render_cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
|
render_cmdbuf.clearColorImage(params.src_image,
|
||||||
|
vk::ImageLayout::eTransferDstOptimal,
|
||||||
MakeClearColorValue(value), range);
|
MakeClearColorValue(value), range);
|
||||||
} else {
|
} else {
|
||||||
render_cmdbuf.clearDepthStencilImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
|
render_cmdbuf.clearDepthStencilImage(params.src_image,
|
||||||
|
vk::ImageLayout::eTransferDstOptimal,
|
||||||
MakeClearDepthStencilValue(value), range);
|
MakeClearDepthStencilValue(value), range);
|
||||||
}
|
}
|
||||||
|
|
||||||
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
|
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
|
||||||
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
|
params.pipeline_flags, vk::DependencyFlagBits::eByRegion,
|
||||||
|
{}, {}, post_barrier);
|
||||||
});
|
});
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -528,34 +529,34 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface,
|
|||||||
.src_image = surface.alloc.image,
|
.src_image = surface.alloc.image,
|
||||||
};
|
};
|
||||||
|
|
||||||
scheduler.Record(
|
scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
|
||||||
[params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
vk::CommandBuffer) {
|
||||||
const vk::ImageMemoryBarrier pre_barrier = {
|
const vk::ImageMemoryBarrier pre_barrier = {
|
||||||
.srcAccessMask = params.src_access,
|
.srcAccessMask = params.src_access,
|
||||||
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||||
.oldLayout = vk::ImageLayout::eGeneral,
|
.oldLayout = vk::ImageLayout::eGeneral,
|
||||||
.newLayout = vk::ImageLayout::eGeneral,
|
.newLayout = vk::ImageLayout::eGeneral,
|
||||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
.image = params.src_image,
|
.image = params.src_image,
|
||||||
.subresourceRange{
|
.subresourceRange{
|
||||||
.aspectMask = params.aspect,
|
.aspectMask = params.aspect,
|
||||||
.baseMipLevel = level,
|
.baseMipLevel = level,
|
||||||
.levelCount = 1,
|
.levelCount = 1,
|
||||||
.baseArrayLayer = 0,
|
.baseArrayLayer = 0,
|
||||||
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
|
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
|
||||||
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
|
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
|
||||||
});
|
});
|
||||||
|
|
||||||
renderpass_cache.EnterRenderpass(clear_info);
|
renderpass_cache.EnterRenderpass(clear_info);
|
||||||
renderpass_cache.ExitRenderpass();
|
renderpass_cache.ExitRenderpass();
|
||||||
|
|
||||||
scheduler.Record([params, level = clear.texture_level]
|
scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
|
||||||
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
|
vk::CommandBuffer) {
|
||||||
const vk::ImageMemoryBarrier post_barrier = {
|
const vk::ImageMemoryBarrier post_barrier = {
|
||||||
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
|
||||||
.dstAccessMask = params.src_access,
|
.dstAccessMask = params.src_access,
|
||||||
|
@@ -15,13 +15,15 @@
|
|||||||
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
|
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
|
|
||||||
|
VK_DEFINE_HANDLE(VmaAllocation)
|
||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
struct StagingData {
|
struct StagingData {
|
||||||
vk::Buffer buffer;
|
vk::Buffer buffer;
|
||||||
u32 size = 0;
|
u32 size = 0;
|
||||||
std::span<std::byte> mapped{};
|
std::span<std::byte> mapped{};
|
||||||
u32 buffer_offset = 0;
|
u64 buffer_offset = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageAlloc {
|
struct ImageAlloc {
|
||||||
@@ -127,9 +129,6 @@ public:
|
|||||||
/// Generates mipmaps for all the available levels of the texture
|
/// Generates mipmaps for all the available levels of the texture
|
||||||
void GenerateMipmaps(Surface& surface, u32 max_level);
|
void GenerateMipmaps(Surface& surface, u32 max_level);
|
||||||
|
|
||||||
/// Flushes staging buffers
|
|
||||||
void FlushBuffers();
|
|
||||||
|
|
||||||
/// Returns all source formats that support reinterpretation to the dest format
|
/// Returns all source formats that support reinterpretation to the dest format
|
||||||
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
|
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
|
||||||
VideoCore::PixelFormat dest_format) const;
|
VideoCore::PixelFormat dest_format) const;
|
||||||
|
Reference in New Issue
Block a user