renderer_vulkan: Rewrite data streaming

* Most GPUs nowadays provide a device-local, host-visible memory heap, which lets us avoid copies between staging and device-local memory. This is especially beneficial for mobile GPUs and APUs, which are the primary targets of this backend.

* This commit ports the old yuzu stream buffer, with some changes to suit our needs, and removes the buffer flush methods.
GPUCode
2022-12-30 11:06:15 +02:00
parent 410b8b8809
commit 0e987959a6
13 changed files with 319 additions and 430 deletions
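For orientation, the streaming pattern adopted throughout this diff is: map a region of the stream buffer, write the data on the CPU, commit the written size, then record the draw against the single buffer handle plus the returned offset. Below is a minimal, hypothetical usage sketch of the new interface; it assumes the ScreenRectVertex type and the StreamBuffer signatures introduced by this commit, and skips the scheduler Record() indirection used in the real code for brevity.

#include <cstring>
#include <span>

#include "video_core/renderer_vulkan/vk_stream_buffer.h"

namespace Vulkan {

// Hypothetical helper (not part of this commit): stream a quad and draw it.
void UploadAndDrawQuad(StreamBuffer& vertex_buffer, vk::CommandBuffer render_cmdbuf,
                       std::span<const ScreenRectVertex> vertices) {
    const u64 size = vertices.size_bytes();

    // Map returns host-visible (ideally also device-local) memory; 'invalidate' reports
    // that the buffer wrapped around and previously streamed data was discarded.
    auto [ptr, offset, invalidate] = vertex_buffer.Map(size, sizeof(ScreenRectVertex));
    std::memcpy(ptr, vertices.data(), size);
    vertex_buffer.Commit(size);

    if (invalidate) {
        // The whole buffer was recycled; any persistently referenced data must be re-uploaded.
    }

    // No staging copy or Flush() call is needed anymore; the draw references the
    // committed region through the returned offset.
    render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
    render_cmdbuf.draw(static_cast<u32>(vertices.size()), 1,
                       static_cast<u32>(offset / sizeof(ScreenRectVertex)), 0);
}

} // namespace Vulkan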

View File

@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <limits>
#include "common/alignment.h"
#include "core/memory.h"
#include "video_core/pica_state.h"
#include "video_core/rasterizer_accelerated.h"
@@ -210,7 +211,7 @@ RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray
u32 vs_input_size = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count != 0) {
vs_input_size += loader.byte_count * vertex_num;
vs_input_size += Common::AlignUp(loader.byte_count * vertex_num, 4);
}
}

View File

@@ -98,13 +98,12 @@ RendererVulkan::RendererVulkan(Frontend::EmuWindow& window, Frontend::EmuWindow*
: RendererBase{window, secondary_window},
telemetry_session{Core::System::GetInstance().TelemetrySession()},
instance{window, Settings::values.physical_device.GetValue()}, scheduler{instance,
renderpass_cache,
*this},
renderpass_cache},
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
runtime{instance, scheduler, renderpass_cache, desc_manager}, swapchain{instance, scheduler,
renderpass_cache},
vertex_buffer{
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
vertex_buffer{instance, scheduler, vk::BufferUsageFlagBits::eVertexBuffer,
VERTEX_BUFFER_SIZE},
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
Report();
window.mailbox = nullptr;
@@ -601,7 +600,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
const auto& texcoords = screen_info.display_texcoords;
u32 size = sizeof(ScreenRectVertex) * 4;
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
const std::array vertices = {
ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
@@ -633,7 +632,7 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
@@ -643,7 +642,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
const auto& texcoords = screen_info.display_texcoords;
u32 size = sizeof(ScreenRectVertex) * 4;
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
const std::array vertices = {
ScreenRectVertex{x, y, texcoords.bottom, texcoords.right},
@@ -672,7 +671,7 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
@@ -683,7 +682,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
const auto& texcoords = screen_info_l.display_texcoords;
u32 size = sizeof(ScreenRectVertex) * 4;
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
const std::array vertices = {ScreenRectVertex{x, y, texcoords.bottom, texcoords.left},
ScreenRectVertex{x + w, y, texcoords.bottom, texcoords.right},
@@ -712,7 +711,7 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
@@ -723,7 +722,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
const auto& texcoords = screen_info_l.display_texcoords;
u32 size = sizeof(ScreenRectVertex) * 4;
auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
auto [ptr, offset, invalidate] = vertex_buffer.Map(size, 16);
const std::array<ScreenRectVertex, 4> vertices = {{
ScreenRectVertex(x, y, texcoords.bottom, texcoords.right),
@@ -754,7 +753,7 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.Handle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
@@ -967,12 +966,6 @@ void RendererVulkan::SwapBuffers() {
}
}
void RendererVulkan::FlushBuffers() {
vertex_buffer.Flush();
rasterizer.FlushBuffers();
runtime.FlushBuffers();
}
void RendererVulkan::Report() const {
const std::string vendor_name{instance.GetVendorName()};
const std::string model_name{instance.GetModelName()};

View File

@@ -78,7 +78,6 @@ public:
void PrepareVideoDumping() override {}
void CleanupVideoDumping() override {}
void Sync() override;
void FlushBuffers();
private:
void ReloadSampler();

View File

@@ -238,6 +238,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)
physical_device = physical_devices[physical_device_index];
properties = physical_device.getProperties();
limits = properties.limits;
LOG_INFO(Render_Vulkan, "Creating logical device for physical device: {}",
properties.deviceName);

View File

@@ -177,7 +177,12 @@ public:
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return properties.limits.minUniformBufferOffsetAlignment;
return limits.minUniformBufferOffsetAlignment;
}
/// Returns the maximum supported elements in a texel buffer
u32 MaxTexelBufferElements() const {
return limits.maxTexelBufferElements;
}
private:
@@ -204,6 +209,7 @@ private:
vk::SurfaceKHR surface;
vk::PhysicalDeviceProperties properties;
vk::PhysicalDeviceFeatures features;
vk::PhysicalDeviceLimits limits;
vk::DriverIdKHR driver_id;
vk::DebugUtilsMessengerEXT debug_messenger;
std::string vendor_name;

View File

@@ -17,23 +17,16 @@
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/video_core.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 512 * 1024;
constexpr u64 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u64 TEXTURE_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr std::array TEXTURE_BUFFER_LF_FORMATS = {
vk::Format::eR32G32Sfloat,
};
constexpr vk::BufferUsageFlags BUFFER_USAGE = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eUniformBuffer;
constexpr std::array TEXTURE_BUFFER_FORMATS = {
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32A32Sfloat,
};
constexpr vk::BufferUsageFlags TEX_BUFFER_USAGE = vk::BufferUsageFlagBits::eUniformTexelBuffer;
constexpr VideoCore::SurfaceParams NULL_PARAMS = {
.width = 1,
@@ -55,6 +48,13 @@ struct DrawParams {
bool is_indexed;
};
[[nodiscard]] u64 TextureBufferSize(const Instance& instance) {
// Use the smallest texel size from the texel views
// which corresponds to eR32G32Sfloat
const u64 max_size = instance.MaxTexelBufferElements() * 8;
return std::min(max_size, TEXTURE_BUFFER_SIZE);
}
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
Scheduler& scheduler, DescriptorManager& desc_manager,
TextureRuntime& runtime, RenderpassCache& renderpass_cache)
@@ -63,24 +63,17 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
vertex_buffer{
instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
uniform_buffer{
instance, scheduler, UNIFORM_BUFFER_SIZE, vk::BufferUsageFlagBits::eUniformBuffer, {}},
index_buffer{
instance, scheduler, INDEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eIndexBuffer, {}},
texture_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_FORMATS},
texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
stream_buffer{instance, scheduler, BUFFER_USAGE, VERTEX_BUFFER_SIZE},
texture_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)},
texture_lf_buffer{instance, scheduler, TEX_BUFFER_USAGE, TextureBufferSize(instance)} {
vertex_buffers.fill(vertex_buffer.GetHandle());
vertex_buffers.fill(stream_buffer.Handle());
uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs =
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
Common::AlignUp(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
uniform_size_aligned_fs =
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
// Define vertex layout for software shaders
MakeSoftwareVertexLayout();
@@ -96,15 +89,31 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
default_sampler = CreateSampler(default_sampler_info);
const vk::Device device = instance.GetDevice();
texture_lf_view = device.createBufferView({
.buffer = texture_lf_buffer.Handle(),
.format = vk::Format::eR32G32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
texture_rg_view = device.createBufferView({
.buffer = texture_buffer.Handle(),
.format = vk::Format::eR32G32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
texture_rgba_view = device.createBufferView({
.buffer = texture_buffer.Handle(),
.format = vk::Format::eR32G32B32A32Sfloat,
.offset = 0,
.range = VK_WHOLE_SIZE,
});
// Since we don't have access to VK_EXT_descriptor_indexing we need to initialize
// all descriptor sets, even the ones we don't use. Use default_texture for this
const u32 vs_uniform_size = sizeof(Pica::Shader::VSUniformData);
const u32 fs_uniform_size = sizeof(Pica::Shader::UniformData);
pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), 0, vs_uniform_size);
pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), vs_uniform_size, fs_uniform_size);
pipeline_cache.BindTexelBuffer(2, texture_lf_buffer.GetView());
pipeline_cache.BindTexelBuffer(3, texture_buffer.GetView(0));
pipeline_cache.BindTexelBuffer(4, texture_buffer.GetView(1));
pipeline_cache.BindTexelBuffer(2, texture_lf_view);
pipeline_cache.BindTexelBuffer(3, texture_rg_view);
pipeline_cache.BindTexelBuffer(4, texture_rgba_view);
for (u32 i = 0; i < 4; i++) {
pipeline_cache.BindTexture(i, null_surface.GetImageView());
@@ -122,8 +131,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
RasterizerVulkan::~RasterizerVulkan() {
scheduler.Finish();
vk::Device device = instance.GetDevice();
const vk::Device device = instance.GetDevice();
for (auto& [key, sampler] : samplers) {
device.destroySampler(sampler);
@@ -134,6 +142,9 @@ RasterizerVulkan::~RasterizerVulkan() {
}
device.destroySampler(default_sampler);
device.destroyBufferView(texture_lf_view);
device.destroyBufferView(texture_rg_view);
device.destroyBufferView(texture_rgba_view);
}
void RasterizerVulkan::LoadDiskResources(const std::atomic_bool& stop_loading,
@@ -189,7 +200,7 @@ void RasterizerVulkan::SyncFixedState() {
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
u32 vs_input_index_max) {
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size);
auto [array_ptr, array_offset, invalidate] = stream_buffer.Map(vs_input_size, 16);
/**
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
@@ -262,11 +273,11 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
// Keep track of the binding offsets so we can bind the vertex buffer later
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
buffer_offset += Common::AlignUp(data_size, 16);
buffer_offset += Common::AlignUp(data_size, 4);
}
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
vertex_buffer.Commit(buffer_offset);
stream_buffer.Commit(buffer_offset);
// Assign the rest of the attributes to the last binding
SetupFixedAttribs();
@@ -283,7 +294,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
VertexLayout& layout = pipeline_info.vertex_layout;
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
auto [fixed_ptr, fixed_offset, _] = stream_buffer.Map(16 * sizeof(Common::Vec4f), 0);
// Reserve the last binding for fixed and default attributes
// Place the default attrib at offset zero for easy access
@@ -336,7 +347,7 @@ void RasterizerVulkan::SetupFixedAttribs() {
binding.fixed.Assign(1);
binding.stride.Assign(offset);
vertex_buffer.Commit(offset);
stream_buffer.Commit(offset);
}
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
@@ -430,7 +441,7 @@ void RasterizerVulkan::SetupIndexArray() {
regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
regs.pipeline.index_array.offset);
auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
auto [index_ptr, index_offset, _] = stream_buffer.Map(index_buffer_size, 2);
if (index_u8 && !native_u8) {
u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
@@ -440,11 +451,11 @@ void RasterizerVulkan::SetupIndexArray() {
std::memcpy(index_ptr, index_data, index_buffer_size);
}
index_buffer.Commit(index_buffer_size);
stream_buffer.Commit(index_buffer_size);
scheduler.Record([this, index_offset = index_offset,
index_type = index_type](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type);
render_cmdbuf.bindIndexBuffer(stream_buffer.Handle(), index_offset, index_type);
});
}
@@ -759,13 +770,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const u32 vertex_size = vertices * sizeof(HardwareVertex);
// Copy vertex data
auto [array_ptr, offset, _] = vertex_buffer.Map(vertex_size);
auto [array_ptr, offset, _] = stream_buffer.Map(vertex_size, sizeof(HardwareVertex));
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
vertex_buffer.Commit(vertex_size);
stream_buffer.Commit(vertex_size);
scheduler.Record([this, vertices, base_vertex,
offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
render_cmdbuf.bindVertexBuffers(0, stream_buffer.Handle(), offset);
render_cmdbuf.draw(vertices, 1, base_vertex, 0);
});
}
@@ -787,11 +798,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface);
}
static int submit_threshold = 50;
static int submit_threshold = 20;
submit_threshold--;
if (!submit_threshold) {
submit_threshold = 50;
scheduler.DispatchWork();
submit_threshold = 20;
scheduler.Flush();
}
return succeeded;
@@ -1152,14 +1163,6 @@ vk::Framebuffer RasterizerVulkan::CreateFramebuffer(const FramebufferInfo& info)
return device.createFramebuffer(framebuffer_info);
}
void RasterizerVulkan::FlushBuffers() {
vertex_buffer.Flush();
uniform_buffer.Flush();
index_buffer.Flush();
texture_buffer.Flush();
texture_lf_buffer.Flush();
}
void RasterizerVulkan::SyncClipEnabled() {
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
}
@@ -1294,7 +1297,7 @@ void RasterizerVulkan::SyncAndUploadLUTsLF() {
}
std::size_t bytes_used = 0;
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size);
auto [buffer, offset, invalidate] = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
// Sync the lighting luts
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
@@ -1360,7 +1363,7 @@ void RasterizerVulkan::SyncAndUploadLUTs() {
}
std::size_t bytes_used = 0;
auto [buffer, offset, invalidate] = texture_buffer.Map(max_size);
auto [buffer, offset, invalidate] = texture_buffer.Map(max_size, sizeof(Common::Vec4f));
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
auto SyncProcTexValueLUT =
@@ -1460,16 +1463,16 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
return;
}
u32 used_bytes = 0;
const u32 uniform_size = static_cast<u32>(uniform_size_aligned_vs + uniform_size_aligned_fs);
auto [uniforms, offset, invalidate] = uniform_buffer.Map(uniform_size);
const u64 uniform_size = uniform_size_aligned_vs + uniform_size_aligned_fs;
auto [uniforms, offset, invalidate] = stream_buffer.Map(uniform_size, uniform_buffer_alignment);
u32 used_bytes = 0;
if (sync_vs) {
Pica::Shader::VSUniformData vs_uniforms;
vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
pipeline_cache.BindBuffer(0, uniform_buffer.GetHandle(), offset + used_bytes,
pipeline_cache.BindBuffer(0, stream_buffer.Handle(), offset + used_bytes,
sizeof(vs_uniforms));
used_bytes += static_cast<u32>(uniform_size_aligned_vs);
}
@@ -1478,13 +1481,13 @@ void RasterizerVulkan::UploadUniforms(bool accelerate_draw) {
std::memcpy(uniforms + used_bytes, &uniform_block_data.data,
sizeof(Pica::Shader::UniformData));
pipeline_cache.BindBuffer(1, uniform_buffer.GetHandle(), offset + used_bytes,
pipeline_cache.BindBuffer(1, stream_buffer.Handle(), offset + used_bytes,
sizeof(uniform_block_data.data));
uniform_block_data.dirty = false;
used_bytes += static_cast<u32>(uniform_size_aligned_fs);
}
uniform_buffer.Commit(used_bytes);
stream_buffer.Commit(used_bytes);
}
} // namespace Vulkan

View File

@@ -7,6 +7,7 @@
#include "core/hw/gpu.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
@@ -101,9 +102,6 @@ public:
/// Sync fixed function pipeline state
void SyncFixedState();
/// Flushes all rasterizer owned buffers
void FlushBuffers();
private:
void NotifyFixedFunctionPicaRegisterChanged(u32 id) override;
@@ -201,16 +199,17 @@ private:
SamplerInfo texture_cube_sampler;
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
std::unordered_map<FramebufferInfo, vk::Framebuffer> framebuffers;
StreamBuffer vertex_buffer;
StreamBuffer uniform_buffer;
StreamBuffer index_buffer;
StreamBuffer texture_buffer;
StreamBuffer texture_lf_buffer;
PipelineInfo pipeline_info;
std::size_t uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
StreamBuffer stream_buffer; ///< Vertex+Index+Uniform buffer
StreamBuffer texture_buffer; ///< Texture buffer
StreamBuffer texture_lf_buffer; ///< Texture Light-Fog buffer
vk::BufferView texture_lf_view;
vk::BufferView texture_rg_view;
vk::BufferView texture_rgba_view;
u64 uniform_buffer_alignment;
u64 uniform_size_aligned_vs;
u64 uniform_size_aligned_fs;
};
} // namespace Vulkan

View File

@@ -26,10 +26,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf,
last = nullptr;
}
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
RendererVulkan& renderer)
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer},
master_semaphore{instance}, command_pool{instance, master_semaphore}, stop_requested{false},
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache)
: instance{instance}, renderpass_cache{renderpass_cache}, master_semaphore{instance},
command_pool{instance, master_semaphore}, stop_requested{false},
use_worker_thread{Settings::values.async_command_recording} {
AllocateWorkerCommandBuffers();
if (use_worker_thread) {
@@ -133,10 +132,9 @@ void Scheduler::AllocateWorkerCommandBuffers() {
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255));
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
const auto handle = master_semaphore.Handle();
const vk::Semaphore handle = master_semaphore.Handle();
const u64 signal_value = master_semaphore.NextTick();
state = StateFlags::AllDirty;
renderer.FlushBuffers();
renderpass_cache.ExitRenderpass();
Record([signal_semaphore, wait_semaphore, handle, signal_value,

View File

@@ -28,14 +28,12 @@ DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
class Instance;
class RenderpassCache;
class RendererVulkan;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
RendererVulkan& renderer);
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache);
~Scheduler();
/// Sends the current execution context to the GPU.
@@ -198,7 +196,6 @@ private:
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
RendererVulkan& renderer;
MasterSemaphore master_semaphore;
CommandPool command_pool;
std::unique_ptr<CommandChunk> chunk;

View File

@@ -1,243 +1,155 @@
// Copyright 2022 Citra Emulator Project
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <limits>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) {
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
return vk::AccessFlagBits::eVertexAttributeRead;
case vk::BufferUsageFlagBits::eIndexBuffer:
return vk::AccessFlagBits::eIndexRead;
case vk::BufferUsageFlagBits::eUniformBuffer:
return vk::AccessFlagBits::eUniformRead;
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
return vk::AccessFlagBits::eShaderRead;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
UNREACHABLE();
namespace {
constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000;
constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000;
/// Find a memory type with the passed requirements
std::optional<u32> FindMemoryType(const vk::PhysicalDeviceMemoryProperties& properties,
vk::MemoryPropertyFlags wanted,
u32 filter = std::numeric_limits<u32>::max()) {
for (u32 i = 0; i < properties.memoryTypeCount; ++i) {
const auto flags = properties.memoryTypes[i].propertyFlags;
if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) {
return i;
}
}
return vk::AccessFlagBits::eNone;
return std::nullopt;
}
[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) {
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
return vk::PipelineStageFlagBits::eVertexInput;
case vk::BufferUsageFlagBits::eIndexBuffer:
return vk::PipelineStageFlagBits::eVertexInput;
case vk::BufferUsageFlagBits::eUniformBuffer:
return vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eFragmentShader;
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
return vk::PipelineStageFlagBits::eFragmentShader;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
UNREACHABLE();
/// Get the preferred host visible memory type.
u32 GetMemoryType(const vk::PhysicalDeviceMemoryProperties& properties, bool readback,
u32 filter = std::numeric_limits<u32>::max()) {
// Prefer device local host visible allocations. Both AMD and Nvidia now provide one.
// Otherwise search for a host visible allocation.
const vk::MemoryPropertyFlags HOST_MEMORY =
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;
const vk::MemoryPropertyFlags DYNAMIC_MEMORY =
HOST_MEMORY | (readback ? vk::MemoryPropertyFlagBits::eHostCached
: vk::MemoryPropertyFlagBits::eDeviceLocal);
std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY);
if (!preferred_type) {
preferred_type = FindMemoryType(properties, HOST_MEMORY);
ASSERT_MSG(preferred_type, "No host visible and coherent memory type found");
}
return vk::PipelineStageFlagBits::eNone;
return preferred_type.value_or(0);
}
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
: instance{instance} {
const vk::BufferUsageFlags usage =
readback ? vk::BufferUsageFlagBits::eTransferDst : vk::BufferUsageFlagBits::eTransferSrc;
const vk::BufferCreateInfo buffer_info = {.size = size, .usage = usage};
} // Anonymous namespace
const VmaAllocationCreateFlags flags =
readback ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT
: VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
const VmaAllocationCreateInfo alloc_create_info = {
.flags = flags | VMA_ALLOCATION_CREATE_MAPPED_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST,
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
&alloc_info);
buffer = vk::Buffer{unsafe_buffer};
mapped = std::span{reinterpret_cast<std::byte*>(alloc_info.pMappedData), size};
}
StagingBuffer::~StagingBuffer() {
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback)
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, total_size{size},
bucket_size{size / BUCKET_COUNT}, readback{readback} {}
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
bool readback)
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback}, usage{usage},
total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
const vk::BufferCreateInfo buffer_info = {
.size = total_size,
.usage = usage | vk::BufferUsageFlagBits::eTransferDst,
};
const VmaAllocationCreateInfo alloc_create_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
&alloc_info);
gpu_buffer = vk::Buffer{unsafe_buffer};
ASSERT(view_formats.size() < MAX_BUFFER_VIEWS);
vk::Device device = instance.GetDevice();
for (std::size_t i = 0; i < view_formats.size(); i++) {
const vk::BufferViewCreateInfo view_info = {
.buffer = gpu_buffer,
.format = view_formats[i],
.offset = 0,
.range = total_size,
};
views[i] = device.createBufferView(view_info);
}
view_count = view_formats.size();
StreamBuffer::StreamBuffer(const Instance& instance_, Scheduler& scheduler_,
vk::BufferUsageFlags usage_, u64 size, bool readback_)
: instance{instance_}, scheduler{scheduler_}, usage{usage_}, readback{readback_} {
CreateBuffers(size);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
}
StreamBuffer::~StreamBuffer() {
if (gpu_buffer) {
vk::Device device = instance.GetDevice();
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(gpu_buffer), allocation);
for (std::size_t i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
}
}
const vk::Device device = instance.GetDevice();
device.unmapMemory(memory);
device.destroyBuffer(buffer);
device.freeMemory(memory);
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
ASSERT(size <= total_size);
size = Common::AlignUp(size, 16);
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
ASSERT(size <= stream_buffer_size);
mapped_size = size;
Bucket& bucket = buckets[bucket_index];
if (bucket.cursor + size > bucket_size) {
bucket.gpu_tick = scheduler.CurrentTick();
MoveNextBucket();
return Map(size);
if (alignment > 0) {
offset = Common::AlignUp(offset, alignment);
}
const bool invalidate = std::exchange(bucket.invalid, false);
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
WaitPendingOperations(offset);
return std::make_tuple(mapped, buffer_offset, invalidate);
bool invalidate{false};
if (offset + size > stream_buffer_size) {
// The buffer would overflow, save the amount of used watches and reset the state.
invalidate = true;
invalidation_mark = current_watch_cursor;
current_watch_cursor = 0;
offset = 0;
// Swap watches and reset waiting cursors.
std::swap(previous_watches, current_watches);
wait_cursor = 0;
wait_bound = 0;
}
return std::make_tuple(mapped + offset, offset, invalidate);
}
void StreamBuffer::Commit(u32 size) {
size = Common::AlignUp(size, 16);
buckets[bucket_index].cursor += size;
void StreamBuffer::Commit(u64 size) {
ASSERT_MSG(size <= mapped_size, "Reserved size {} is too small compared to {}", mapped_size,
size);
offset += size;
if (current_watch_cursor + 1 >= current_watches.size()) {
// Ensure that there are enough watches.
ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK);
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
watch.tick = scheduler.CurrentTick();
}
void StreamBuffer::Flush() {
if (readback) {
LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer");
void StreamBuffer::CreateBuffers(u64 prefered_size) {
const vk::Device device = instance.GetDevice();
const auto memory_properties = instance.GetPhysicalDevice().getMemoryProperties();
const u32 preferred_type = GetMemoryType(memory_properties, readback);
const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex;
// Subtract some bytes from the preferred heap size to avoid running out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
// As per DXVK's example, using `heap_size / 2`
const VkDeviceSize allocable_size = heap_size / 2;
buffer = device.createBuffer({
.size = std::min(prefered_size, allocable_size),
.usage = usage,
});
const auto requirements = device.getBufferMemoryRequirements(buffer);
const u32 required_flags = requirements.memoryTypeBits;
stream_buffer_size = static_cast<u64>(requirements.size);
memory = device.allocateMemory({
.allocationSize = requirements.size,
.memoryTypeIndex = GetMemoryType(memory_properties, readback, required_flags),
});
device.bindBufferMemory(buffer, memory, 0);
mapped = reinterpret_cast<u8*>(device.mapMemory(memory, 0, VK_WHOLE_SIZE));
}
void StreamBuffer::ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size) {
watches.resize(watches.size() + grow_size);
}
void StreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
if (!invalidation_mark) {
return;
}
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
ASSERT(flush_start + flush_size <= total_size);
// Ensure all staging writes are visible to the host memory domain
if (flush_size > 0) [[likely]] {
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
if (gpu_buffer) {
scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer,
vk::CommandBuffer upload_cmdbuf) {
const vk::BufferCopy copy_region = {
.srcOffset = flush_start,
.dstOffset = flush_start,
.size = flush_size,
};
upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = MakeAccessFlags(usage),
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = gpu_buffer,
.offset = flush_start,
.size = flush_size,
};
upload_cmdbuf.pipelineBarrier(
vk::PipelineStageFlagBits::eTransfer, MakePipelineStage(usage),
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
});
}
bucket.flush_cursor += flush_size;
while (requested_upper_bound > wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
scheduler.Wait(watch.tick);
++wait_cursor;
}
}
void StreamBuffer::Invalidate() {
if (!readback) {
return;
}
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) [[likely]] {
// Ensure the staging memory can be read by the host
VmaAllocator allocator = instance.GetAllocator();
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
bucket.flush_cursor += flush_size;
}
}
void StreamBuffer::MoveNextBucket() {
// Flush and Invalidate are bucket local operations for simplicity so perform them here
if (readback) {
Invalidate();
} else {
Flush();
}
bucket_index = (bucket_index + 1) % BUCKET_COUNT;
Bucket& next_bucket = buckets[bucket_index];
scheduler.Wait(next_bucket.gpu_tick);
next_bucket.cursor = 0;
next_bucket.flush_cursor = 0;
next_bucket.invalid = true;
}
} // namespace Vulkan

View File

@@ -1,101 +1,81 @@
// Copyright 2022 Citra Emulator Project
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <map>
#include <optional>
#include <span>
#include "common/assert.h"
#include <tuple>
#include <vector>
#include "video_core/renderer_vulkan/vk_common.h"
VK_DEFINE_HANDLE(VmaAllocation)
namespace Vulkan {
class Instance;
class Scheduler;
struct StagingBuffer {
StagingBuffer(const Instance& instance, u32 size, bool readback);
~StagingBuffer();
const Instance& instance;
vk::Buffer buffer{};
VmaAllocation allocation{};
std::span<std::byte> mapped{};
};
class StreamBuffer {
static constexpr u32 MAX_BUFFER_VIEWS = 3;
static constexpr u32 BUCKET_COUNT = 2;
class StreamBuffer final {
static constexpr std::size_t MAX_BUFFER_VIEWS = 3;
public:
/// Staging only constructor
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size, bool readback = false);
/// Staging + GPU streaming constructor
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
bool readback = false);
explicit StreamBuffer(const Instance& instance, Scheduler& scheduler,
vk::BufferUsageFlags usage, u64 size, bool readback = false);
~StreamBuffer();
StreamBuffer(const StreamBuffer&) = delete;
StreamBuffer& operator=(const StreamBuffer&) = delete;
/**
* Reserves a region of memory from the stream buffer.
* @param size Size to reserve.
* @param alignment Alignment of the returned buffer offset.
* @returns A tuple of a raw memory pointer (with offset applied), the buffer offset, and
* a flag indicating whether the previously streamed contents were invalidated
*/
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment);
/// Maps aligned staging memory of size bytes
std::tuple<u8*, u32, bool> Map(u32 size);
/// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy.
void Commit(u64 size);
/// Commits size bytes from the currently mapped staging memory
void Commit(u32 size = 0);
/// Flushes staging memory to the GPU buffer
void Flush();
/// Invalidates staging memory for reading
void Invalidate();
/// Returns the GPU buffer handle
[[nodiscard]] vk::Buffer GetHandle() const {
return gpu_buffer;
vk::Buffer Handle() const noexcept {
return buffer;
}
/// Returns the staging buffer handle
[[nodiscard]] vk::Buffer GetStagingHandle() const {
return staging.buffer;
}
/// Returns an immutable reference to the requested buffer view
[[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
u64 Address() const noexcept {
return 0;
}
private:
/// Moves to the next bucket
void MoveNextBucket();
struct Bucket {
bool invalid = false;
u32 gpu_tick = 0;
u32 cursor = 0;
u32 flush_cursor = 0;
struct Watch {
u64 tick{};
u64 upper_bound{};
};
/// Creates Vulkan buffer handles, committing the required memory.
void CreateBuffers(u64 prefered_size);
/// Increases the amount of watches available.
void ReserveWatches(std::vector<Watch>& watches, std::size_t grow_size);
void WaitPendingOperations(u64 requested_upper_bound);
private:
const Instance& instance;
Scheduler& scheduler;
StagingBuffer staging;
vk::Buffer gpu_buffer{};
VmaAllocation allocation{};
vk::BufferUsageFlagBits usage;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
std::array<Bucket, BUCKET_COUNT> buckets;
std::size_t view_count = 0;
u32 total_size = 0;
u32 bucket_size = 0;
u32 bucket_index = 0;
bool readback = false;
const Instance& instance; ///< Vulkan instance.
Scheduler& scheduler; ///< Command scheduler.
vk::Buffer buffer; ///< Mapped buffer.
vk::DeviceMemory memory; ///< Memory allocation.
u8* mapped{}; ///< Pointer to the mapped memory
u64 stream_buffer_size{}; ///< Stream buffer size.
vk::BufferUsageFlags usage{};
bool readback{}; ///< Flag indicating if the buffer should use cached memory
u64 offset{}; ///< Buffer iterator.
u64 mapped_size{}; ///< Size reserved for the current copy.
std::vector<Watch> current_watches; ///< Watches recorded in the current iteration.
std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation.
std::optional<std::size_t> invalidation_mark; ///< Number of watches used in the previous cycle.
std::vector<Watch> previous_watches; ///< Watches used in the previous iteration.
std::size_t wait_cursor{}; ///< Last watch being waited for completion.
u64 wait_bound{}; ///< Highest offset being watched for completion.
};
} // namespace Vulkan

View File

@@ -103,16 +103,16 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
return depth_offset;
}
constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u64 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u64 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
desc_manager{desc_manager}, blit_helper{instance, scheduler, desc_manager},
upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE}, download_buffer{instance, scheduler,
DOWNLOAD_BUFFER_SIZE,
true} {
upload_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferSrc, UPLOAD_BUFFER_SIZE},
download_buffer{instance, scheduler, vk::BufferUsageFlagBits::eTransferDst,
DOWNLOAD_BUFFER_SIZE, true} {
auto Register = [this](VideoCore::PixelFormat dest,
std::unique_ptr<FormatReinterpreterBase>&& obj) {
@@ -153,25 +153,20 @@ TextureRuntime::~TextureRuntime() {
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
auto& buffer = upload ? upload_buffer : download_buffer;
auto [data, offset, invalidate] = buffer.Map(size);
auto [data, offset, invalidate] = buffer.Map(size, 4);
return StagingData{
.buffer = buffer.GetStagingHandle(),
.buffer = buffer.Handle(),
.size = size,
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
.buffer_offset = offset,
};
}
void TextureRuntime::FlushBuffers() {
upload_buffer.Flush();
}
MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
void TextureRuntime::Finish() {
MICROPROFILE_SCOPE(Vulkan_Finish);
scheduler.Finish();
download_buffer.Invalidate();
}
ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
@@ -415,7 +410,8 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
};
if (clear.texture_rect == surface.GetScaledRect()) {
scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
scheduler.Record([params, clear, value](vk::CommandBuffer render_cmdbuf,
vk::CommandBuffer) {
const vk::ImageSubresourceRange range = {
.aspectMask = params.aspect,
.baseMipLevel = clear.texture_level,
@@ -458,20 +454,25 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
},
};
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
render_cmdbuf.pipelineBarrier(params.pipeline_flags,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
const bool is_color = static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
const bool is_color =
static_cast<bool>(params.aspect & vk::ImageAspectFlagBits::eColor);
if (is_color) {
render_cmdbuf.clearColorImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
render_cmdbuf.clearColorImage(params.src_image,
vk::ImageLayout::eTransferDstOptimal,
MakeClearColorValue(value), range);
} else {
render_cmdbuf.clearDepthStencilImage(params.src_image, vk::ImageLayout::eTransferDstOptimal,
render_cmdbuf.clearDepthStencilImage(params.src_image,
vk::ImageLayout::eTransferDstOptimal,
MakeClearDepthStencilValue(value), range);
}
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, params.pipeline_flags,
vk::DependencyFlagBits::eByRegion, {}, {}, post_barrier);
render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
params.pipeline_flags, vk::DependencyFlagBits::eByRegion,
{}, {}, post_barrier);
});
return true;
}
@@ -528,34 +529,34 @@ void TextureRuntime::ClearTextureWithRenderpass(Surface& surface,
.src_image = surface.alloc.image,
};
scheduler.Record(
[params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = params.src_access,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.src_image,
.subresourceRange{
.aspectMask = params.aspect,
.baseMipLevel = level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
vk::CommandBuffer) {
const vk::ImageMemoryBarrier pre_barrier = {
.srcAccessMask = params.src_access,
.dstAccessMask = vk::AccessFlagBits::eTransferWrite,
.oldLayout = vk::ImageLayout::eGeneral,
.newLayout = vk::ImageLayout::eGeneral,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = params.src_image,
.subresourceRange{
.aspectMask = params.aspect,
.baseMipLevel = level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
});
render_cmdbuf.pipelineBarrier(params.pipeline_flags, vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion, {}, {}, pre_barrier);
});
renderpass_cache.EnterRenderpass(clear_info);
renderpass_cache.ExitRenderpass();
scheduler.Record([params, level = clear.texture_level]
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
scheduler.Record([params, level = clear.texture_level](vk::CommandBuffer render_cmdbuf,
vk::CommandBuffer) {
const vk::ImageMemoryBarrier post_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = params.src_access,

View File

@@ -15,13 +15,15 @@
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
VK_DEFINE_HANDLE(VmaAllocation)
namespace Vulkan {
struct StagingData {
vk::Buffer buffer;
u32 size = 0;
std::span<std::byte> mapped{};
u32 buffer_offset = 0;
u64 buffer_offset = 0;
};
struct ImageAlloc {
@@ -127,9 +129,6 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
/// Flushes staging buffers
void FlushBuffers();
/// Returns all source formats that support reinterpretation to the dest format
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;