renderer_vulkan: Scheduler and presentation rewrite

* This commit ports yuzu's async scheduler, replacing our older and crummier version.
  Commands are recorded by the scheduler and processed by a separate worker thread.

* Queue submission is also moved to the worker thread, which should alleviate slowdowns related to vkQueueSubmit stalls

* Fragment shader compilation is likewise moved to that thread to reduce stutters (a simplified sketch of the recording pattern follows below)
GPUCode
2022-10-22 21:01:03 +03:00
parent a99be221b2
commit 11728d6772
34 changed files with 1665 additions and 1209 deletions

View File

@ -39,9 +39,9 @@ set(SDL_JOYSTICK ON CACHE BOOL "")
set(SDL_HAPTIC OFF CACHE BOOL "")
set(SDL_HIDAPI ON CACHE BOOL "")
set(SDL_POWER OFF CACHE BOOL "")
set(SDL_THREADS ON CACHE BOOL "")
set(SDL_TIMERS ON CACHE BOOL "")
set(SDL_FILE ON CACHE BOOL "")
set(SDL_THREADS ON CACHE BOOL "")
set(SDL_LOADSO ON CACHE BOOL "")
set(SDL_CPUINFO OFF CACHE BOOL "")
set(SDL_FILESYSTEM OFF CACHE BOOL "")

View File

@ -93,8 +93,14 @@ add_library(video_core STATIC
renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_layout_tracker.h
renderer_vulkan/vk_master_semaphore.cpp
renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_scheduler.cpp
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_resource_pool.cpp
renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_cache.cpp
@ -105,14 +111,12 @@ add_library(video_core STATIC
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader.cpp
renderer_vulkan/vk_shader.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_task_scheduler.cpp
renderer_vulkan/vk_task_scheduler.h
renderer_vulkan/vk_texture_runtime.cpp
renderer_vulkan/vk_texture_runtime.h
shader/debug_data.h

View File

@ -16,9 +16,7 @@
#include "core/tracer/recorder.h"
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/video_core.h"
namespace Vulkan {
@ -154,14 +152,12 @@ struct ScreenRectVertex {
constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance,
*this},
renderpass_cache{instance, scheduler}, runtime{instance, scheduler, renderpass_cache},
swapchain{instance, renderpass_cache}, vertex_buffer{instance,
scheduler,
VERTEX_BUFFER_SIZE,
vk::BufferUsageFlagBits::eVertexBuffer,
{}} {
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
runtime{instance, scheduler, renderpass_cache, desc_manager},
swapchain{instance, scheduler, renderpass_cache},
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
@ -197,30 +193,23 @@ RendererVulkan::~RendererVulkan() {
runtime.Recycle(tag, std::move(info.texture.alloc));
}
rasterizer.reset();
}
VideoCore::ResultStatus RendererVulkan::Init() {
CompileShaders();
BuildLayouts();
BuildPipelines();
// Create the rasterizer
rasterizer = std::make_unique<RasterizerVulkan>(render_window, instance, scheduler, runtime,
renderpass_cache);
return VideoCore::ResultStatus::Success;
}
VideoCore::RasterizerInterface* RendererVulkan::Rasterizer() {
return rasterizer.get();
return &rasterizer;
}
void RendererVulkan::ShutDown() {}
void RendererVulkan::Sync() {
rasterizer->SyncEntireState();
rasterizer.SyncEntireState();
}
void RendererVulkan::PrepareRendertarget() {
@ -236,24 +225,26 @@ void RendererVulkan::PrepareRendertarget() {
LCD::Read(color_fill.raw, lcd_color_addr);
if (color_fill.is_enabled) {
const vk::ClearColorValue clear_color = {
.float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f,
color_fill.color_b / 255.0f, 1.0f}};
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
TextureInfo& texture = screen_infos[i].texture;
runtime.Transition(command_buffer, texture.alloc, vk::ImageLayout::eTransferDstOptimal,
runtime.Transition(texture.alloc, vk::ImageLayout::eTransferDstOptimal,
0, texture.alloc.levels);
command_buffer.clearColorImage(
texture.alloc.image, vk::ImageLayout::eTransferDstOptimal, clear_color, range);
scheduler.Record([image = texture.alloc.image,
color_fill](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ClearColorValue clear_color = {
.float32 = std::array{color_fill.color_r / 255.0f, color_fill.color_g / 255.0f,
color_fill.color_b / 255.0f, 1.0f}};
const vk::ImageSubresourceRange range = {
.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1,
};
render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_color, range);
});
} else {
TextureInfo& texture = screen_infos[i].texture;
if (texture.width != framebuffer.width || texture.height != framebuffer.height ||
@ -275,9 +266,7 @@ void RendererVulkan::PrepareRendertarget() {
}
void RendererVulkan::BeginRendering() {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics,
present_pipelines[current_pipeline]);
vk::Device device = instance.GetDevice();
std::array<vk::DescriptorImageInfo, 4> present_textures;
for (std::size_t i = 0; i < screen_infos.size(); i++) {
@ -290,29 +279,24 @@ void RendererVulkan::BeginRendering() {
present_textures[3] = vk::DescriptorImageInfo{.sampler = present_samplers[current_sampler]};
const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool =
scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &present_descriptor_layout};
vk::Device device = instance.GetDevice();
vk::DescriptorSet set = device.allocateDescriptorSets(alloc_info)[0];
vk::DescriptorSet set = desc_manager.AllocateSet(present_descriptor_layout);
device.updateDescriptorSetWithTemplate(set, present_update_template, present_textures[0]);
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0,
1, &set, 0, nullptr);
scheduler.Record([this, set, pipeline_index = current_pipeline](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics,
present_pipelines[pipeline_index]);
const vk::ClearValue clear_value = {.color = clear_color};
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, present_pipeline_layout, 0, set, {});
});
const vk::RenderPassBeginInfo begin_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
const RenderpassState renderpass_info = {
.renderpass = renderpass_cache.GetPresentRenderpass(),
.framebuffer = swapchain.GetFramebuffer(),
.renderArea = vk::Rect2D{.offset = {0, 0}, .extent = swapchain.GetExtent()},
.clearValueCount = 1,
.pClearValues = &clear_value,
.render_area = vk::Rect2D{.offset = {0, 0}, .extent = swapchain.GetExtent()},
.clear = vk::ClearValue{.color = clear_color}
};
renderpass_cache.EnterRenderpass(begin_info);
renderpass_cache.EnterRenderpass(renderpass_info);
}
void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& framebuffer,
@ -340,7 +324,7 @@ void RendererVulkan::LoadFBToScreenInfo(const GPU::Regs::FramebufferConfig& fram
// only allows rows to have a memory alignment of 4.
ASSERT(pixel_stride % 4 == 0);
if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr,
if (!rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr,
static_cast<u32>(pixel_stride), screen_info)) {
ASSERT(false);
// Reset the screen info's display texture to its own permanent texture
@ -618,14 +602,16 @@ void RendererVulkan::DrawSingleScreenRotated(u32 screen_id, float x, float y, fl
draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w};
draw_info.screen_id_l = screen_id;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(draw_info), &draw_info);
scheduler.Record([this, offset = offset,
info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w, float h) {
@ -655,25 +641,16 @@ void RendererVulkan::DrawSingleScreen(u32 screen_id, float x, float y, float w,
draw_info.o_resolution = Common::Vec4f{h, w, 1.0f / h, 1.0f / w};
draw_info.screen_id_l = screen_id;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(draw_info), &draw_info);
scheduler.Record([this, offset = offset,
info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
const vk::ClearValue clear_value = {.color = clear_color};
const vk::RenderPassBeginInfo begin_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
.framebuffer = swapchain.GetFramebuffer(),
.clearValueCount = 1,
.pClearValues = &clear_value,
};
command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline);
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_id_r, float x,
@ -704,14 +681,16 @@ void RendererVulkan::DrawSingleScreenStereoRotated(u32 screen_id_l, u32 screen_i
draw_info.screen_id_l = screen_id_l;
draw_info.screen_id_r = screen_id_r;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(draw_info), &draw_info);
scheduler.Record([this, offset = offset,
info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, float x, float y,
@ -744,14 +723,16 @@ void RendererVulkan::DrawSingleScreenStereo(u32 screen_id_l, u32 screen_id_r, fl
draw_info.screen_id_l = screen_id_l;
draw_info.screen_id_r = screen_id_r;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(draw_info), &draw_info);
scheduler.Record([this, offset = offset,
info = draw_info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.pushConstants(present_pipeline_layout,
vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex,
0, sizeof(info), &info);
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
command_buffer.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), {0});
render_cmdbuf.draw(4, 1, offset / sizeof(ScreenRectVertex), 0);
});
}
void RendererVulkan::DrawScreens(const Layout::FramebufferLayout& layout, bool flipped) {
@ -908,41 +889,53 @@ void RendererVulkan::SwapBuffers() {
const auto& layout = render_window.GetFramebufferLayout();
PrepareRendertarget();
// Create swapchain if needed
if (swapchain.NeedsRecreation()) {
const auto RecreateSwapchain = [&] {
scheduler.Finish();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
swapchain.Create(layout.width, layout.height);
};
if (swapchain.NeedsRecreation()) {
RecreateSwapchain();
}
// Calling Submit will change the slot so get the required semaphores now
const vk::Semaphore image_acquired = scheduler.GetImageAcquiredSemaphore();
const vk::Semaphore present_ready = scheduler.GetPresentReadySemaphore();
swapchain.AcquireNextImage(image_acquired);
do {
scheduler.WaitWorker();
swapchain.AcquireNextImage();
if (swapchain.NeedsRecreation()) {
RecreateSwapchain();
}
} while (swapchain.NeedsRecreation());
const vk::Viewport viewport = {.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(layout.width),
.height = static_cast<float>(layout.height),
.minDepth = 0.0f,
.maxDepth = 1.0f};
scheduler.Record([layout](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::Viewport viewport = {.x = 0.0f,
.y = 0.0f,
.width = static_cast<float>(layout.width),
.height = static_cast<float>(layout.height),
.minDepth = 0.0f,
.maxDepth = 1.0f};
const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {layout.width, layout.height}};
const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {layout.width, layout.height}};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setViewport(0, viewport);
command_buffer.setScissor(0, scissor);
render_cmdbuf.setViewport(0, viewport);
render_cmdbuf.setScissor(0, scissor);
});
renderpass_cache.ExitRenderpass();
for (auto& info : screen_infos) {
auto alloc = info.display_texture ? info.display_texture : &info.texture.alloc;
runtime.Transition(command_buffer, *alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0,
ImageAlloc* alloc = info.display_texture ? info.display_texture : &info.texture.alloc;
runtime.Transition(*alloc, vk::ImageLayout::eShaderReadOnlyOptimal, 0,
alloc->levels);
}
DrawScreens(layout, false);
scheduler.Submit(SubmitMode::SwapchainSynced);
swapchain.Present(present_ready);
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
const VkSemaphore present_ready = swapchain.GetPresentReadySemaphore();
scheduler.Flush(present_ready, image_acquired);
//scheduler.WaitWorker();
swapchain.Present();
m_current_frame++;
@ -961,13 +954,8 @@ void RendererVulkan::SwapBuffers() {
void RendererVulkan::FlushBuffers() {
vertex_buffer.Flush();
rasterizer->FlushBuffers();
rasterizer.FlushBuffers();
runtime.FlushBuffers();
}
void RendererVulkan::OnSlotSwitch() {
renderpass_cache.OnSlotSwitch();
rasterizer->pipeline_cache.MarkDirty();
}
} // namespace Vulkan

View File

@ -11,9 +11,11 @@
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Layout {
struct FramebufferLayout;
@ -73,7 +75,6 @@ public:
void CleanupVideoDumping() override {}
void Sync() override;
void FlushBuffers();
void OnSlotSwitch();
private:
void ReloadSampler();
@ -103,12 +104,13 @@ private:
private:
Instance instance;
TaskScheduler scheduler;
Scheduler scheduler;
RenderpassCache renderpass_cache;
DescriptorManager desc_manager;
TextureRuntime runtime;
Swapchain swapchain;
std::unique_ptr<RasterizerVulkan> rasterizer;
StreamBuffer vertex_buffer;
RasterizerVulkan rasterizer;
// Present pipelines (Normal, Anaglyph, Interlaced)
vk::PipelineLayout present_pipeline_layout;

View File

@ -5,14 +5,15 @@
#include "common/vector_math.h"
#include "video_core/renderer_vulkan/vk_blit_helper.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
BlitHelper::BlitHelper(const Instance& instance, TaskScheduler& scheduler)
: scheduler{scheduler}, device{instance.GetDevice()} {
BlitHelper::BlitHelper(const Instance& instance, Scheduler& scheduler, DescriptorManager& desc_manager)
: scheduler{scheduler}, desc_manager{desc_manager}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
@ -137,25 +138,19 @@ void BlitHelper::BlitD24S8ToR32(Surface& source, Surface& dest,
vk::DescriptorImageInfo{.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral}};
const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool =
scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_layout};
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
scheduler.Record([this, set, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {});
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
render_cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(blit.src_rect.left, blit.src_rect.bottom);
command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
command_buffer.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
render_cmdbuf.dispatch(blit.src_rect.GetWidth() / 8, blit.src_rect.GetHeight() / 8, 1);
});
}
} // namespace Vulkan

View File

@ -13,12 +13,14 @@ struct TextureBlit;
namespace Vulkan {
class Instance;
class TaskScheduler;
class DescriptorManager;
class Scheduler;
class Surface;
class BlitHelper {
public:
BlitHelper(const Instance& instance, TaskScheduler& scheduler);
BlitHelper(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager);
~BlitHelper();
/// Blits D24S8 pixel data to the provided buffer
@ -26,12 +28,12 @@ public:
const VideoCore::TextureBlit& blit);
private:
TaskScheduler& scheduler;
Scheduler& scheduler;
DescriptorManager& desc_manager;
vk::Device device;
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
vk::DescriptorSet descriptor_set;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
};

View File

@ -20,8 +20,6 @@
namespace Vulkan {
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
/// Returns the image aspect associated with the provided format
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
switch (format) {

View File

@ -4,7 +4,8 @@
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "vulkan/vulkan.hpp"
namespace Vulkan {
@ -13,8 +14,6 @@ struct Bindings {
u32 binding_count;
};
constexpr u32 DESCRIPTOR_BATCH_SIZE = 8;
constexpr u32 RASTERIZER_SET_COUNT = 4;
constexpr static std::array RASTERIZER_SETS = {
Bindings{// Utility set
.bindings = {vk::DescriptorType::eUniformBuffer, vk::DescriptorType::eUniformBuffer,
@ -58,71 +57,56 @@ constexpr vk::ShaderStageFlags ToVkStageFlags(vk::DescriptorType type) {
return flags;
}
DescriptorManager::DescriptorManager(const Instance& instance, TaskScheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
descriptor_dirty.fill(true);
DescriptorManager::DescriptorManager(const Instance& instance, Scheduler& scheduler)
: instance{instance}, scheduler{scheduler}, pool_provider{instance, scheduler.GetMasterSemaphore()} {
BuildLayouts();
descriptor_set_dirty.fill(true);
current_pool = pool_provider.Commit();
}
DescriptorManager::~DescriptorManager() {
vk::Device device = instance.GetDevice();
device.destroyPipelineLayout(layout);
device.destroyPipelineLayout(pipeline_layout);
for (std::size_t i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
device.destroyDescriptorSetLayout(descriptor_set_layouts[i]);
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
}
void DescriptorManager::SetBinding(u32 set, u32 binding, DescriptorData data) {
if (update_data[set][binding] != data) {
update_data[set][binding] = data;
descriptor_dirty[set] = true;
DescriptorData& current = update_data[set][binding];
if (current != data) {
current = data;
descriptor_set_dirty[set] = true;
}
}
void DescriptorManager::BindDescriptorSets() {
vk::Device device = instance.GetDevice();
std::array<vk::DescriptorSetLayout, DESCRIPTOR_BATCH_SIZE> layouts;
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
if (descriptor_dirty[i] || !descriptor_sets[i]) {
auto& batch = descriptor_batch[i];
if (batch.empty()) {
layouts.fill(descriptor_set_layouts[i]);
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = scheduler.GetDescriptorPool(),
.descriptorSetCount = DESCRIPTOR_BATCH_SIZE,
.pSetLayouts = layouts.data()};
try {
batch = device.allocateDescriptorSets(alloc_info);
} catch (vk::OutOfPoolMemoryError& err) {
LOG_CRITICAL(Render_Vulkan, "Run out of pool memory for layout {}: {}", i,
err.what());
UNREACHABLE();
}
}
vk::DescriptorSet set = batch.back();
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]);
descriptor_sets[i] = set;
descriptor_dirty[i] = false;
batch.pop_back();
}
const bool is_dirty = scheduler.IsStateDirty(StateFlags::DescriptorSets);
if (is_dirty) {
descriptor_set_dirty.fill(true);
}
// Bind the descriptor sets
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0,
RASTERIZER_SET_COUNT, descriptor_sets.data(), 0, nullptr);
}
vk::Device device = instance.GetDevice();
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> bound_sets;
for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
if (descriptor_set_dirty[i]) {
vk::DescriptorSet set = AllocateSet(descriptor_set_layouts[i]);
device.updateDescriptorSetWithTemplate(set, update_templates[i], update_data[i][0]);
descriptor_sets[i] = set;
}
void DescriptorManager::MarkDirty() {
descriptor_dirty.fill(true);
for (auto& batch : descriptor_batch) {
batch.clear();
bound_sets[i] = descriptor_sets[i];
}
scheduler.Record([this, bound_sets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, 0, bound_sets, {});
});
descriptor_set_dirty.fill(false);
if (is_dirty) {
scheduler.MarkStateNonDirty(StateFlags::DescriptorSets);
}
}
@ -131,7 +115,7 @@ void DescriptorManager::BuildLayouts() {
std::array<vk::DescriptorUpdateTemplateEntry, MAX_DESCRIPTORS> update_entries;
vk::Device device = instance.GetDevice();
for (u32 i = 0; i < RASTERIZER_SET_COUNT; i++) {
for (u32 i = 0; i < MAX_DESCRIPTOR_SETS; i++) {
const auto& set = RASTERIZER_SETS[i];
for (u32 j = 0; j < set.binding_count; j++) {
vk::DescriptorType type = set.bindings[j];
@ -151,8 +135,6 @@ void DescriptorManager::BuildLayouts() {
const vk::DescriptorSetLayoutCreateInfo layout_info = {.bindingCount = set.binding_count,
.pBindings = set_bindings.data()};
// Create descriptor set layout
descriptor_set_layouts[i] = device.createDescriptorSetLayout(layout_info);
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
@ -161,16 +143,33 @@ void DescriptorManager::BuildLayouts() {
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_set_layouts[i]};
// Create descriptor set update template
update_templates[i] = device.createDescriptorUpdateTemplate(template_info);
}
const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = RASTERIZER_SET_COUNT,
const vk::PipelineLayoutCreateInfo layout_info = {.setLayoutCount = MAX_DESCRIPTOR_SETS,
.pSetLayouts = descriptor_set_layouts.data(),
.pushConstantRangeCount = 0,
.pPushConstantRanges = nullptr};
layout = device.createPipelineLayout(layout_info);
pipeline_layout = device.createPipelineLayout(layout_info);
}
vk::DescriptorSet DescriptorManager::AllocateSet(vk::DescriptorSetLayout layout) {
vk::Device device = instance.GetDevice();
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = current_pool,
.descriptorSetCount = 1,
.pSetLayouts = &layout};
try {
return device.allocateDescriptorSets(alloc_info)[0];
} catch (const vk::OutOfPoolMemoryError&) {
// The current pool is exhausted; advance the provider's tick so the old
// pool can be recycled once the GPU catches up, then retry with a freshly
// committed pool.
pool_provider.RefreshTick();
current_pool = pool_provider.Commit();
}
return AllocateSet(layout);
}
} // namespace Vulkan

View File

@ -4,19 +4,19 @@
#pragma once
#include "video_core/renderer_vulkan/vk_common.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
constexpr u32 MAX_DESCRIPTORS = 8;
constexpr u32 MAX_DESCRIPTOR_SETS = 6;
constexpr u32 MAX_DESCRIPTOR_SETS = 4;
union DescriptorData {
vk::DescriptorImageInfo image_info;
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
bool operator!=(const DescriptorData& other) const {
[[nodiscard]] bool operator!=(const DescriptorData& other) const noexcept {
return std::memcmp(this, &other, sizeof(DescriptorData)) != 0;
}
};
@ -24,25 +24,25 @@ union DescriptorData {
using DescriptorSetData = std::array<DescriptorData, MAX_DESCRIPTORS>;
class Instance;
class TaskScheduler;
class Scheduler;
class DescriptorManager {
public:
DescriptorManager(const Instance& instance, TaskScheduler& scheduler);
DescriptorManager(const Instance& instance, Scheduler& scheduler);
~DescriptorManager();
/// Allocates a descriptor set of the provided layout
vk::DescriptorSet AllocateSet(vk::DescriptorSetLayout layout);
/// Binds a resource to the provided binding
void SetBinding(u32 set, u32 binding, DescriptorData data);
/// Builds descriptor sets that reference the currently bound resources
void BindDescriptorSets();
/// Marks cached descriptor state dirty
void MarkDirty();
/// Returns the rasterizer pipeline layout
vk::PipelineLayout GetPipelineLayout() const {
return layout;
[[nodiscard]] vk::PipelineLayout GetPipelineLayout() const noexcept {
return pipeline_layout;
}
private:
@ -51,18 +51,15 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
// Cached layouts for the rasterizer pipelines
vk::PipelineLayout layout;
Scheduler& scheduler;
DescriptorPool pool_provider;
vk::PipelineLayout pipeline_layout;
vk::DescriptorPool current_pool;
std::array<vk::DescriptorSetLayout, MAX_DESCRIPTOR_SETS> descriptor_set_layouts;
std::array<vk::DescriptorUpdateTemplate, MAX_DESCRIPTOR_SETS> update_templates;
// Current data for the descriptor sets
std::array<DescriptorSetData, MAX_DESCRIPTOR_SETS> update_data{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_dirty{};
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets;
std::array<std::vector<vk::DescriptorSet>, MAX_DESCRIPTOR_SETS> descriptor_batch;
std::array<vk::DescriptorSet, MAX_DESCRIPTOR_SETS> descriptor_sets{};
std::array<bool, MAX_DESCRIPTOR_SETS> descriptor_set_dirty{};
};
} // namespace Vulkan

View File

@ -3,14 +3,16 @@
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Vulkan {
D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler,
TextureRuntime& runtime)
: FormatReinterpreterBase{instance, scheduler, runtime}, device{instance.GetDevice()} {
D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager, TextureRuntime& runtime)
: FormatReinterpreterBase{instance, scheduler, desc_manager, runtime}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
@ -135,25 +137,19 @@ void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, Surf
vk::DescriptorImageInfo{.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral}};
const vk::DescriptorSetAllocateInfo alloc_info = {.descriptorPool =
scheduler.GetDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_layout};
vk::DescriptorSet set = desc_manager.AllocateSet(descriptor_layout);
device.updateDescriptorSetWithTemplate(set, update_template, textures[0]);
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
scheduler.Record([this, set, src_rect](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0, set, {});
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom);
render_cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, 0,
1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom);
command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, 0,
sizeof(Common::Vec2i), src_offset.AsArray());
command_buffer.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
render_cmdbuf.dispatch(src_rect.GetWidth() / 8, src_rect.GetHeight() / 8, 1);
});
}
} // namespace Vulkan

View File

@ -11,14 +11,15 @@ namespace Vulkan {
class Surface;
class Instance;
class TaskScheduler;
class DescriptorManager;
class Scheduler;
class TextureRuntime;
class FormatReinterpreterBase {
public:
FormatReinterpreterBase(const Instance& instance, TaskScheduler& scheduler,
TextureRuntime& runtime)
: instance{instance}, scheduler{scheduler}, runtime{runtime} {}
FormatReinterpreterBase(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager, TextureRuntime& runtime)
: instance{instance}, scheduler{scheduler}, desc_manager{desc_manager}, runtime{runtime} {}
virtual ~FormatReinterpreterBase() = default;
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
@ -27,7 +28,8 @@ public:
protected:
const Instance& instance;
TaskScheduler& scheduler;
Scheduler& scheduler;
DescriptorManager& desc_manager;
TextureRuntime& runtime;
};
@ -35,7 +37,8 @@ using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
class D24S8toRGBA8 final : public FormatReinterpreterBase {
public:
D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime);
D24S8toRGBA8(const Instance& instance, Scheduler& scheduler,
DescriptorManager& desc_manager, TextureRuntime& runtime);
~D24S8toRGBA8();
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
@ -50,7 +53,6 @@ private:
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
vk::DescriptorSet descriptor_set;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
VideoCore::Rect2D temp_rect{0, 0, 0, 0};

View File

@ -111,7 +111,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index) {
try {
instance = vk::createInstance(instance_info);
} catch (vk::LayerNotPresentError& err) {
LOG_CRITICAL(Render_Vulkan, "Validation requested but layer is not available!");
LOG_CRITICAL(Render_Vulkan, "Validation requested but layer is not available {}", err.what());
UNREACHABLE();
}
@ -354,8 +354,7 @@ bool Instance::CreateDevice() {
try {
device = physical_device.createDevice(device_chain.get());
} catch (vk::ExtensionNotPresentError& err) {
LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available, "
"check extension log for details");
LOG_CRITICAL(Render_Vulkan, "Some required extensions are not available {}", err.what());
UNREACHABLE();
}

View File

@ -0,0 +1,25 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
MasterSemaphore::MasterSemaphore(const Instance& instance) : device{instance.GetDevice()} {
const vk::StructureChain semaphore_chain = {
vk::SemaphoreCreateInfo{},
vk::SemaphoreTypeCreateInfoKHR{
.semaphoreType = vk::SemaphoreType::eTimeline,
.initialValue = 0,
}
};
semaphore = device.createSemaphore(semaphore_chain.get());
}
MasterSemaphore::~MasterSemaphore() {
device.destroySemaphore(semaphore);
}
} // namespace Vulkan

View File

@ -0,0 +1,92 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <atomic>
#include <thread>
#include <limits>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
constexpr u64 WAIT_TIMEOUT = std::numeric_limits<u64>::max();
class MasterSemaphore {
public:
explicit MasterSemaphore(const Instance& instance);
~MasterSemaphore();
/// Returns the current logical tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return current_tick.load(std::memory_order_acquire);
}
/// Returns the last known GPU tick.
[[nodiscard]] u64 KnownGpuTick() const noexcept {
return gpu_tick.load(std::memory_order_acquire);
}
/// Returns the timeline semaphore handle.
[[nodiscard]] vk::Semaphore Handle() const noexcept {
return semaphore;
}
/// Returns true when a tick has been hit by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return KnownGpuTick() >= tick;
}
/// Advances to the next logical tick and returns the previous one
[[nodiscard]] u64 NextTick() noexcept {
return current_tick.fetch_add(1, std::memory_order_release);
}
/// Refresh the known GPU tick
void Refresh() {
u64 this_tick{};
u64 counter{};
do {
this_tick = gpu_tick.load(std::memory_order_acquire);
counter = device.getSemaphoreCounterValueKHR(semaphore);
if (counter < this_tick) {
return;
}
} while (!gpu_tick.compare_exchange_weak(this_tick, counter, std::memory_order_release,
std::memory_order_relaxed));
}
/// Waits for a tick to be hit on the GPU
void Wait(u64 tick) {
// No need to wait if the GPU is ahead of the tick
if (IsFree(tick)) {
return;
}
// Update the GPU tick and try again
Refresh();
if (IsFree(tick)) {
return;
}
// If none of the above is hit, fallback to a regular wait
const vk::SemaphoreWaitInfoKHR wait_info = {
.semaphoreCount = 1,
.pSemaphores = &semaphore,
.pValues = &tick,
};
while (device.waitSemaphoresKHR(&wait_info, WAIT_TIMEOUT) != vk::Result::eSuccess) {}
Refresh();
}
private:
vk::Device device;
vk::Semaphore semaphore; ///< Timeline semaphore.
std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
std::atomic<u64> current_tick{1}; ///< Current logical tick.
};
} // namespace Vulkan

View File

@ -6,12 +6,14 @@
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
@ -64,11 +66,9 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
return vk::ShaderStageFlagBits::eVertex;
}
PipelineCache::PipelineCache(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{
instance,
scheduler} {
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
trivial_vertex_shader = Compile(GenerateTrivialVertexShader(), vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
}
@ -158,36 +158,38 @@ void PipelineCache::SaveDiskCache() {
void PipelineCache::BindPipeline(const PipelineInfo& info) {
ApplyDynamic(info);
std::size_t shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
scheduler.Record([this, info](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::size_t shader_hash = 0;
for (u32 i = 0; i < MAX_SHADER_STAGES; i++) {
shader_hash = Common::HashCombine(shader_hash, shader_hashes[i]);
}
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
? offsetof(PipelineInfo, rasterization)
: offsetof(PipelineInfo, depth_stencil) +
offsetof(DepthStencilState, stencil_reference);
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
? offsetof(PipelineInfo, rasterization)
: offsetof(PipelineInfo, depth_stencil) +
offsetof(DepthStencilState, stencil_reference);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
if (new_pipeline) {
it->second = BuildPipeline(info);
}
auto [it, new_pipeline] = graphics_pipelines.try_emplace(pipeline_hash, vk::Pipeline{});
if (new_pipeline) {
it->second = BuildPipeline(info);
}
if (it->second != current_pipeline) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
render_cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, it->second);
current_pipeline = it->second;
}
});
desc_manager.BindDescriptorSets();
}
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
MICROPROFILE_SCOPE(Vulkan_VS);
PicaVSConfig config{regs.vs, setup};
for (u32 i = 0; i < layout.attribute_count; i++) {
const auto& attrib = layout.attributes[i];
@ -198,38 +200,52 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
if (!handle) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
return false;
}
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = config.Hash();
scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer, vk::CommandBuffer) {
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = hash;
});
return true;
}
void PipelineCache::UseTrivialVertexShader() {
current_shaders[ProgramType::VS] = trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) {
current_shaders[ProgramType::VS] = trivial_vertex_shader;
shader_hashes[ProgramType::VS] = 0;
});
}
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
const PicaFixedGSConfig gs_config{regs};
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::High);
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
});
}
void PipelineCache::UseTrivialGeometryShader() {
current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
shader_hashes[ProgramType::GS] = 0;
scheduler.Record([this](vk::CommandBuffer, vk::CommandBuffer) {
current_shaders[ProgramType::GS] = VK_NULL_HANDLE;
shader_hashes[ProgramType::GS] = 0;
});
}
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
scheduler.Record([this, config](vk::CommandBuffer, vk::CommandBuffer) {
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::High);
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
});
}
void PipelineCache::BindTexture(u32 binding, vk::ImageView image_view) {
@ -261,105 +277,108 @@ void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
if (viewport != current_viewport || state_dirty) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setViewport(0, vk::Viewport{x, y, width, height, 0.f, 1.f});
if (viewport != current_viewport || is_dirty) {
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setViewport(0, viewport);
});
current_viewport = viewport;
}
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const vk::Rect2D scissor{{x, y}, {width, height}};
if (scissor != current_scissor || state_dirty) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setScissor(0, vk::Rect2D{{x, y}, {width, height}});
if (scissor != current_scissor || is_dirty) {
scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setScissor(0, scissor);
});
current_scissor = scissor;
}
}
void PipelineCache::MarkDirty() {
desc_manager.MarkDirty();
current_pipeline = VK_NULL_HANDLE;
state_dirty = true;
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
if (info.depth_stencil.stencil_compare_mask !=
current_info.depth_stencil.stencil_compare_mask ||
state_dirty) {
command_buffer.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_compare_mask);
}
if (info.depth_stencil.stencil_write_mask != current_info.depth_stencil.stencil_write_mask ||
state_dirty) {
command_buffer.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_write_mask);
}
if (info.depth_stencil.stencil_reference != current_info.depth_stencil.stencil_reference ||
state_dirty) {
command_buffer.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_reference);
}
if (instance.IsExtendedDynamicStateSupported()) {
if (info.rasterization.cull_mode != current_info.rasterization.cull_mode || state_dirty) {
command_buffer.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode));
command_buffer.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode));
PipelineInfo current = current_info;
scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
if (info.depth_stencil.stencil_compare_mask !=
current.depth_stencil.stencil_compare_mask ||
is_dirty) {
render_cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_compare_mask);
}
if (info.depth_stencil.depth_compare_op != current_info.depth_stencil.depth_compare_op ||
state_dirty) {
command_buffer.setDepthCompareOpEXT(
PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op));
if (info.depth_stencil.stencil_write_mask != current.depth_stencil.stencil_write_mask ||
is_dirty) {
render_cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_write_mask);
}
if (info.depth_stencil.depth_test_enable != current_info.depth_stencil.depth_test_enable ||
state_dirty) {
command_buffer.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable);
if (info.depth_stencil.stencil_reference != current.depth_stencil.stencil_reference ||
is_dirty) {
render_cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
info.depth_stencil.stencil_reference);
}
if (info.depth_stencil.depth_write_enable !=
current_info.depth_stencil.depth_write_enable ||
state_dirty) {
command_buffer.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable);
}
if (instance.IsExtendedDynamicStateSupported()) {
if (info.rasterization.cull_mode != current.rasterization.cull_mode || is_dirty) {
render_cmdbuf.setCullModeEXT(PicaToVK::CullMode(info.rasterization.cull_mode));
render_cmdbuf.setFrontFaceEXT(PicaToVK::FrontFace(info.rasterization.cull_mode));
}
if (info.rasterization.topology != current_info.rasterization.topology || state_dirty) {
command_buffer.setPrimitiveTopologyEXT(
PicaToVK::PrimitiveTopology(info.rasterization.topology));
}
if (info.depth_stencil.depth_compare_op != current.depth_stencil.depth_compare_op ||
is_dirty) {
render_cmdbuf.setDepthCompareOpEXT(
PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op));
}
if (info.depth_stencil.stencil_test_enable !=
current_info.depth_stencil.stencil_test_enable ||
state_dirty) {
command_buffer.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable);
}
if (info.depth_stencil.depth_test_enable != current.depth_stencil.depth_test_enable ||
is_dirty) {
render_cmdbuf.setDepthTestEnableEXT(info.depth_stencil.depth_test_enable);
}
if (info.depth_stencil.stencil_fail_op != current_info.depth_stencil.stencil_fail_op ||
info.depth_stencil.stencil_pass_op != current_info.depth_stencil.stencil_pass_op ||
info.depth_stencil.stencil_depth_fail_op !=
current_info.depth_stencil.stencil_depth_fail_op ||
info.depth_stencil.stencil_compare_op !=
current_info.depth_stencil.stencil_compare_op ||
state_dirty) {
command_buffer.setStencilOpEXT(
vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op));
if (info.depth_stencil.depth_write_enable !=
current.depth_stencil.depth_write_enable ||
is_dirty) {
render_cmdbuf.setDepthWriteEnableEXT(info.depth_stencil.depth_write_enable);
}
if (info.rasterization.topology != current.rasterization.topology || is_dirty) {
render_cmdbuf.setPrimitiveTopologyEXT(
PicaToVK::PrimitiveTopology(info.rasterization.topology));
}
if (info.depth_stencil.stencil_test_enable !=
current.depth_stencil.stencil_test_enable ||
is_dirty) {
render_cmdbuf.setStencilTestEnableEXT(info.depth_stencil.stencil_test_enable);
}
if (info.depth_stencil.stencil_fail_op != current.depth_stencil.stencil_fail_op ||
info.depth_stencil.stencil_pass_op != current.depth_stencil.stencil_pass_op ||
info.depth_stencil.stencil_depth_fail_op !=
current.depth_stencil.stencil_depth_fail_op ||
info.depth_stencil.stencil_compare_op !=
current.depth_stencil.stencil_compare_op ||
is_dirty) {
render_cmdbuf.setStencilOpEXT(
vk::StencilFaceFlagBits::eFrontAndBack,
PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op));
}
}
}
});
current_info = info;
state_dirty = false;
if (is_dirty) {
scheduler.MarkStateNonDirty(StateFlags::Pipeline);
}
}
vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {

View File

@ -9,8 +9,7 @@
#include "common/hash.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/shader/shader_cache.h"
@ -120,8 +119,9 @@ using FragmentShaders =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
class Instance;
class TaskScheduler;
class Scheduler;
class RenderpassCache;
class DescriptorManager;
/**
* Stores a collection of rasterizer pipelines used during rendering.
@ -129,8 +129,8 @@ class RenderpassCache;
*/
class PipelineCache {
public:
PipelineCache(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache);
PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager);
~PipelineCache();
/// Loads the pipeline cache stored to disk
@ -179,9 +179,6 @@ public:
/// Sets the scissor rectangle to the provided values
void SetScissor(s32 x, s32 y, u32 width, u32 height);
/// Marks all cached pipeline cache state as dirty
void MarkDirty();
private:
/// Applies dynamic pipeline state to the current command buffer
void ApplyDynamic(const PipelineInfo& info);
@ -203,9 +200,9 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
DescriptorManager desc_manager;
DescriptorManager& desc_manager;
// Cached pipelines
vk::PipelineCache pipeline_cache;
@ -214,7 +211,6 @@ private:
PipelineInfo current_info{};
vk::Viewport current_viewport{};
vk::Rect2D current_scissor{};
bool state_dirty = true;
// Bound shader modules
enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 };

View File

@ -13,7 +13,7 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/video_core.h"
#include <vk_mem_alloc.h>
@ -88,8 +88,8 @@ constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
return layout;
}
constexpr u32 VERTEX_BUFFER_SIZE = 128 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 8 * 1024 * 1024;
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 16 * 1024 * 1024;
@ -111,11 +111,11 @@ constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled |
constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage;
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
TaskScheduler& scheduler, TextureRuntime& runtime,
RenderpassCache& renderpass_cache)
Scheduler& scheduler, DescriptorManager& desc_manager,
TextureRuntime& runtime, RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, runtime{runtime},
renderpass_cache{renderpass_cache}, res_cache{*this, runtime},
pipeline_cache{instance, scheduler, renderpass_cache},
renderpass_cache{renderpass_cache}, desc_manager{desc_manager}, res_cache{*this, runtime},
pipeline_cache{instance, scheduler, renderpass_cache, desc_manager},
null_surface{NULL_PARAMS, vk::Format::eR8G8B8A8Unorm, NULL_USAGE, runtime},
null_storage_surface{NULL_PARAMS, vk::Format::eR32Uint, NULL_STORAGE_USAGE, runtime},
vertex_buffer{
@ -178,7 +178,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass();
scheduler.Submit(SubmitMode::Flush | SubmitMode::Shutdown);
scheduler.Finish();
vk::Device device = instance.GetDevice();
@ -466,18 +466,15 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
pipeline_info.vertex_layout = layout;
vertex_buffer.Commit(buffer_offset - array_offset);
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
// Bind the vertex buffer with all the bindings
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.bindVertexBuffers(0, layout.binding_count, buffers.data(),
binding_offsets.data());
scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
offsets.data());
});
}
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool RasterizerVulkan::SetupVertexShader() {
MICROPROFILE_SCOPE(Vulkan_VS);
return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs,
pipeline_info.vertex_layout);
}
@ -533,7 +530,6 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
pipeline_cache.BindPipeline(pipeline_info);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (is_indexed) {
bool index_u16 = regs.pipeline.index_array.format != 0;
const u32 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
@ -552,13 +548,16 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
std::memcpy(index_ptr, index_data, index_buffer_size);
index_buffer.Commit(index_buffer_size);
vk::IndexType index_type = index_u16 ? vk::IndexType::eUint16 : vk::IndexType::eUint8EXT;
command_buffer.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type);
// Submit draw
command_buffer.drawIndexed(regs.pipeline.num_vertices, 1, 0, -vs_input_index_min, 0);
scheduler.Record([this, offset = index_offset, num_vertices = regs.pipeline.num_vertices,
index_u16, vertex_offset = vs_input_index_min](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::IndexType index_type = index_u16 ? vk::IndexType::eUint16 : vk::IndexType::eUint8EXT;
render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), offset, index_type);
render_cmdbuf.drawIndexed(num_vertices, 1, 0, -vertex_offset, 0);
});
} else {
command_buffer.draw(regs.pipeline.num_vertices, 1, 0, 0);
scheduler.Record([num_vertices = regs.pipeline.num_vertices](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.draw(num_vertices, 1, 0, 0);
});
}
return true;
@ -863,17 +862,16 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface->Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
const vk::RenderPassBeginInfo renderpass_begin = {
.renderPass = framebuffer_info.renderpass,
const RenderpassState renderpass_info = {
.renderpass = framebuffer_info.renderpass,
.framebuffer = it->second,
.renderArea = vk::Rect2D{.offset = {static_cast<s32>(draw_rect.left),
static_cast<s32>(draw_rect.bottom)},
.extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}},
.render_area = vk::Rect2D{.offset = {static_cast<s32>(draw_rect.left),
static_cast<s32>(draw_rect.bottom)},
.extent = {draw_rect.GetWidth(), draw_rect.GetHeight()}},
.clear = {}
};
.clearValueCount = 0,
.pClearValues = nullptr};
renderpass_cache.EnterRenderpass(renderpass_begin);
renderpass_cache.EnterRenderpass(renderpass_info);
// Draw the vertex batch
bool succeeded = true;
@ -886,8 +884,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
const u32 max_vertices = VERTEX_BUFFER_SIZE / sizeof(HardwareVertex);
const u32 batch_size = static_cast<u32>(vertex_batch.size());
for (u32 base_vertex = 0; base_vertex < batch_size; base_vertex += max_vertices) {
@ -899,8 +895,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
std::memcpy(array_ptr, vertex_batch.data() + base_vertex, vertex_size);
vertex_buffer.Commit(vertex_size);
command_buffer.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
command_buffer.draw(vertices, 1, base_vertex, 0);
scheduler.Record([this, vertices, base_vertex,
offset = offset](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){
render_cmdbuf.bindVertexBuffers(0, vertex_buffer.GetHandle(), offset);
render_cmdbuf.draw(vertices, 1, base_vertex, 0);
});
}
}
@ -1738,11 +1737,12 @@ void RasterizerVulkan::SyncBlendFuncs() {
}
void RasterizerVulkan::SyncBlendColor() {
auto blend_color =
const Common::Vec4f blend_color =
PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.setBlendConstants(blend_color.AsArray());
scheduler.Record([blend_color](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setBlendConstants(blend_color.AsArray());
});
}
void RasterizerVulkan::SyncFogColor() {

View File

@ -24,8 +24,9 @@ namespace Vulkan {
struct ScreenInfo;
class Instance;
class TaskScheduler;
class Scheduler;
class RenderpassCache;
class DescriptorManager;
struct SamplerInfo {
using TextureConfig = Pica::TexturingRegs::TextureConfig;
@ -80,8 +81,8 @@ class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
TaskScheduler& scheduler, TextureRuntime& runtime,
RenderpassCache& renderpass_cache);
Scheduler& scheduler, DescriptorManager& desc_manager,
TextureRuntime& runtime, RenderpassCache& renderpass_cache);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
@ -251,9 +252,10 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
Scheduler& scheduler;
TextureRuntime& runtime;
RenderpassCache& renderpass_cache;
DescriptorManager& desc_manager;
RasterizerCache res_cache;
PipelineCache pipeline_cache;
bool shader_dirty = true;

View File

@ -5,7 +5,7 @@
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
namespace Vulkan {
@ -39,7 +39,7 @@ VideoCore::PixelFormat ToFormatDepth(u32 index) {
}
}
RenderpassCache::RenderpassCache(const Instance& instance, TaskScheduler& scheduler)
RenderpassCache::RenderpassCache(const Instance& instance, Scheduler& scheduler)
: instance{instance}, scheduler{scheduler} {
// Pre-create all needed renderpasses by the renderer
for (u32 color = 0; color <= MAX_COLOR_FORMATS; color++) {
@ -88,28 +88,46 @@ RenderpassCache::~RenderpassCache() {
device.destroyRenderPass(present_renderpass);
}
void RenderpassCache::EnterRenderpass(const vk::RenderPassBeginInfo begin_info) {
if (active_begin == begin_info) {
void RenderpassCache::EnterRenderpass(const RenderpassState& state) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Renderpass);
if (current_state == state && !is_dirty) {
return;
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (active_begin.renderPass) {
command_buffer.endRenderPass();
scheduler.Record([should_end = bool(current_state.renderpass), state]
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
if (should_end) {
render_cmdbuf.endRenderPass();
}
const vk::RenderPassBeginInfo renderpass_begin_info = {
.renderPass = state.renderpass,
.framebuffer = state.framebuffer,
.renderArea = state.render_area,
.clearValueCount = 1,
.pClearValues = &state.clear};
render_cmdbuf.beginRenderPass(renderpass_begin_info, vk::SubpassContents::eInline);
});
if (is_dirty) {
scheduler.MarkStateNonDirty(StateFlags::Renderpass);
}
command_buffer.beginRenderPass(begin_info, vk::SubpassContents::eInline);
active_begin = begin_info;
current_state = state;
}
void RenderpassCache::ExitRenderpass() {
if (!active_begin.renderPass) {
if (!current_state.renderpass) {
return;
}
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.endRenderPass();
active_begin = vk::RenderPassBeginInfo{};
scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.endRenderPass();
});
current_state = {};
}
void RenderpassCache::CreatePresentRenderpass(vk::Format format) {
@ -136,7 +154,6 @@ vk::RenderPass RenderpassCache::CreateRenderPass(vk::Format color, vk::Format de
vk::AttachmentLoadOp load_op,
vk::ImageLayout initial_layout,
vk::ImageLayout final_layout) const {
// Define attachments
u32 attachment_count = 0;
std::array<vk::AttachmentDescription, 2> attachments;

View File

@ -4,24 +4,35 @@
#pragma once
#include <cstring>
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class TaskScheduler;
class Scheduler;
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 4;
struct RenderpassState {
vk::RenderPass renderpass;
vk::Framebuffer framebuffer;
vk::Rect2D render_area;
vk::ClearValue clear;
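// Note: memcmp-based equality assumes any padding bytes are zeroed; the
// RenderpassState instances built by this commit are value-initialized
// aggregates, which keeps the comparison reliable in practice.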
[[nodiscard]] bool operator==(const RenderpassState& other) const {
return std::memcmp(this, &other, sizeof(RenderpassState)) == 0;
}
};
class RenderpassCache {
static constexpr u32 MAX_COLOR_FORMATS = 5;
static constexpr u32 MAX_DEPTH_FORMATS = 4;
public:
RenderpassCache(const Instance& instance, TaskScheduler& scheduler);
RenderpassCache(const Instance& instance, Scheduler& scheduler);
~RenderpassCache();
/// Begins a new renderpass only when no other renderpass is currently active
void EnterRenderpass(const vk::RenderPassBeginInfo begin_info);
void EnterRenderpass(const RenderpassState& state);
/// Exits from any currently active renderpass instance
void ExitRenderpass();
@ -32,16 +43,12 @@ public:
/// Returns the renderpass associated with the color-depth format pair
[[nodiscard]] vk::RenderPass GetRenderpass(VideoCore::PixelFormat color,
VideoCore::PixelFormat depth, bool is_clear) const;
/// Returns the swapchain clear renderpass
[[nodiscard]] vk::RenderPass GetPresentRenderpass() const {
return present_renderpass;
}
/// Invalidates the currently active renderpass
void OnSlotSwitch() {
active_begin = vk::RenderPassBeginInfo{};
}
private:
/// Creates a renderpass configured appropriately and stores it in cached_renderpasses
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth,
@ -50,9 +57,8 @@ private:
private:
const Instance& instance;
TaskScheduler& scheduler;
vk::RenderPassBeginInfo active_begin{};
Scheduler& scheduler;
RenderpassState current_state{};
vk::RenderPass present_renderpass{};
vk::RenderPass cached_renderpasses[MAX_COLOR_FORMATS + 1][MAX_DEPTH_FORMATS + 1][2];
};

View File

@ -0,0 +1,148 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstddef>
#include <optional>
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
: master_semaphore{&master_semaphore_}, grow_step{grow_step_} {}
std::size_t ResourcePool::CommitResource() {
// Refresh semaphore to query updated results
master_semaphore->Refresh();
const u64 gpu_tick = master_semaphore->KnownGpuTick();
const auto search = [this, gpu_tick](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
for (std::size_t iterator = begin; iterator < end; ++iterator) {
if (gpu_tick >= ticks[iterator]) {
ticks[iterator] = master_semaphore->CurrentTick();
return iterator;
}
}
return std::nullopt;
};
// Try to find a free resource from the hinted position to the end.
std::optional<std::size_t> found = search(hint_iterator, ticks.size());
if (!found) {
// Search from beginning to the hinted position.
found = search(0, hint_iterator);
if (!found) {
// Both searches failed, the pool is full; handle it.
const std::size_t free_resource = ManageOverflow();
ticks[free_resource] = master_semaphore->CurrentTick();
found = free_resource;
}
}
// Free iterator is hinted to the resource after the one that's been committed.
hint_iterator = (*found + 1) % ticks.size();
return *found;
}
std::size_t ResourcePool::ManageOverflow() {
const std::size_t old_capacity = ticks.size();
Grow();
// The last entry is guaranteed to be free, since it's the first element of the freshly
// allocated resources.
return old_capacity;
}
void ResourcePool::Grow() {
const size_t old_capacity = ticks.size();
ticks.resize(old_capacity + grow_step);
Allocate(old_capacity, old_capacity + grow_step);
}
constexpr size_t COMMAND_BUFFER_POOL_SIZE = 4;
struct CommandPool::Pool {
vk::CommandPool handle;
std::array<vk::CommandBuffer, COMMAND_BUFFER_POOL_SIZE> cmdbufs;
};
CommandPool::CommandPool(const Instance& instance, MasterSemaphore& master_semaphore)
: ResourcePool{master_semaphore, COMMAND_BUFFER_POOL_SIZE}, instance{instance} {}
CommandPool::~CommandPool() {
vk::Device device = instance.GetDevice();
for (Pool& pool : pools) {
device.destroyCommandPool(pool.handle);
}
}
void CommandPool::Allocate(std::size_t begin, std::size_t end) {
// Command buffers are going to be committed, recorded, and executed every single usage cycle.
// They are also going to be reset when committed.
Pool& pool = pools.emplace_back();
const vk::CommandPoolCreateInfo pool_create_info = {
.flags = vk::CommandPoolCreateFlagBits::eTransient |
vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
};
vk::Device device = instance.GetDevice();
pool.handle = device.createCommandPool(pool_create_info);
const vk::CommandBufferAllocateInfo buffer_alloc_info = {
.commandPool = pool.handle,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = COMMAND_BUFFER_POOL_SIZE};
auto buffers = device.allocateCommandBuffers(buffer_alloc_info);
std::copy(buffers.begin(), buffers.end(), pool.cmdbufs.begin());
}
vk::CommandBuffer CommandPool::Commit() {
const std::size_t index = CommitResource();
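// Map the flat resource index onto a (pool, buffer) pair; e.g. with a
// pool size of 4, index 5 maps to pool 1, buffer 1.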
const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
return pools[pool_index].cmdbufs[sub_index];
}
DescriptorPool::DescriptorPool(const Instance& instance, MasterSemaphore& master_semaphore)
: ResourcePool{master_semaphore, 1}, instance{instance} {}
DescriptorPool::~DescriptorPool() {
vk::Device device = instance.GetDevice();
for (vk::DescriptorPool& pool : pools) {
device.destroyDescriptorPool(pool);
}
}
void DescriptorPool::RefreshTick() {
ticks[pool_index] = master_semaphore->CurrentTick();
}
void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
vk::DescriptorPool& pool = pools.emplace_back();
// Choose sane pool sizes that work well for most games
static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
{vk::DescriptorType::eUniformBuffer, 2048},
{vk::DescriptorType::eSampledImage, 4096},
{vk::DescriptorType::eSampler, 4096},
{vk::DescriptorType::eUniformTexelBuffer, 2048},
{vk::DescriptorType::eStorageImage, 1024}}};
const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
.maxSets = 8192,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data()};
pool = instance.GetDevice().createDescriptorPool(descriptor_pool_info);
}
vk::DescriptorPool DescriptorPool::Commit() {
pool_index = CommitResource();
instance.GetDevice().resetDescriptorPool(pools[pool_index]);
return pools[pool_index];
}
} // namespace Vulkan
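A minimal usage sketch (illustrative, not part of this commit): Commit() only returns entries whose recorded tick the GPU has already signaled, so the command buffer below is safe to reset and re-record. All names come from this file and vk_master_semaphore.h.
CommandPool pool{instance, master_semaphore};
vk::CommandBuffer cmdbuf = pool.Commit(); // recycles a free slot or grows the pool
const vk::CommandBufferBeginInfo begin_info = {
    .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit};
cmdbuf.begin(begin_info);
// ... record work, submit it, and eventually signal master_semaphore.CurrentTick() ...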

View File

@ -0,0 +1,84 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <vector>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
class MasterSemaphore;
/**
 * Handles a pool of resources protected by fences. Manages resource overflow by
 * allocating more resources.
*/
class ResourcePool {
public:
explicit ResourcePool() = default;
explicit ResourcePool(MasterSemaphore& master_semaphore, std::size_t grow_step);
virtual ~ResourcePool() = default;
ResourcePool& operator=(ResourcePool&&) noexcept = default;
ResourcePool(ResourcePool&&) noexcept = default;
ResourcePool& operator=(const ResourcePool&) = default;
ResourcePool(const ResourcePool&) = default;
protected:
std::size_t CommitResource();
/// Called when a chunk of resources has to be allocated.
virtual void Allocate(std::size_t begin, std::size_t end) = 0;
private:
/// Manages pool overflow by allocating new resources.
std::size_t ManageOverflow();
/// Allocates a new page of resources.
void Grow();
protected:
MasterSemaphore* master_semaphore{nullptr};
std::size_t grow_step = 0; ///< Number of new resources created after an overflow
std::size_t hint_iterator = 0; ///< Hint to where the next free resource is likely to be found
std::vector<u64> ticks; ///< Ticks for each resource
};
class CommandPool final : public ResourcePool {
public:
explicit CommandPool(const Instance& instance, MasterSemaphore& master_semaphore);
~CommandPool() override;
void Allocate(std::size_t begin, std::size_t end) override;
vk::CommandBuffer Commit();
private:
struct Pool;
const Instance& instance;
std::vector<Pool> pools;
};
class DescriptorPool final : public ResourcePool {
public:
explicit DescriptorPool(const Instance& instance, MasterSemaphore& master_semaphore);
~DescriptorPool() override;
/// Refreshes the tick of the currently committed pool
void RefreshTick();
void Allocate(std::size_t begin, std::size_t end) override;
vk::DescriptorPool Commit();
private:
const Instance& instance;
std::vector<vk::DescriptorPool> pools;
std::size_t pool_index = 0;
};
} // namespace Vulkan

View File

@ -0,0 +1,180 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <mutex>
#include <utility>
#include "common/microprofile.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
namespace Vulkan {
void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
auto command = first;
while (command != nullptr) {
auto next = command->GetNext();
command->Execute(render_cmdbuf, upload_cmdbuf);
command->~Command();
command = next;
}
submit = false;
command_offset = 0;
first = nullptr;
last = nullptr;
}
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
: instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore} {
AcquireNewChunk();
AllocateWorkerCommandBuffers();
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
Scheduler::~Scheduler() = default;
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
SubmitExecution(signal, wait);
}
void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
const u64 presubmit_tick = CurrentTick();
SubmitExecution(signal, wait);
WaitWorker();
Wait(presubmit_tick);
}
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
void Scheduler::WaitWorker() {
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
std::unique_lock lock{work_mutex};
wait_cv.wait(lock, [this] { return work_queue.empty(); });
}
void Scheduler::DispatchWork() {
if (chunk->Empty()) {
return;
}
{
std::scoped_lock lock{work_mutex};
work_queue.push(std::move(chunk));
}
work_cv.notify_one();
AcquireNewChunk();
}
void Scheduler::WorkerThread(std::stop_token stop_token) {
do {
std::unique_ptr<CommandChunk> work;
bool has_submit{false};
{
std::unique_lock lock{work_mutex};
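// Wake anyone blocked in WaitWorker() once the queue has fully drained.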
if (work_queue.empty()) {
wait_cv.notify_all();
}
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
}
work = std::move(work_queue.front());
work_queue.pop();
has_submit = work->HasSubmit();
work->ExecuteAll(render_cmdbuf, upload_cmdbuf);
}
if (has_submit) {
AllocateWorkerCommandBuffers();
}
std::scoped_lock reserve_lock{reserve_mutex};
chunk_reserve.push_back(std::move(work));
} while (!stop_token.stop_requested());
}
void Scheduler::AllocateWorkerCommandBuffers() {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
upload_cmdbuf = command_pool.Commit();
upload_cmdbuf.begin(begin_info);
render_cmdbuf = command_pool.Commit();
render_cmdbuf.begin(begin_info);
}
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255));
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
renderer.FlushBuffers();
const u64 signal_value = master_semaphore.NextTick();
state = StateFlags::AllDirty;
Record([signal_semaphore, wait_semaphore, signal_value, this]
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
MICROPROFILE_SCOPE(Vulkan_Submit);
upload_cmdbuf.end();
render_cmdbuf.end();
const vk::Semaphore timeline_semaphore = master_semaphore.Handle();
const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
const std::array signal_values{signal_value, u64(0)};
const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
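// Wait on the previous submission's timeline value; the second entry is
// ignored for the binary wait semaphore per the Vulkan spec.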
const std::array wait_values{signal_value - 1, u64(1)};
const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = num_wait_semaphores,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = num_signal_semaphores,
.pSignalSemaphoreValues = signal_values.data(),
};
const std::array cmdbuffers = {upload_cmdbuf, render_cmdbuf};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = num_wait_semaphores,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = 2,
.pCommandBuffers = cmdbuffers.data(),
.signalSemaphoreCount = num_signal_semaphores,
.pSignalSemaphores = signal_semaphores.data(),
};
try {
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
});
chunk->MarkSubmit();
DispatchWork();
}
void Scheduler::AcquireNewChunk() {
std::scoped_lock lock{reserve_mutex};
if (chunk_reserve.empty()) {
chunk = std::make_unique<CommandChunk>();
return;
}
chunk = std::move(chunk_reserve.back());
chunk_reserve.pop_back();
}
} // namespace Vulkan
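For reference, a minimal sketch of the recording pattern callers follow after this change (the draw parameters are placeholders; the scheduler calls are this commit's API):
scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
    render_cmdbuf.draw(3, 1, 0, 0); // placeholder draw
});
const u64 tick = scheduler.CurrentTick();
scheduler.Flush();    // hands the chunk to the worker thread, which submits it
scheduler.Wait(tick); // blocks until the GPU signals that timeline value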

View File

@ -0,0 +1,208 @@
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <condition_variable>
#include <cstddef>
#include <memory>
#include <thread>
#include <utility>
#include <queue>
#include "common/alignment.h"
#include "common/common_types.h"
#include "common/common_funcs.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
namespace Vulkan {
enum class StateFlags {
AllDirty = 0,
Renderpass = 1 << 0,
Pipeline = 1 << 1,
DescriptorSets = 1 << 2
};
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
class Instance;
class RendererVulkan;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
~Scheduler();
/// Sends the current execution context to the GPU.
void Flush(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
void Finish(vk::Semaphore signal = nullptr, vk::Semaphore wait = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
void WaitWorker();
/// Sends currently recorded work to the worker thread.
void DispatchWork();
/// Records the command to the current chunk.
template <typename T>
void Record(T&& command) {
if (chunk->Record(command)) {
return;
}
DispatchWork();
(void)chunk->Record(command);
}
/// Marks the provided state as non-dirty (a set flag bit means the state is clean)
void MarkStateNonDirty(StateFlags flag) noexcept {
state |= flag;
}
/// Returns true if the state is dirty, i.e. its flag bit has not been set since the last submit
[[nodiscard]] bool IsStateDirty(StateFlags flag) const noexcept {
return False(state & flag);
}
/// Returns the current command buffer tick.
[[nodiscard]] u64 CurrentTick() const noexcept {
return master_semaphore.CurrentTick();
}
/// Returns true when a tick has been triggered by the GPU.
[[nodiscard]] bool IsFree(u64 tick) const noexcept {
return master_semaphore.IsFree(tick);
}
/// Waits for the given tick to trigger on the GPU.
void Wait(u64 tick) {
if (tick >= master_semaphore.CurrentTick()) {
// Make sure we are not waiting for the current tick without signalling
Flush();
}
master_semaphore.Wait(tick);
}
/// Returns the master timeline semaphore.
[[nodiscard]] MasterSemaphore& GetMasterSemaphore() noexcept {
return master_semaphore;
}
private:
class Command {
public:
virtual ~Command() = default;
virtual void Execute(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) const = 0;
Command* GetNext() const {
return next;
}
void SetNext(Command* next_) {
next = next_;
}
private:
Command* next = nullptr;
};
template <typename T>
class TypedCommand final : public Command {
public:
explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
~TypedCommand() override = default;
TypedCommand(TypedCommand&&) = delete;
TypedCommand& operator=(TypedCommand&&) = delete;
void Execute(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) const override {
command(render_cmdbuf, upload_cmdbuf);
}
private:
T command;
};
class CommandChunk final {
public:
void ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf);
template <typename T>
bool Record(T& command) {
using FuncType = TypedCommand<T>;
static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
recorded_counts++;
command_offset = Common::AlignUp(command_offset, alignof(FuncType));
if (command_offset > sizeof(data) - sizeof(FuncType)) {
return false;
}
Command* const current_last = last;
last = new (data.data() + command_offset) FuncType(std::move(command));
if (current_last) {
current_last->SetNext(last);
} else {
first = last;
}
command_offset += sizeof(FuncType);
return true;
}
void MarkSubmit() {
submit = true;
}
bool Empty() const {
return recorded_counts == 0;
}
bool HasSubmit() const {
return submit;
}
private:
Command* first = nullptr;
Command* last = nullptr;
std::size_t recorded_counts = 0;
std::size_t command_offset = 0;
bool submit = false;
alignas(std::max_align_t) std::array<u8, 0x80000> data{};
};
private:
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffers();
void SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore);
void AcquireNewChunk();
private:
const Instance& instance;
RendererVulkan& renderer;
MasterSemaphore master_semaphore;
CommandPool command_pool;
std::unique_ptr<CommandChunk> chunk;
std::queue<std::unique_ptr<CommandChunk>> work_queue;
std::vector<std::unique_ptr<CommandChunk>> chunk_reserve;
vk::CommandBuffer render_cmdbuf;
vk::CommandBuffer upload_cmdbuf;
StateFlags state{};
std::mutex reserve_mutex;
std::mutex work_mutex;
std::condition_variable_any work_cv;
std::condition_variable wait_cv;
std::jthread worker_thread;
std::jthread present_thread;
};
} // namespace Vulkan
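Note the inverted flag convention above: a set bit means the state is clean, and each submit resets every bit (AllDirty == 0). A short sketch of the intended pattern, mirroring how RenderpassCache::EnterRenderpass uses it:
if (scheduler.IsStateDirty(StateFlags::Renderpass)) {
    // Re-record the state even if the cached copy matches, then mark it clean.
    scheduler.MarkStateNonDirty(StateFlags::Renderpass);
}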

View File

@ -7,7 +7,7 @@
#include <glslang/Public/ShaderLang.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {

View File

@ -8,7 +8,7 @@
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include <vk_mem_alloc.h>
@ -69,17 +69,16 @@ StagingBuffer::~StagingBuffer() {
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
}
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
bool readback)
: instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
staging{instance, total_size, readback}, bucket_size{size} {}
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback},
total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {}
StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
StreamBuffer::StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
vk::BufferUsageFlagBits usage, std::span<const vk::Format> view_formats,
bool readback)
: instance{instance}, scheduler{scheduler}, total_size{size * SCHEDULER_COMMAND_COUNT},
staging{instance, total_size, readback}, usage{usage}, bucket_size{size} {
: instance{instance}, scheduler{scheduler}, staging{instance, size, readback},
usage{usage}, total_size{size}, bucket_size{size / BUCKET_COUNT}, readback{readback} {
const vk::BufferCreateInfo buffer_info = {
.size = total_size, .usage = usage | vk::BufferUsageFlagBits::eTransferDst};
@ -94,14 +93,14 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info, &unsafe_buffer, &allocation,
&alloc_info);
buffer = vk::Buffer{unsafe_buffer};
gpu_buffer = vk::Buffer{unsafe_buffer};
ASSERT(view_formats.size() < MAX_BUFFER_VIEWS);
vk::Device device = instance.GetDevice();
for (std::size_t i = 0; i < view_formats.size(); i++) {
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer, .format = view_formats[i], .offset = 0, .range = total_size};
.buffer = gpu_buffer, .format = view_formats[i], .offset = 0, .range = total_size};
views[i] = device.createBufferView(view_info);
}
@ -110,9 +109,9 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u
}
StreamBuffer::~StreamBuffer() {
if (buffer) {
if (gpu_buffer) {
vk::Device device = instance.GetDevice();
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(buffer), allocation);
vmaDestroyBuffer(instance.GetAllocator(), static_cast<VkBuffer>(gpu_buffer), allocation);
for (std::size_t i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
}
@ -121,92 +120,103 @@ StreamBuffer::~StreamBuffer() {
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= bucket_size && alignment <= bucket_size);
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
auto& bucket = buckets[current_bucket];
Bucket& bucket = buckets[bucket_index];
if (alignment > 0) {
bucket.offset = Common::AlignUp(bucket.offset, alignment);
bucket.cursor = Common::AlignUp(bucket.cursor, alignment);
}
if (bucket.offset + size > bucket_size) {
UNREACHABLE();
// If the request would cross the bucket boundary, move over to the next bucket
if (bucket.cursor + size > bucket_size) {
bucket.gpu_tick = scheduler.CurrentTick();
MoveNextBucket();
return Map(size, alignment);
}
bool invalidate = false;
if (bucket.invalid) {
invalidate = true;
bucket.invalid = false;
}
const u32 buffer_offset = current_bucket * bucket_size + bucket.offset;
const bool invalidate = std::exchange(bucket.invalid, false);
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
return std::make_tuple(mapped, buffer_offset, invalidate);
}
void StreamBuffer::Commit(u32 size) {
buckets[scheduler.GetCurrentSlotIndex()].offset += size;
buckets[bucket_index].cursor += size;
}
void StreamBuffer::Flush() {
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
const u32 flush_start = current_bucket * bucket_size;
const u32 flush_size = buckets[current_bucket].offset;
if (readback) {
LOG_WARNING(Render_Vulkan, "Cannot flush a readback buffer");
return;
}
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) [[likely]] {
// Ensure all staging writes are visible to the device memory domain
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
if (gpu_buffer) {
scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
const vk::BufferCopy copy_region = {
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
// Make the data available to the GPU if possible
if (buffer) {
const vk::BufferCopy copy_region = {
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = gpu_buffer,
.offset = flush_start,
.size = flush_size};
// Add pipeline barrier for the flushed region
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = buffer,
.offset = flush_start,
.size = flush_size};
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
{});
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
{});
});
}
bucket.flush_cursor += flush_size;
}
SwitchBucket();
}
void StreamBuffer::Invalidate() {
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
const u32 flush_start = current_bucket * bucket_size;
const u32 flush_size = buckets[current_bucket].offset;
if (!readback) {
return;
}
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) [[likely]] {
// Ensure the staging memory can be read by the host
VmaAllocator allocator = instance.GetAllocator();
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
bucket.flush_cursor += flush_size;
}
SwitchBucket();
}
void StreamBuffer::SwitchBucket() {
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
buckets[next_bucket].offset = 0;
buckets[next_bucket].invalid = true;
void StreamBuffer::MoveNextBucket() {
// Flush and Invalidate are bucket-local operations for simplicity, so perform them here
if (readback) {
Invalidate();
} else {
Flush();
}
bucket_index = (bucket_index + 1) % BUCKET_COUNT;
Bucket& next_bucket = buckets[bucket_index];
scheduler.Wait(next_bucket.gpu_tick);
next_bucket.cursor = 0;
next_bucket.flush_cursor = 0;
next_bucket.invalid = true;
}
} // namespace Vulkan
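A minimal sketch of the streaming pattern (sizes and data are placeholders; the calls are this commit's API). Map reserves space in the current bucket, Commit advances the cursor, and Flush records the staging-to-GPU copy on the upload command buffer:
const auto [ptr, offset, invalidate] = vertex_buffer.Map(vertex_size, alignment);
std::memcpy(ptr, vertices.data(), vertex_size);
vertex_buffer.Commit(vertex_size);
vertex_buffer.Flush(); // in practice batched through RendererVulkan::FlushBuffers()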

View File

@ -15,9 +15,7 @@ VK_DEFINE_HANDLE(VmaAllocation)
namespace Vulkan {
class Instance;
class TaskScheduler;
constexpr u32 MAX_BUFFER_VIEWS = 3;
class Scheduler;
struct StagingBuffer {
StagingBuffer(const Instance& instance, u32 size, bool readback);
@ -30,12 +28,14 @@ struct StagingBuffer {
};
class StreamBuffer {
static constexpr u32 MAX_BUFFER_VIEWS = 3;
static constexpr u32 BUCKET_COUNT = 8;
public:
/// Staging only constructor
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
bool readback = false);
/// Staging + GPU streaming constructor
StreamBuffer(const Instance& instance, TaskScheduler& scheduler, u32 size,
StreamBuffer(const Instance& instance, Scheduler& scheduler, u32 size,
vk::BufferUsageFlagBits usage, std::span<const vk::Format> views,
bool readback = false);
~StreamBuffer();
@ -55,45 +55,47 @@ public:
/// Invalidates staging memory for reading
void Invalidate();
/// Switches to the next available bucket
void SwitchBucket();
/// Returns the GPU buffer handle
vk::Buffer GetHandle() const {
return buffer;
[[nodiscard]] vk::Buffer GetHandle() const {
return gpu_buffer;
}
/// Returns the staging buffer handle
vk::Buffer GetStagingHandle() const {
[[nodiscard]] vk::Buffer GetStagingHandle() const {
return staging.buffer;
}
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
[[nodiscard]] const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
}
private:
/// Moves to the next bucket
void MoveNextBucket();
struct Bucket {
bool invalid;
u32 fence_counter;
u32 offset;
bool invalid = false;
u64 gpu_tick = 0;
u32 cursor = 0;
u32 flush_cursor = 0;
};
private:
const Instance& instance;
TaskScheduler& scheduler;
u32 total_size = 0;
Scheduler& scheduler;
StagingBuffer staging;
vk::Buffer buffer{};
vk::Buffer gpu_buffer{};
VmaAllocation allocation{};
vk::BufferUsageFlagBits usage;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
std::array<Bucket, BUCKET_COUNT> buckets;
std::size_t view_count = 0;
u32 total_size = 0;
u32 bucket_size = 0;
std::array<Bucket, SCHEDULER_COMMAND_COUNT> buckets{};
u32 bucket_index = 0;
bool readback = false;
};
} // namespace Vulkan

View File

@ -8,34 +8,35 @@
#include "core/settings.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
namespace Vulkan {
Swapchain::Swapchain(const Instance& instance, RenderpassCache& renderpass_cache)
: instance{instance}, renderpass_cache{renderpass_cache}, surface{instance.GetSurface()} {
// Set the surface format early for RenderpassCache to create the present renderpass
Configure(0, 0);
Swapchain::Swapchain(const Instance& instance, Scheduler& scheduler, RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
surface{instance.GetSurface()} {
FindPresentFormat();
SetPresentMode();
renderpass_cache.CreatePresentRenderpass(surface_format.format);
}
Swapchain::~Swapchain() {
vk::Device device = instance.GetDevice();
device.destroySwapchainKHR(swapchain);
Destroy();
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
vk::Device device = instance.GetDevice();
for (const vk::Semaphore semaphore : image_acquired) {
device.destroySemaphore(semaphore);
}
for (const vk::Semaphore semaphore : present_ready) {
device.destroySemaphore(semaphore);
}
}
void Swapchain::Create(u32 width, u32 height) {
is_outdated = false;
is_suboptimal = false;
// Fetch information about the provided surface
Configure(width, height);
SetSurfaceProperties(width, height);
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
@ -59,70 +60,29 @@ void Swapchain::Create(u32 width, u32 height) {
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.presentMode = present_mode,
.clipped = true,
.oldSwapchain = swapchain};
.clipped = true};
vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
device.waitIdle();
Destroy();
// If an old swapchain exists, destroy it and move the new one to its place.
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
device.destroySwapchainKHR(old_swapchain);
}
swapchain = device.createSwapchainKHR(swapchain_info);
SetupImages();
auto images = device.getSwapchainImagesKHR(swapchain);
// Destroy the previous image views
for (auto& image : swapchain_images) {
device.destroyImageView(image.image_view);
device.destroyFramebuffer(image.framebuffer);
}
swapchain_images.clear();
swapchain_images.resize(images.size());
std::transform(
images.begin(), images.end(), swapchain_images.begin(), [device, this](vk::Image image) -> Image {
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = vk::ImageViewType::e2D,
.format = surface_format.format,
.subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1}};
vk::ImageView image_view = device.createImageView(view_info);
const std::array attachments = {image_view};
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
.attachmentCount = 1,
.pAttachments = attachments.data(),
.width = extent.width,
.height = extent.height,
.layers = 1};
vk::Framebuffer framebuffer = device.createFramebuffer(framebuffer_info);
return Image{.image = image, .image_view = image_view, .framebuffer = framebuffer};
});
resource_ticks.clear();
resource_ticks.resize(image_count);
}
// Wait for maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
MICROPROFILE_DEFINE(Vulkan_Acquire, "Vulkan", "Swapchain Acquire", MP_RGB(185, 66, 245));
void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
void Swapchain::AcquireNextImage() {
if (NeedsRecreation()) [[unlikely]] {
return;
}
MICROPROFILE_SCOPE(Vulkan_Acquire);
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT, signal_acquired,
VK_NULL_HANDLE, &current_image);
vk::Result result = device.acquireNextImageKHR(swapchain, UINT64_MAX, image_acquired[frame_index],
VK_NULL_HANDLE, &image_index);
switch (result) {
case vk::Result::eSuccess:
break;
@ -133,42 +93,46 @@ void Swapchain::AcquireNextImage(vk::Semaphore signal_acquired) {
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result {}", result);
break;
}
scheduler.Wait(resource_ticks[image_index]);
resource_ticks[image_index] = scheduler.CurrentTick();
}
MICROPROFILE_DEFINE(Vulkan_Present, "Vulkan", "Swapchain Present", MP_RGB(66, 185, 245));
void Swapchain::Present(vk::Semaphore wait_for_present) {
void Swapchain::Present() {
if (NeedsRecreation()) [[unlikely]] {
return;
}
MICROPROFILE_SCOPE(Vulkan_Present);
const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1,
.pWaitSemaphores = &wait_for_present,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image};
scheduler.Record([this, index = image_index](vk::CommandBuffer, vk::CommandBuffer) {
const vk::PresentInfoKHR present_info = {.waitSemaphoreCount = 1,
.pWaitSemaphores = &present_ready[index],
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &index};
vk::Queue present_queue = instance.GetPresentQueue();
try {
[[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info);
} catch (vk::OutOfDateKHRError err) {
is_outdated = true;
} catch (vk::SystemError err) {
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
UNREACHABLE();
}
vk::Queue present_queue = instance.GetPresentQueue();
try {
[[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info);
} catch (vk::OutOfDateKHRError& err) {
is_outdated = true;
} catch (vk::SystemError& err) {
LOG_CRITICAL(Render_Vulkan, "Swapchain presentation failed");
UNREACHABLE();
}
});
frame_index = (frame_index + 1) % image_count;
}
void Swapchain::Configure(u32 width, u32 height) {
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
void Swapchain::FindPresentFormat() {
const std::vector<vk::SurfaceFormatKHR> formats =
instance.GetPhysicalDevice().getSurfaceFormatsKHR(surface);
// Choose surface format
auto formats = physical.getSurfaceFormatsKHR(surface);
surface_format = formats[0];
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
surface_format.format = vk::Format::eB8G8R8A8Unorm;
} else {
@ -179,17 +143,19 @@ void Swapchain::Configure(u32 width, u32 height) {
if (it == formats.end()) {
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
UNREACHABLE();
} else {
surface_format = *it;
}
}
}
// Checks whether a particular present mode is supported by the surface
auto modes = physical.getSurfacePresentModesKHR(surface);
// FIFO is guaranteed by the Vulkan standard to be available
void Swapchain::SetPresentMode() {
present_mode = vk::PresentModeKHR::eFifo;
if (!Settings::values.use_vsync_new) {
const std::vector<vk::PresentModeKHR> modes =
instance.GetPhysicalDevice().getSurfacePresentModesKHR(surface);
const auto FindMode = [&modes](vk::PresentModeKHR requested) {
auto it =
std::find_if(modes.begin(), modes.end(),
@ -198,7 +164,7 @@ void Swapchain::Configure(u32 width, u32 height) {
return it != modes.end();
};
// Prefer Immediate when vsync is disabled for fastest acquire
// Prefer immediate when vsync is disabled for fastest acquire
if (FindMode(vk::PresentModeKHR::eImmediate)) {
present_mode = vk::PresentModeKHR::eImmediate;
} else if (FindMode(vk::PresentModeKHR::eMailbox)) {
@ -206,15 +172,18 @@ void Swapchain::Configure(u32 width, u32 height) {
}
}
// Query surface extent
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
}
void Swapchain::SetSurfaceProperties(u32 width, u32 height) {
const vk::SurfaceCapabilitiesKHR capabilities =
instance.GetPhysicalDevice().getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
extent.width =
std::clamp(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width);
extent.height = std::clamp(height, capabilities.minImageExtent.height,
capabilities.maxImageExtent.height);
extent.height =
std::clamp(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height);
}
// Select the number of images in the swap chain; we prefer one extra buffer in the background to work on
@ -230,4 +199,54 @@ void Swapchain::Configure(u32 width, u32 height) {
}
}
void Swapchain::Destroy() {
vk::Device device = instance.GetDevice();
if (swapchain) {
device.destroySwapchainKHR(swapchain);
}
// Destroy the per-image semaphores as well; SetupImages recreates them on
// every swapchain (re)creation, so leaving them alive would leak handles.
for (const vk::Semaphore semaphore : image_acquired) {
device.destroySemaphore(semaphore);
}
for (const vk::Semaphore semaphore : present_ready) {
device.destroySemaphore(semaphore);
}
for (const vk::ImageView view : image_views) {
device.destroyImageView(view);
}
for (const vk::Framebuffer framebuffer : framebuffers) {
device.destroyFramebuffer(framebuffer);
}
frame_index = 0;
image_acquired.clear();
present_ready.clear();
framebuffers.clear();
image_views.clear();
}
void Swapchain::SetupImages() {
vk::Device device = instance.GetDevice();
images = device.getSwapchainImagesKHR(swapchain);
for (const vk::Image image : images) {
image_acquired.push_back(device.createSemaphore({}));
present_ready.push_back(device.createSemaphore({}));
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = vk::ImageViewType::e2D,
.format = surface_format.format,
.subresourceRange = {.aspectMask = vk::ImageAspectFlagBits::eColor,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1}};
image_views.push_back(device.createImageView(view_info));
const vk::FramebufferCreateInfo framebuffer_info = {
.renderPass = renderpass_cache.GetPresentRenderpass(),
.attachmentCount = 1,
.pAttachments = &image_views.back(),
.width = extent.width,
.height = extent.height,
.layers = 1};
framebuffers.push_back(device.createFramebuffer(framebuffer_info));
}
}
} // namespace Vulkan
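A sketch of the expected per-frame flow, assuming the renderer wires the swapchain semaphores into the scheduler this way (consistent with Scheduler::SubmitExecution, which signals and waits on its extra semaphores):
swapchain.AcquireNextImage();
// ... render to swapchain.GetFramebuffer() ...
scheduler.Flush(swapchain.GetPresentReadySemaphore(),
                swapchain.GetImageAcquiredSemaphore());
swapchain.Present(); // waits on present_ready before queuing the present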

View File

@ -11,77 +11,97 @@
namespace Vulkan {
class Instance;
class Scheduler;
class RenderpassCache;
class Swapchain {
public:
Swapchain(const Instance& instance, RenderpassCache& renderpass_cache);
Swapchain(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height);
/// Acquires the next image in the swapchain.
void AcquireNextImage(vk::Semaphore signal_acquired);
void AcquireNextImage();
/// Presents the current image and moves to the next one
void Present(vk::Semaphore wait_for_present);
void Present();
/// Returns true when the swapchain should be recreated
[[nodiscard]] bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
}
/// Returns current swapchain state
vk::Extent2D GetExtent() const {
[[nodiscard]] vk::Extent2D GetExtent() const {
return extent;
}
/// Returns the swapchain surface
vk::SurfaceKHR GetSurface() const {
[[nodiscard]] vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the current framebuffer
vk::Framebuffer GetFramebuffer() const {
return swapchain_images[current_image].framebuffer;
[[nodiscard]] vk::Framebuffer GetFramebuffer() const {
return framebuffers[frame_index];
}
/// Returns the swapchain format
vk::SurfaceFormatKHR GetSurfaceFormat() const {
[[nodiscard]] vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
/// Returns the Vulkan swapchain handle
vk::SwapchainKHR GetHandle() const {
[[nodiscard]] vk::SwapchainKHR GetHandle() const {
return swapchain;
}
/// Returns true when the swapchain should be recreated
bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
[[nodiscard]] vk::Semaphore GetImageAcquiredSemaphore() const {
return image_acquired[frame_index];
}
[[nodiscard]] vk::Semaphore GetPresentReadySemaphore() const {
return present_ready[image_index];
}
private:
void Configure(u32 width, u32 height);
/// Selects the best available swapchain image format
void FindPresentFormat();
/// Sets the best available present mode
void SetPresentMode();
/// Sets the surface properties according to device capabilities
void SetSurfaceProperties(u32 width, u32 height);
/// Destroys current swapchain resources
void Destroy();
/// Performs creation of image views and framebuffers from the swapchain images
void SetupImages();
private:
const Instance& instance;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
vk::SwapchainKHR swapchain{};
vk::SurfaceKHR surface{};
// Swapchain properties
vk::SurfaceFormatKHR surface_format;
vk::PresentModeKHR present_mode;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
u32 image_count;
struct Image {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
};
// Swapchain state
std::vector<Image> swapchain_images;
u32 current_image = 0;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> image_acquired;
std::vector<vk::Semaphore> present_ready;
u32 image_count = 0;
u32 image_index = 0;
u32 frame_index = 0;
bool is_outdated = true;
bool is_suboptimal = true;
};

View File

@ -1,254 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer)
: instance{instance}, renderer{renderer} {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo command_pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()};
command_pool = device.createCommandPool(command_pool_info);
// If supported, prefer timeline semaphores over binary ones
if (instance.IsTimelineSemaphoreSupported()) {
const vk::StructureChain timeline_info = {
vk::SemaphoreCreateInfo{},
vk::SemaphoreTypeCreateInfo{.semaphoreType = vk::SemaphoreType::eTimeline,
.initialValue = 0}};
timeline = device.createSemaphore(timeline_info.get());
}
constexpr std::array pool_sizes = {
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformBufferDynamic, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eSampledImage, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eSampler, 4096},
vk::DescriptorPoolSize{vk::DescriptorType::eUniformTexelBuffer, 2048},
vk::DescriptorPoolSize{vk::DescriptorType::eStorageImage, 1024}};
const vk::DescriptorPoolCreateInfo descriptor_pool_info = {
.maxSets = 8192,
.poolSizeCount = static_cast<u32>(pool_sizes.size()),
.pPoolSizes = pool_sizes.data()};
const vk::CommandBufferAllocateInfo buffer_info = {.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount =
2 * SCHEDULER_COMMAND_COUNT};
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = ExecutionSlot{
.image_acquired = device.createSemaphore({}),
.present_ready = device.createSemaphore({}),
.fence = device.createFence({}),
.descriptor_pool = device.createDescriptorPool(descriptor_pool_info),
.render_command_buffer = command_buffers[2 * i],
.upload_command_buffer = command_buffers[2 * i + 1],
};
}
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit};
// Begin first command
auto& command = commands[current_command];
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
}
TaskScheduler::~TaskScheduler() {
vk::Device device = instance.GetDevice();
device.waitIdle();
if (timeline) {
device.destroySemaphore(timeline);
}
for (const auto& command : commands) {
device.destroyFence(command.fence);
device.destroySemaphore(command.image_acquired);
device.destroySemaphore(command.present_ready);
device.destroyDescriptorPool(command.descriptor_pool);
}
device.destroyCommandPool(command_pool);
}
MICROPROFILE_DEFINE(Vulkan_Synchronize, "Vulkan", "Scheduler Synchronize", MP_RGB(100, 52, 235));
void TaskScheduler::Synchronize(u32 slot) {
const auto& command = commands[slot];
vk::Device device = instance.GetDevice();
const u64 completed_counter = GetFenceCounter();
if (command.fence_counter > completed_counter) {
MICROPROFILE_SCOPE(Vulkan_Synchronize);
if (instance.IsTimelineSemaphoreSupported()) {
const vk::SemaphoreWaitInfo wait_info = {
.semaphoreCount = 1, .pSemaphores = &timeline, .pValues = &command.fence_counter};
if (device.waitSemaphores(wait_info, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!",
command.fence_counter);
UNREACHABLE();
}
} else if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fence counter {} failed!", command.fence_counter);
UNREACHABLE();
}
completed_fence_counter = command.fence_counter;
}
device.resetFences(command.fence);
device.resetDescriptorPool(command.descriptor_pool);
}
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Scheduler Queue Submit", MP_RGB(66, 245, 170));
void TaskScheduler::Submit(SubmitMode mode) {
if (False(mode & SubmitMode::Shutdown)) {
renderer.FlushBuffers();
}
const auto& command = commands[current_command];
command.render_command_buffer.end();
if (command.use_upload_buffer) {
command.upload_command_buffer.end();
}
u32 command_buffer_count = 0;
std::array<vk::CommandBuffer, 2> command_buffers;
if (command.use_upload_buffer) {
command_buffers[command_buffer_count++] = command.upload_command_buffer;
}
command_buffers[command_buffer_count++] = command.render_command_buffer;
const auto QueueSubmit = [this](const vk::SubmitInfo& info, vk::Fence fence) {
MICROPROFILE_SCOPE(Vulkan_Submit);
try {
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(info, fence);
} catch (vk::DeviceLostError& err) {
LOG_CRITICAL(Render_Vulkan, "Device lost during submit: {}", err.what());
UNREACHABLE();
}
};
const bool swapchain_sync = True(mode & SubmitMode::SwapchainSynced);
if (instance.IsTimelineSemaphoreSupported()) {
const u32 wait_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array wait_values{command.fence_counter - 1, u64(1)};
const std::array wait_semaphores{timeline, command.image_acquired};
const u32 signal_semaphore_count = swapchain_sync ? 2u : 1u;
const std::array signal_values{command.fence_counter, u64(0)};
const std::array signal_semaphores{timeline, command.present_ready};
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si = {
.waitSemaphoreValueCount = wait_semaphore_count,
.pWaitSemaphoreValues = wait_values.data(),
.signalSemaphoreValueCount = signal_semaphore_count,
.pSignalSemaphoreValues = signal_values.data()};
const std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const vk::SubmitInfo submit_info = {
.pNext = &timeline_si,
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = wait_semaphores.data(),
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = signal_semaphores.data(),
};
QueueSubmit(submit_info, command.fence);
} else {
const u32 signal_semaphore_count = swapchain_sync ? 1u : 0u;
const u32 wait_semaphore_count = swapchain_sync ? 1u : 0u;
const vk::PipelineStageFlags wait_stage_masks =
vk::PipelineStageFlagBits::eColorAttachmentOutput;
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &command.image_acquired,
.pWaitDstStageMask = &wait_stage_masks,
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &command.present_ready,
};
QueueSubmit(submit_info, command.fence);
}
// Block host until the GPU catches up
if (True(mode & SubmitMode::Flush)) {
Synchronize(current_command);
}
// Switch to next cmdbuffer.
if (False(mode & SubmitMode::Shutdown)) {
SwitchSlot();
renderer.OnSlotSwitch();
}
}
u64 TaskScheduler::GetFenceCounter() const {
vk::Device device = instance.GetDevice();
if (instance.IsTimelineSemaphoreSupported()) {
return device.getSemaphoreCounterValue(timeline);
}
return completed_fence_counter;
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& command = commands[current_command];
if (!command.use_upload_buffer) {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit};
command.upload_command_buffer.begin(begin_info);
command.use_upload_buffer = true;
}
return command.upload_command_buffer;
}
void TaskScheduler::SwitchSlot() {
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
auto& command = commands[current_command];
// Wait for the GPU to finish with all resources for this command.
Synchronize(current_command);
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit};
// Begin the next command buffer.
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
command.use_upload_buffer = false;
}
} // namespace Vulkan
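For reference, the fence wait in Synchronize() has a direct timeline-semaphore analogue, which is why the timeline path above never needs per-slot fences. A minimal sketch, assuming the same `timeline` object and a device with timeline semaphore support; the helper name is illustrative and not part of this commit:

#include <limits>
#include "video_core/renderer_vulkan/vk_common.h"

// Hypothetical helper: blocks the host until `timeline` reaches `counter`,
// mirroring what vkWaitForFences does for a single slot.
void WaitForCounter(vk::Device device, vk::Semaphore timeline, u64 counter) {
    const vk::SemaphoreWaitInfo wait_info = {
        .semaphoreCount = 1,
        .pSemaphores = &timeline,
        .pValues = &counter,
    };
    // Unlike a fence, any historical counter value can be waited on, so no
    // per-slot reset is needed before reuse.
    if (device.waitSemaphores(wait_info, std::numeric_limits<u64>::max()) !=
        vk::Result::eSuccess) {
        // Handle timeout/device loss as appropriate for the caller.
    }
}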

View File

@ -1,100 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <functional>
#include <memory>
#include <thread>
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Buffer;
class Instance;
class RendererVulkan;
enum class SubmitMode : u8 {
SwapchainSynced = 1 << 0, ///< Synchronizes command buffer execution with the swapchain
Flush = 1 << 1, ///< Causes a GPU command flush, useful for texture downloads
Shutdown = 1 << 2 ///< Submits all current commands without starting a new command buffer
};
DECLARE_ENUM_FLAG_OPERATORS(SubmitMode);
class TaskScheduler {
public:
TaskScheduler(const Instance& instance, RendererVulkan& renderer);
~TaskScheduler();
/// Blocks the host until the current command completes execution
void Synchronize(u32 slot);
/// Submits the current command to the graphics queue
void Submit(SubmitMode mode);
/// Returns the last completed fence counter
u64 GetFenceCounter() const;
/// Returns the command buffer used for early upload operations.
vk::CommandBuffer GetUploadCommandBuffer();
/// Returns the command buffer used for rendering
vk::CommandBuffer GetRenderCommandBuffer() const {
return commands[current_command].render_command_buffer;
}
/// Returns the current descriptor pool
vk::DescriptorPool GetDescriptorPool() const {
return commands[current_command].descriptor_pool;
}
/// Returns the index of the current command slot
u32 GetCurrentSlotIndex() const {
return current_command;
}
u64 GetHostFenceCounter() const {
return next_fence_counter - 1;
}
vk::Semaphore GetImageAcquiredSemaphore() const {
return commands[current_command].image_acquired;
}
vk::Semaphore GetPresentReadySemaphore() const {
return commands[current_command].present_ready;
}
private:
/// Activates the next command slot and optionally waits for its completion
void SwitchSlot();
private:
const Instance& instance;
RendererVulkan& renderer;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;
struct ExecutionSlot {
bool use_upload_buffer = false;
u64 fence_counter = 0;
vk::Semaphore image_acquired;
vk::Semaphore present_ready;
vk::Fence fence;
vk::DescriptorPool descriptor_pool;
vk::CommandBuffer render_command_buffer;
vk::CommandBuffer upload_command_buffer;
};
vk::CommandPool command_pool{};
vk::Semaphore timeline{};
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
u32 current_command = 0;
};
} // namespace Vulkan
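For context on the API this commit deletes, a hedged sketch of a typical per-frame interaction with the old scheduler (the caller shown is hypothetical, not taken from the commit):

// Hypothetical frame loop against the removed TaskScheduler interface.
void RenderFrame(TaskScheduler& scheduler) {
    // Early uploads get a dedicated command buffer; begin() is deferred
    // until first use inside GetUploadCommandBuffer().
    vk::CommandBuffer upload_cmdbuf = scheduler.GetUploadCommandBuffer();
    // ... record staging copies into upload_cmdbuf ...

    vk::CommandBuffer render_cmdbuf = scheduler.GetRenderCommandBuffer();
    // ... record draw commands into render_cmdbuf ...

    // Waits on image_acquired, signals present_ready, then SwitchSlot()
    // rotates to the next of the SCHEDULER_COMMAND_COUNT slots.
    scheduler.Submit(SubmitMode::SwapchainSynced);
}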

View File

@ -8,7 +8,7 @@
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include <vk_mem_alloc.h>
@ -33,13 +33,40 @@ vk::ImageAspectFlags ToVkAspect(VideoCore::SurfaceType type) {
return vk::ImageAspectFlagBits::eColor;
}
u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
u32 depth_offset = 0;
u32 stencil_offset = 4 * data.size / 5;
const auto& mapped = data.mapped;
switch (dest) {
case vk::Format::eD24UnormS8Uint: {
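// Each D24S8 texel is staged as one 32-bit word. Split it in place: the
// upper 24 bits stay in the front 4/5 of the buffer as depth, while the
// low 8 bits are appended to the stencil plane in the back 1/5.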
for (; stencil_offset < data.size; depth_offset += 4) {
std::byte* ptr = mapped.data() + depth_offset;
const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
const u32 d24 = d24s8 >> 8;
mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
std::memcpy(ptr, &d24, 4);
stencil_offset++;
}
break;
}
default:
LOG_ERROR(Render_Vulkan, "Unimplemtend convertion for depth format {}",
vk::to_string(dest));
UNREACHABLE();
}
ASSERT(depth_offset == 4 * data.size / 5);
return depth_offset;
}
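The 4 * size / 5 split assumes the staging layout used for D24S8: 4 bytes of depth per pixel followed by 1 byte of stencil. A worked example with illustrative numbers:

// A 16x16 D24S8 surface stages 16 * 16 * 5 = 1280 bytes: depth occupies
// [0, 1024) at 4 bytes/pixel, stencil occupies [1024, 1280) at 1 byte/pixel.
static_assert(4 * 1280 / 5 == 1024, "stencil plane starts after the depth plane");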
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache},
blit_helper{instance, scheduler}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE},
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager},
blit_helper{instance, scheduler, desc_manager}, upload_buffer{instance, scheduler, UPLOAD_BUFFER_SIZE},
download_buffer{instance, scheduler, DOWNLOAD_BUFFER_SIZE, true} {
auto Register = [this](VideoCore::PixelFormat dest,
@ -49,7 +76,7 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule
};
Register(VideoCore::PixelFormat::RGBA8,
std::make_unique<D24S8toRGBA8>(instance, scheduler, *this));
std::make_unique<D24S8toRGBA8>(instance, scheduler, desc_manager, *this));
}
TextureRuntime::~TextureRuntime() {
@ -98,7 +125,7 @@ MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192,
void TextureRuntime::Finish() {
MICROPROFILE_SCOPE(Vulkan_Finish);
renderpass_cache.ExitRenderpass();
scheduler.Submit(SubmitMode::Flush);
scheduler.Finish();
download_buffer.Invalidate();
}
@ -295,36 +322,33 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
vk::ClearDepthStencilValue{.depth = value.depth, .stencil = value.stencil};
}
// For full clears we can use vkCmdClearColorImage/vkCmdClearDepthStencilImage
if (clear.texture_rect == surface.GetScaledRect()) {
const vk::ImageSubresourceRange range = {.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1};
scheduler.Record(
[aspect, image = surface.alloc.image, clear_value, clear](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageSubresourceRange range = {.aspectMask = aspect,
.baseMipLevel = clear.texture_level,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
if (aspect & vk::ImageAspectFlagBits::eColor) {
command_buffer.clearColorImage(surface.alloc.image,
vk::ImageLayout::eTransferDstOptimal, clear_value.color,
range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) {
command_buffer.clearDepthStencilImage(surface.alloc.image,
vk::ImageLayout::eTransferDstOptimal,
clear_value.depthStencil, range);
}
if (aspect & vk::ImageAspectFlagBits::eColor) {
render_cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear_value.color,
range);
} else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) {
render_cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal,
clear_value.depthStencil, range);
}
});
} else {
// For partial clears we begin a clear renderpass with the appropriate render area
vk::RenderPass clear_renderpass{};
vk::RenderPass clear_renderpass;
if (aspect & vk::ImageAspectFlagBits::eColor) {
clear_renderpass = renderpass_cache.GetRenderpass(
surface.pixel_format, VideoCore::PixelFormat::Invalid, true);
surface.Transition(vk::ImageLayout::eColorAttachmentOptimal, 0, 1);
} else if (aspect & vk::ImageAspectFlagBits::eDepth ||
aspect & vk::ImageAspectFlagBits::eStencil) {
clear_renderpass = renderpass_cache.GetRenderpass(VideoCore::PixelFormat::Invalid,
surface.pixel_format, true);
} else if (aspect & vk::ImageAspectFlagBits::eDepth) {
clear_renderpass = renderpass_cache.GetRenderpass(
VideoCore::PixelFormat::Invalid, surface.pixel_format, true);
surface.Transition(vk::ImageLayout::eDepthStencilAttachmentOptimal, 0, 1);
}
@ -344,17 +368,17 @@ bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClea
it->second = device.createFramebuffer(framebuffer_info);
}
const vk::RenderPassBeginInfo clear_begin_info = {
.renderPass = clear_renderpass,
const RenderpassState clear_info = {
.renderpass = clear_renderpass,
.framebuffer = it->second,
.renderArea = vk::Rect2D{.offset = {static_cast<s32>(clear.texture_rect.left),
static_cast<s32>(clear.texture_rect.bottom)},
.extent = {clear.texture_rect.GetWidth(),
clear.texture_rect.GetHeight()}},
.clearValueCount = 1,
.pClearValues = &clear_value};
.render_area = vk::Rect2D{.offset = {static_cast<s32>(clear.texture_rect.left),
static_cast<s32>(clear.texture_rect.bottom)},
.extent = {clear.texture_rect.GetWidth(),
clear.texture_rect.GetHeight()}},
.clear = clear_value
};
renderpass_cache.EnterRenderpass(clear_begin_info);
renderpass_cache.EnterRenderpass(clear_info);
renderpass_cache.ExitRenderpass();
}
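The scheduler.Record() calls above are the core of the new design: instead of fetching the active command buffer, callers enqueue closures that the worker thread later replays against the live buffers. A minimal sketch of that pattern, assuming a simple FIFO; the ported vk_scheduler likely uses a more elaborate chunk-based allocator, so treat this as illustrative only:

#include <functional>
#include <queue>
#include <utility>
#include "video_core/renderer_vulkan/vk_common.h"

class RecordQueueSketch {
public:
    using Command = std::function<void(vk::CommandBuffer, vk::CommandBuffer)>;

    // Called on the emulation thread. Captured state travels with the
    // closure, which is why the lambdas above copy images, aspects and
    // rects by value instead of referencing the surface.
    void Record(Command&& command) {
        queue.push(std::move(command));
    }

    // Called on the worker thread with the current render/upload buffers.
    void Flush(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
        while (!queue.empty()) {
            queue.front()(render_cmdbuf, upload_cmdbuf);
            queue.pop();
        }
    }

private:
    std::queue<Command> queue;
};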
@ -368,22 +392,24 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
source.Transition(vk::ImageLayout::eTransferSrcOptimal, copy.src_level, 1);
dest.Transition(vk::ImageLayout::eTransferDstOptimal, copy.dst_level, 1);
const vk::ImageCopy image_copy = {
.srcSubresource = {.aspectMask = ToVkAspect(source.type),
.mipLevel = copy.src_level,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
.dstSubresource = {.aspectMask = ToVkAspect(dest.type),
.mipLevel = copy.dst_level,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
.extent = {copy.extent.width, copy.extent.height, 1}};
scheduler.Record([src_image = source.alloc.image, src_type = source.type,
dst_image = dest.alloc.image, dst_type = dest.type, copy](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::ImageCopy image_copy = {
.srcSubresource = {.aspectMask = ToVkAspect(src_type),
.mipLevel = copy.src_level,
.baseArrayLayer = 0,
.layerCount = 1},
.srcOffset = {static_cast<s32>(copy.src_offset.x), static_cast<s32>(copy.src_offset.y), 0},
.dstSubresource = {.aspectMask = ToVkAspect(dst_type),
.mipLevel = copy.dst_level,
.baseArrayLayer = 0,
.layerCount = 1},
.dstOffset = {static_cast<s32>(copy.dst_offset.x), static_cast<s32>(copy.dst_offset.y), 0},
.extent = {copy.extent.width, copy.extent.height, 1}};
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
render_cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal,
dst_image, vk::ImageLayout::eTransferDstOptimal, image_copy);
});
return true;
}
@ -395,45 +421,47 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
source.Transition(vk::ImageLayout::eTransferSrcOptimal, blit.src_level, 1);
dest.Transition(vk::ImageLayout::eTransferDstOptimal, blit.dst_level, 1);
const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left),
static_cast<s32>(blit.src_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.src_rect.right),
static_cast<s32>(blit.src_rect.top), 1}};
scheduler.Record([src_image = source.alloc.image, src_type = source.type,
dst_image = dest.alloc.image, dst_type = dest.type,
format = source.pixel_format, blit](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const std::array source_offsets = {vk::Offset3D{static_cast<s32>(blit.src_rect.left),
static_cast<s32>(blit.src_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.src_rect.right),
static_cast<s32>(blit.src_rect.top), 1}};
const std::array dest_offsets = {vk::Offset3D{static_cast<s32>(blit.dst_rect.left),
static_cast<s32>(blit.dst_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.dst_rect.right),
static_cast<s32>(blit.dst_rect.top), 1}};
const std::array dest_offsets = {vk::Offset3D{static_cast<s32>(blit.dst_rect.left),
static_cast<s32>(blit.dst_rect.bottom), 0},
vk::Offset3D{static_cast<s32>(blit.dst_rect.right),
static_cast<s32>(blit.dst_rect.top), 1}};
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(source.type),
.mipLevel = blit.src_level,
.baseArrayLayer = blit.src_layer,
.layerCount = 1},
.srcOffsets = source_offsets,
.dstSubresource = {.aspectMask = ToVkAspect(dest.type),
.mipLevel = blit.dst_level,
.baseArrayLayer = blit.dst_layer,
.layerCount = 1},
.dstOffsets = dest_offsets};
const vk::ImageBlit blit_area = {.srcSubresource = {.aspectMask = ToVkAspect(src_type),
.mipLevel = blit.src_level,
.baseArrayLayer = blit.src_layer,
.layerCount = 1},
.srcOffsets = source_offsets,
.dstSubresource = {.aspectMask = ToVkAspect(dst_type),
.mipLevel = blit.dst_level,
.baseArrayLayer = blit.dst_layer,
.layerCount = 1},
.dstOffsets = dest_offsets};
// Don't use linear filtering on depth attachments
const VideoCore::PixelFormat format = source.pixel_format;
const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 ||
format == VideoCore::PixelFormat::D24 ||
format == VideoCore::PixelFormat::D16
? vk::Filter::eNearest
: vk::Filter::eLinear;
// Don't use linear filtering on depth attachments
const vk::Filter filtering = format == VideoCore::PixelFormat::D24S8 ||
format == VideoCore::PixelFormat::D24 ||
format == VideoCore::PixelFormat::D16
? vk::Filter::eNearest
: vk::Filter::eLinear;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.blitImage(source.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
dest.alloc.image, vk::ImageLayout::eTransferDstOptimal, blit_area,
filtering);
render_cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal,
dst_image, vk::ImageLayout::eTransferDstOptimal, blit_area,
filtering);
});
return true;
}
void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
renderpass_cache.ExitRenderpass();
/*renderpass_cache.ExitRenderpass();
// TODO: Investigate AMD single pass downsampler
s32 current_width = surface.GetScaledWidth();
@ -467,7 +495,7 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
command_buffer.blitImage(surface.alloc.image, vk::ImageLayout::eTransferSrcOptimal,
surface.alloc.image, vk::ImageLayout::eTransferDstOptimal,
blit_area, vk::Filter::eLinear);
}
}*/
}
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
@ -483,8 +511,7 @@ bool TextureRuntime::NeedsConvertion(VideoCore::PixelFormat format) const {
!traits.attachment_support);
}
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count) {
void TextureRuntime::Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count) {
LayoutTracker& tracker = alloc.tracker;
if (tracker.IsRangeEqual(new_layout, level, level_count) || !alloc.image) {
return;
@ -566,22 +593,26 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
LayoutInfo dest = GetLayoutInfo(new_layout);
tracker.ForEachLayoutRange(
level, level_count, new_layout, [&](u32 start, u32 count, vk::ImageLayout old_layout) {
scheduler.Record([old_layout, new_layout, dest, start, count,
image = alloc.image, aspect = alloc.aspect,
layers = alloc.layers, GetLayoutInfo](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
LayoutInfo source = GetLayoutInfo(old_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = alloc.image,
.subresourceRange = {.aspectMask = alloc.aspect,
.baseMipLevel = start,
.levelCount = count,
.baseArrayLayer = 0,
.layerCount = alloc.layers}};
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {.aspectMask = aspect,
.baseMipLevel = start,
.levelCount = count,
.baseArrayLayer = 0,
.layerCount = layers}};
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
});
render_cmdbuf.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
});
});
tracker.SetLayout(new_layout, level, level_count);
for (u32 i = 0; i < level_count; i++) {
@ -625,8 +656,7 @@ Surface::~Surface() {
}
void Surface::Transition(vk::ImageLayout new_layout, u32 level, u32 level_count) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, alloc, new_layout, level, level_count);
runtime.Transition(alloc, new_layout, level, level_count);
}
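Since TextureRuntime::Transition now records the barrier through the scheduler itself, callers no longer thread a command buffer through; a layout change reduces to a one-liner (hypothetical call site, not from the commit):

// The runtime schedules the pipeline barrier internally.
surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);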
MICROPROFILE_DEFINE(Vulkan_Upload, "VulkanSurface", "Texture Upload", MP_RGB(128, 192, 64));
@ -644,40 +674,40 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
if (is_scaled) {
ScaledUpload(upload, staging);
} else {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
const VideoCore::Rect2D rect = upload.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + upload.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = alloc.aspect,
.mipLevel = upload.texture_level,
.baseArrayLayer = 0,
.layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
if (alloc.aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (alloc.aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (alloc.aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += UnpackDepthStencil(staging);
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
Transition(vk::ImageLayout::eTransferDstOptimal, upload.texture_level, 1);
scheduler.Record([aspect = alloc.aspect, image = alloc.image,
format = alloc.format, staging, upload](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
u32 region_count = 0;
std::array<vk::BufferImageCopy, 2> copy_regions;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyBufferToImage(staging.buffer, alloc.image,
vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data());
const VideoCore::Rect2D rect = upload.texture_rect;
vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + upload.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = aspect,
.mipLevel = upload.texture_level,
.baseArrayLayer = 0,
.layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
if (aspect & vk::ImageAspectFlagBits::eColor) {
copy_regions[region_count++] = copy_region;
} else if (aspect & vk::ImageAspectFlagBits::eDepth) {
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[region_count++] = copy_region;
if (aspect & vk::ImageAspectFlagBits::eStencil) {
copy_region.bufferOffset += UnpackDepthStencil(staging, format);
copy_region.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
copy_regions[region_count++] = copy_region;
}
}
render_cmdbuf.copyBufferToImage(staging.buffer, image, vk::ImageLayout::eTransferDstOptimal,
region_count, copy_regions.data());
});
}
InvalidateAllWatcher();
@ -703,24 +733,24 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
if (is_scaled) {
ScaledDownload(download, staging);
} else {
const VideoCore::Rect2D rect = download.texture_rect;
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = alloc.aspect,
.mipLevel = download.texture_level,
.baseArrayLayer = 0,
.layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
Transition(vk::ImageLayout::eTransferSrcOptimal, download.texture_level, 1);
scheduler.Record([aspect = alloc.aspect, image = alloc.image,
staging, download](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer){
const VideoCore::Rect2D rect = download.texture_rect;
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging.buffer_offset + download.buffer_offset,
.bufferRowLength = rect.GetWidth(),
.bufferImageHeight = rect.GetHeight(),
.imageSubresource = {.aspectMask = aspect,
.mipLevel = download.texture_level,
.baseArrayLayer = 0,
.layerCount = 1},
.imageOffset = {static_cast<s32>(rect.left), static_cast<s32>(rect.bottom), 0},
.imageExtent = {rect.GetWidth(), rect.GetHeight(), 1}};
// Copy pixel data to the staging buffer
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
command_buffer.copyImageToBuffer(alloc.image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, copy_region);
render_cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
staging.buffer, copy_region);
});
}
// Lock this data until the next scheduler switch
@ -852,31 +882,4 @@ void Surface::DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
r32_surface.Download(r32_download, staging);
}
u32 Surface::UnpackDepthStencil(const StagingData& data) {
u32 depth_offset = 0;
u32 stencil_offset = 4 * data.size / 5;
const auto& mapped = data.mapped;
switch (alloc.format) {
case vk::Format::eD24UnormS8Uint: {
for (; stencil_offset < data.size; depth_offset += 4) {
std::byte* ptr = mapped.data() + depth_offset;
const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
const u32 d24 = d24s8 >> 8;
mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
std::memcpy(ptr, &d24, 4);
stencil_offset++;
}
break;
}
default:
LOG_ERROR(Render_Vulkan, "Unimplemtend convertion for depth format {}",
vk::to_string(alloc.format));
UNREACHABLE();
}
ASSERT(depth_offset == 4 * data.size / 5);
return depth_offset;
}
} // namespace Vulkan

View File

@ -14,7 +14,6 @@
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_layout_tracker.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
@ -78,6 +77,7 @@ namespace Vulkan {
class Instance;
class RenderpassCache;
class DescriptorManager;
class Surface;
/**
@ -88,10 +88,16 @@ class TextureRuntime {
friend class Surface;
public:
TextureRuntime(const Instance& instance, TaskScheduler& scheduler,
RenderpassCache& renderpass_cache);
TextureRuntime(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager);
~TextureRuntime();
/// Causes a GPU command flush
void Finish();
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, ImageAlloc&& alloc);
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
[[nodiscard]] StagingData FindStaging(u32 size, bool upload);
@ -104,22 +110,12 @@ public:
VideoCore::TextureType type, vk::Format format,
vk::ImageUsageFlags usage);
/// Flushes staging buffers
void FlushBuffers();
/// Causes a GPU command flush
void Finish();
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, ImageAlloc&& alloc);
/// Performs required format conversions on the staging data
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
std::span<std::byte> dest);
/// Transitions the mip level range of the surface to new_layout
void Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout,
u32 level, u32 level_count);
void Transition(ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
@ -134,6 +130,9 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
/// Flushes staging buffers
void FlushBuffers();
/// Returns all source formats that support reinterpretation to the dest format
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;
@ -148,14 +147,15 @@ private:
}
/// Returns the current Vulkan scheduler
TaskScheduler& GetScheduler() const {
Scheduler& GetScheduler() const {
return scheduler;
}
private:
const Instance& instance;
TaskScheduler& scheduler;
Scheduler& scheduler;
RenderpassCache& renderpass_cache;
DescriptorManager& desc_manager;
BlitHelper blit_helper;
StreamBuffer upload_buffer;
StreamBuffer download_buffer;
@ -235,13 +235,10 @@ private:
void DepthStencilDownload(const VideoCore::BufferTextureCopy& download,
const StagingData& staging);
/// Unpacks packed D24S8 data to facilitate depth upload
u32 UnpackDepthStencil(const StagingData& data);
private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
Scheduler& scheduler;
public:
ImageAlloc alloc;