renderer_vulkan: Rewrite stream buffer + other fixes
* Emulate blend color and clip planes correctly
* Don't hack the depth in the vertex shader; use VK_EXT_depth_clip_control for now to set the range to [-1, 1]
* Rewrite the stream buffer to remove flickering problems. The new implementation doesn't try to be smart about holding memory: it divides the allocation into SCHEDULER_COMMAND_COUNT buckets and automatically switches between them based on the current slot index
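For illustration, here is a minimal, standalone sketch of that bucketing scheme. It uses plain memory instead of Vulkan buffers; BucketedStream and its explicit slot parameter are hypothetical stand-ins for the StreamBuffer/TaskScheduler pair in the diff below.

#include <array>
#include <cassert>
#include <cstdint>
#include <vector>

constexpr std::uint32_t SCHEDULER_COMMAND_COUNT = 4;

// Plain-memory stand-in for the new StreamBuffer: the allocation is split
// into one bucket per scheduler slot, and the active bucket follows the
// scheduler's current slot index, so the CPU never writes memory the GPU
// may still be reading.
class BucketedStream {
public:
    explicit BucketedStream(std::uint32_t total_size)
        : memory(total_size), bucket_size(total_size / SCHEDULER_COMMAND_COUNT) {}

    // Map: hand out a pointer inside the current slot's bucket.
    std::uint8_t* Map(std::uint32_t slot, std::uint32_t size) {
        Bucket& bucket = buckets[slot];
        assert(bucket.offset + size <= bucket_size && "bucket overflow");
        return memory.data() + slot * bucket_size + bucket.offset;
    }

    // Commit: advance the bucket's write offset once the data is written.
    void Commit(std::uint32_t slot, std::uint32_t size) {
        buckets[slot].offset += size;
    }

    // Flush: the real code uploads [slot * bucket_size, offset) here, then
    // re-arms the bucket that the next slot will reuse.
    void Flush(std::uint32_t slot) {
        const std::uint32_t next = (slot + 1) % SCHEDULER_COMMAND_COUNT;
        buckets[next].offset = 0;
    }

private:
    struct Bucket {
        std::uint32_t offset = 0;
    };

    std::vector<std::uint8_t> memory;
    std::uint32_t bucket_size;
    std::array<Bucket, SCHEDULER_COMMAND_COUNT> buckets{};
};

The trade-off is simplicity over memory thrift: a bucket is never recycled until its scheduler slot comes around again, and overflowing a bucket is fatal (the new Map() hits UNREACHABLE), which is why the vertex buffer size also grows from 64 to 8192 vertices below.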
@@ -40,7 +40,7 @@ void Zero(T& o) {
 State::State() : geometry_pipeline(*this) {
     auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
         using Pica::Shader::OutputVertex;
-        auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
+        auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
                                   const OutputVertex& v2) {
             VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
         };
@@ -151,7 +151,7 @@ struct ScreenRectVertex {
     Common::Vec2f tex_coord;
 };
 
-constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 64;
+constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
 
 /**
  * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include "common/common_types.h"
+
 // Include vulkan-hpp header
 #define VK_NO_PROTOTYPES 1
 #define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
@@ -17,6 +19,8 @@
 
 namespace Vulkan {
 
+constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
+
 /// Return the image aspect associated with the provided format
 constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
     switch (format) {
@@ -148,6 +148,7 @@ bool Instance::CreateDevice() {
     };
 
     AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+    AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
    timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
    extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
    push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -159,27 +160,31 @@ bool Instance::CreateDevice() {
         return false;
     }
 
-    graphics_queue_family_index = -1;
-    present_queue_family_index = -1;
-    for (int i = 0; i < family_properties.size(); i++) {
+    bool graphics_queue_found = false;
+    bool present_queue_found = false;
+    for (std::size_t i = 0; i < family_properties.size(); i++) {
         // Check if queue supports graphics
+        const u32 index = static_cast<u32>(i);
         if (family_properties[i].queueFlags & vk::QueueFlagBits::eGraphics) {
-            graphics_queue_family_index = i;
+            graphics_queue_family_index = index;
+            graphics_queue_found = true;
 
             // If this queue also supports presentation we are finished
-            if (physical_device.getSurfaceSupportKHR(i, surface)) {
-                present_queue_family_index = i;
+            if (physical_device.getSurfaceSupportKHR(static_cast<u32>(i), surface)) {
+                present_queue_family_index = index;
+                present_queue_found = true;
                 break;
             }
         }
 
         // Check if queue supports presentation
-        if (physical_device.getSurfaceSupportKHR(i, surface)) {
-            present_queue_family_index = i;
+        if (physical_device.getSurfaceSupportKHR(index, surface)) {
+            present_queue_family_index = index;
+            present_queue_found = true;
         }
     }
 
-    if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
+    if (!graphics_queue_found || !present_queue_found) {
         LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
         return false;
     }
@@ -221,6 +226,9 @@ bool Instance::CreateDevice() {
             .shaderClipDistance = available.shaderClipDistance
         }
     },
+    vk::PhysicalDeviceDepthClipControlFeaturesEXT{
+        .depthClipControl = true
+    },
     feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
     feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>()
 };
@@ -483,7 +483,12 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
         .extent = {1, 1}
     };
 
+    vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {
+        .negativeOneToOne = true
+    };
+
     const vk::PipelineViewportStateCreateInfo viewport_info = {
+        .pNext = &depth_clip_control,
         .viewportCount = 1,
         .pViewports = &viewport,
         .scissorCount = 1,
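Context for the hunks above: the PICA, like OpenGL, produces clip-space depth in [-1, 1], while Vulkan's fixed-function viewport transform expects [0, 1]. The old workaround remapped depth per vertex in the shader (gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0, removed further down); with VK_EXT_depth_clip_control the pipeline consumes the GL-style range directly. A condensed sketch of the wiring, assuming the extension and its depthClipControl feature were enabled at device creation as in the earlier hunks (MakeViewportState is a hypothetical helper, not code from the commit):

// Hedged sketch (vulkan-hpp): chain the EXT struct into the viewport state
// so NDC depth is interpreted as [-1, 1] instead of the Vulkan default [0, 1].
vk::PipelineViewportStateCreateInfo MakeViewportState(
    const vk::Viewport& viewport, const vk::Rect2D& scissor,
    const vk::PipelineViewportDepthClipControlCreateInfoEXT& clip_control) {
    return vk::PipelineViewportStateCreateInfo{
        .pNext = &clip_control, // clip_control.negativeOneToOne == true
        .viewportCount = 1,
        .pViewports = &viewport,
        .scissorCount = 1,
        .pScissors = &scissor,
    };
}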
@@ -497,6 +502,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
         vk::DynamicState::eStencilCompareMask,
         vk::DynamicState::eStencilWriteMask,
         vk::DynamicState::eStencilReference,
+        vk::DynamicState::eBlendConstants,
         // VK_EXT_extended_dynamic_state
         vk::DynamicState::eCullModeEXT,
         vk::DynamicState::eDepthCompareOpEXT,
@@ -859,6 +859,8 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         }
     }
 
+    renderpass_cache.ExitRenderpass();
+
     vertex_batch.clear();
 
     // Mark framebuffer surfaces as dirty
@@ -1631,7 +1633,7 @@ void RasterizerVulkan::SetShader() {
 }
 
 void RasterizerVulkan::SyncClipEnabled() {
-    //state.clip_distance[1] = Pica::g_state.regs.rasterizer.clip_enable != 0;
+    uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
 }
 
 void RasterizerVulkan::SyncClipCoef() {
@@ -1689,12 +1691,11 @@ void RasterizerVulkan::SyncBlendFuncs() {
 }
 
 void RasterizerVulkan::SyncBlendColor() {
-    /*auto blend_color =
-        PicaToGL::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
-    state.blend.color.red = blend_color[0];
-    state.blend.color.green = blend_color[1];
-    state.blend.color.blue = blend_color[2];
-    state.blend.color.alpha = blend_color[3];*/
+    auto blend_color =
+        PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
+
+    vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
+    command_buffer.setBlendConstants(blend_color.AsArray());
 }
 
 void RasterizerVulkan::SyncFogColor() {
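With vk::DynamicState::eBlendConstants in the pipeline's dynamic state list (added a few hunks up), the PICA blend constant can be re-recorded on the command buffer instead of forcing a pipeline rebuild. A hypothetical helper showing the same pattern; the byte order of blend_const.raw (R in the low byte) is an assumption mirroring what PicaToVK::ColorRGBA8 is expected to produce:

#include <array>
#include <cstdint>
#include <vulkan/vulkan.hpp>

// Hypothetical helper: unpack the PICA RGBA8 blend constant and record it
// as dynamic blend-constant state on the current command buffer.
void SetBlendColor(vk::CommandBuffer cmdbuf, std::uint32_t blend_const_raw) {
    const std::array<float, 4> color = {
        static_cast<float>((blend_const_raw >> 0) & 0xFF) / 255.0f,  // R (assumed low byte)
        static_cast<float>((blend_const_raw >> 8) & 0xFF) / 255.0f,  // G
        static_cast<float>((blend_const_raw >> 16) & 0xFF) / 255.0f, // B
        static_cast<float>((blend_const_raw >> 24) & 0xFF) / 255.0f, // A
    };
    cmdbuf.setBlendConstants(color.data());
}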
@@ -55,6 +55,7 @@ layout (set = 0, binding = 1, std140) uniform shader_data {
     int proctex_diff_lut_offset;
     float proctex_bias;
     int shadow_texture_bias;
+    bool enable_clip1;
     ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
     vec3 fog_color;
     vec2 proctex_noise_f;
@@ -89,9 +90,7 @@ static std::string GetVertexInterfaceDeclaration(bool is_output) {
         out += R"(
 out gl_PerVertex {
     vec4 gl_Position;
-#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
     float gl_ClipDistance[2];
-#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
 };
 )";
     }
@@ -1568,11 +1567,13 @@ void main() {
     normquat = vert_normquat;
     view = vert_view;
     gl_Position = vert_position;
-    gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
-#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
 
     gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
-    gl_ClipDistance[1] = dot(clip_coef, vert_position);
-#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)
+    if (enable_clip1) {
+        gl_ClipDistance[1] = dot(clip_coef, vert_position);
+    } else {
+        gl_ClipDistance[1] = 0;
+    }
 }
 )";
 
@@ -106,8 +106,8 @@ StreamBuffer::StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
         views[i] = device.createBufferView(view_info);
     }
 
-    available_size = total_size;
     view_count = view_formats.size();
+    bucket_size = size / SCHEDULER_COMMAND_COUNT;
 }
 
 StreamBuffer::~StreamBuffer() {
@@ -123,57 +123,38 @@ StreamBuffer::~StreamBuffer() {
 std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
     ASSERT(size <= total_size && alignment <= total_size);
 
-    if (alignment > 0) {
-        buffer_offset = Common::AlignUp(buffer_offset, alignment);
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    auto& bucket = buckets[current_bucket];
+    if (bucket.offset + size > bucket_size) {
+        UNREACHABLE();
     }
 
-    // Have we run out of available space?
     bool invalidate = false;
-    if (available_size < size) {
-        // If we are at the end of the buffer, start over
-        if (buffer_offset + size > total_size) {
-            // Flush any pending writes before looping back
-            Flush();
-
-            // Since new regions are discovered based on locks, insert a lock
-            // that reaches the end of the buffer to avoid having an empty region
-            const LockedRegion region = {
-                .size = total_size - buffer_offset,
-                .fence_counter = scheduler.GetFenceCounter()
-            };
-
-            regions.emplace(buffer_offset, region);
-
-            Invalidate();
-            invalidate = true;
-        }
-
-        // Try to garbage collect old regions
-        if (!UnlockFreeRegions(size)) {
-            // Nuclear option: stall the GPU to remove all the locks
-            LOG_WARNING(Render_Vulkan, "Buffer GPU stall");
-            Invalidate();
-            regions.clear();
-            available_size = total_size;
-        }
+    if (bucket.invalid) {
+        invalidate = true;
+        bucket.invalid = false;
     }
 
+    const u32 buffer_offset = current_bucket * bucket_size + bucket.offset;
     u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
     return std::make_tuple(mapped, buffer_offset, invalidate);
 }
 
 void StreamBuffer::Commit(u32 size) {
-    buffer_offset += size;
-    available_size -= size;
+    buckets[scheduler.GetCurrentSlotIndex()].offset += size;
 }
 
 void StreamBuffer::Flush() {
-    const u32 flush_size = buffer_offset - flush_start;
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    const u32 flush_size = buckets[current_bucket].offset;
+    ASSERT(flush_size <= bucket_size);
 
     if (flush_size > 0) {
         vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
         VmaAllocator allocator = instance.GetAllocator();
 
-        const u32 flush_size = buffer_offset - flush_start;
+        const u32 flush_start = current_bucket * bucket_size;
         const vk::BufferCopy copy_region = {
             .srcOffset = flush_start,
             .dstOffset = flush_start,
@@ -183,14 +164,6 @@ void StreamBuffer::Flush() {
         vmaFlushAllocation(allocator, allocation, flush_start, flush_size);
         command_buffer.copyBuffer(staging.buffer, buffer, copy_region);
 
-        // Lock the region
-        const LockedRegion region = {
-            .size = flush_size,
-            .fence_counter = scheduler.GetFenceCounter()
-        };
-
-        regions.emplace(flush_start, region);
-
         // Add pipeline barrier for the flushed region
         auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
         const vk::BufferMemoryBarrier buffer_barrier = {
@@ -205,45 +178,17 @@ void StreamBuffer::Flush() {
 
         command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
                                        vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
-
-        flush_start = buffer_offset;
     }
 
+    // Reset the offset of the next bucket
+    const u32 next_bucket = (current_bucket + 1) % SCHEDULER_COMMAND_COUNT;
+    buckets[next_bucket].offset = 0;
+    buckets[next_bucket].invalid = true;
 }
 
-void StreamBuffer::Invalidate() {
-    buffer_offset = 0;
-    flush_start = 0;
-}
-
-bool StreamBuffer::UnlockFreeRegions(u32 target_size) {
-    available_size = 0;
-
-    // Free regions that don't need waiting
-    auto it = regions.lower_bound(buffer_offset);
-    while (it != regions.end()) {
-        const auto& [offset, region] = *it;
-        if (region.fence_counter <= scheduler.GetFenceCounter()) {
-            available_size += region.size;
-            it = regions.erase(it);
-        }
-        else {
-            break;
-        }
-    }
-
-    // If that wasn't enough, try waiting for some fences
-    while (it != regions.end() && available_size < target_size) {
-        const auto& [offset, region] = *it;
-
-        if (region.fence_counter > scheduler.GetFenceCounter()) {
-            scheduler.WaitFence(region.fence_counter);
-        }
-
-        available_size += region.size;
-        it = regions.erase(it);
-    }
-
-    return available_size >= target_size;
-}
+u32 StreamBuffer::GetBufferOffset() const {
+    const u32 current_bucket = scheduler.GetCurrentSlotIndex();
+    return current_bucket * bucket_size + buckets[current_bucket].offset;
+}
 
 } // namespace Vulkan
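Putting the pieces together, a typical caller (hypothetical; the real call sites live in the rasterizer) maps, writes, and commits within the current scheduler slot, and Flush() runs when the scheduler rotates slots, re-arming the bucket the next slot will reuse:

#include <cstdint>
#include <cstring>
#include <span>

// Sketch of the Map/Commit contract, assuming a StreamBuffer as declared in
// the header below. The returned offset feeds the vertex/uniform bind calls;
// the invalidate flag tells the caller this bucket's previous contents are gone.
std::uint32_t UploadVertices(Vulkan::StreamBuffer& vertex_buffer,
                             std::span<const std::uint8_t> data) {
    const auto size = static_cast<std::uint32_t>(data.size());
    auto [ptr, offset, invalidate] = vertex_buffer.Map(size);
    std::memcpy(ptr, data.data(), size);
    vertex_buffer.Commit(size);
    return offset;
}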
@@ -34,7 +34,7 @@ struct StagingBuffer {
 class StreamBuffer {
 public:
     StreamBuffer(const Instance& instance, TaskScheduler& scheduler,
-                 u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
+                 u32 size, vk::BufferUsageFlagBits usage, std::span<const vk::Format> views);
     ~StreamBuffer();
 
     std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
@@ -45,15 +45,14 @@ public:
     /// Flushes staging memory to the GPU buffer
     void Flush();
 
+    /// Returns the current buffer offset
+    u32 GetBufferOffset() const;
+
     /// Returns the Vulkan buffer handle
     vk::Buffer GetHandle() const {
         return buffer;
     }
 
-    u32 GetBufferOffset() const {
-        return buffer_offset;
-    }
-
     /// Returns an immutable reference to the requested buffer view
     const vk::BufferView& GetView(u32 index = 0) const {
         ASSERT(index < view_count);
@@ -68,6 +67,12 @@ private:
-    bool UnlockFreeRegions(u32 target_size);
-
 private:
+    struct Bucket {
+        bool invalid;
+        u32 fence_counter;
+        u32 offset;
+    };
+
     const Instance& instance;
     TaskScheduler& scheduler;
     StagingBuffer staging;
@@ -79,10 +84,8 @@ private:
     std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
     std::size_t view_count = 0;
 
-    u32 buffer_offset = 0;
-    u32 flush_start = 0;
-    s32 available_size = 0;
-    std::map<u32, LockedRegion> regions;
+    u32 bucket_size = 0;
+    std::array<Bucket, SCHEDULER_COMMAND_COUNT> buckets{};
 };
 
 } // namespace Vulkan
@@ -13,8 +13,6 @@
 
 namespace Vulkan {
 
-constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
-
 class Buffer;
 class Instance;
 
@@ -215,13 +215,11 @@ void TextureRuntime::FormatConvert(VideoCore::PixelFormat format, bool upload,
         return Pica::Texture::ConvertABGRToRGBA(source, dest);
     } else if (format == VideoCore::PixelFormat::RGB8 && upload) {
         return Pica::Texture::ConvertBGRToRGBA(source, dest);
+    } else if (format == VideoCore::PixelFormat::D24S8 && !upload) {
+        return; // HACK: Skip depth download
     } else if (instance.IsFormatSupported(ToVkFormat(format), feature)) {
         std::memcpy(dest.data(), source.data(), source.size());
     } else {
         LOG_CRITICAL(Render_Vulkan, "Unimplemented conversion for format {}!", format);
-        UNREACHABLE();
+        std::memcpy(dest.data(), source.data(), source.size());
     }
 }
 
@@ -53,6 +53,7 @@ struct UniformData {
     int proctex_diff_lut_offset;
     float proctex_bias;
     int shadow_texture_bias;
+    bool enable_clip1;
     alignas(16) Common::Vec4i lighting_lut_offset[LightingRegs::NumLightingSampler / 4];
     alignas(16) Common::Vec3f fog_color;
     alignas(8) Common::Vec2f proctex_noise_f;
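One detail worth flagging in this last hunk: enable_clip1 is a 1-byte C++ bool mirroring a 4-byte std140 bool. The offsets still line up because the following member carries alignas(16), but the GPU reads the padding bytes as part of the 4-byte bool, so the block should be zero-initialized before writing. A hedged, self-contained illustration with abbreviated member names (UniformDataSketch is hypothetical):

#include <cstddef>

struct UniformDataSketch {
    int shadow_texture_bias;
    bool enable_clip1;                       // 1 byte in C++, 4 bytes in std140
    alignas(16) int lighting_lut_offset[4];  // ivec4 => 16-byte std140 alignment
};

// std140 places the ivec4 array at the next 16-byte boundary; the C++
// alignas keeps the host struct in agreement with the shader layout.
static_assert(offsetof(UniformDataSketch, lighting_lut_offset) % 16 == 0);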