renderer_vulkan: Add fallback path for VK_EXT_index_type_uint8

* Also remove some flush barriers
Author: GPUCode
Date:   2022-12-28 17:42:37 +02:00
commit 98e0ecf6a7 (parent ad45b9880d)
9 changed files with 108 additions and 119 deletions
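The core of the change: the PICA can submit 8-bit index buffers, but Vulkan only guarantees 16- and 32-bit index types; 8-bit indices need the optional VK_EXT_index_type_uint8 extension. Where the extension is missing, the new SetupIndexArray path below promotes each u8 index to u16 while staging the buffer. A minimal standalone sketch of that promotion (the function name and signature are illustrative, not from the diff):

```cpp
#include <cstdint>
#include <cstring>

// Widen 8-bit guest indices to 16-bit when VK_EXT_index_type_uint8 is
// unsupported; otherwise copy the index data through unchanged.
// `supports_uint8` stands in for instance.IsIndexTypeUint8Supported().
void StageIndices(const std::uint8_t* src, void* dst, std::uint32_t count,
                  bool source_is_u8, bool supports_uint8) {
    if (source_is_u8 && !supports_uint8) {
        auto* out = static_cast<std::uint16_t*>(dst); // dst must hold 2 * count bytes
        for (std::uint32_t i = 0; i < count; i++) {
            out[i] = src[i]; // zero-extend each index
        }
    } else {
        const std::uint32_t index_size = source_is_u8 ? 1 : 2;
        std::memcpy(dst, src, count * index_size);
    }
}
```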

View File

@@ -117,9 +117,9 @@ void Config::ReadValues() {
     Settings::values.graphics_api =
         static_cast<Settings::GraphicsAPI>(sdl2_config->GetInteger("Renderer", "graphics_api", 2));
     Settings::values.async_command_recording =
-        sdl2_config->GetBoolean("Renderer", "async_command_recording", false);
+        sdl2_config->GetBoolean("Renderer", "async_command_recording", true);
     Settings::values.spirv_shader_gen = sdl2_config->GetBoolean("Renderer", "spirv_shader_gen", true);
-    Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", true);
+    Settings::values.renderer_debug = sdl2_config->GetBoolean("Renderer", "renderer_debug", false);
     Settings::values.use_hw_renderer = sdl2_config->GetBoolean("Renderer", "use_hw_renderer", true);
     Settings::values.use_hw_shader = sdl2_config->GetBoolean("Renderer", "use_hw_shader", true);
     Settings::values.shaders_accurate_mul =

View File

@@ -888,7 +888,7 @@ void RendererVulkan::SwapBuffers() {
     DrawScreens(layout, false);
-    renderpass_cache.ExitRenderpass();
+    /*renderpass_cache.ExitRenderpass();
     scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
         const vk::MemoryBarrier memory_write_barrier = {
@@ -900,7 +900,7 @@ void RendererVulkan::SwapBuffers() {
                                       vk::PipelineStageFlagBits::eAllCommands,
                                       vk::DependencyFlagBits::eByRegion,
                                       memory_write_barrier, {}, {});
-    });
+    });*/
     const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
     const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();

View File

@@ -164,7 +164,7 @@ Instance::Instance(Frontend::EmuWindow& window, u32 physical_device_index)
     VULKAN_HPP_DEFAULT_DISPATCHER.init(vkGetInstanceProcAddr);
     // Enable the instance extensions the backend uses
-    auto extensions = GetInstanceExtensions(window_info.type, false);
+    auto extensions = GetInstanceExtensions(window_info.type, enable_validation);
     // Use required platform-specific flags
     auto flags = GetInstanceFlags();
@@ -350,12 +350,12 @@ bool Instance::CreateDevice() {
         physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                      vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
                                      vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR,
-                                     vk::PhysicalDeviceCustomBorderColorFeaturesEXT>();
+                                     vk::PhysicalDeviceCustomBorderColorFeaturesEXT,
+                                     vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
     // Not having geometry shaders will cause issues with accelerated rendering.
-    const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
-    features = available;
-    if (!available.geometryShader) {
+    features = feature_chain.get().features;
+    if (!features.geometryShader) {
         LOG_WARNING(Render_Vulkan,
                     "Geometry shaders not available! Accelerated rendering not possible!");
     }
@@ -390,6 +390,7 @@ bool Instance::CreateDevice() {
     extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
     push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
     custom_border_color = AddExtension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME);
+    index_type_uint8 = AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
     // Search queue families for graphics and present queues
     auto family_properties = physical_device.getQueueFamilyProperties();
@@ -446,21 +447,21 @@ bool Instance::CreateDevice() {
             .ppEnabledExtensionNames = enabled_extensions.data(),
         },
         vk::PhysicalDeviceFeatures2{
-            .features = {.robustBufferAccess = available.robustBufferAccess,
-                         .geometryShader = available.geometryShader,
-                         .dualSrcBlend = available.dualSrcBlend,
-                         .logicOp = available.logicOp,
-                         .depthClamp = available.depthClamp,
-                         .largePoints = available.largePoints,
-                         .samplerAnisotropy = available.samplerAnisotropy,
-                         .fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
-                         .shaderStorageImageMultisample = available.shaderStorageImageMultisample,
-                         .shaderClipDistance = available.shaderClipDistance}},
-        vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true},
-        //feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
+            .features = {.robustBufferAccess = features.robustBufferAccess,
+                         .geometryShader = features.geometryShader,
+                         .dualSrcBlend = features.dualSrcBlend,
+                         .logicOp = features.logicOp,
+                         .depthClamp = features.depthClamp,
+                         .largePoints = features.largePoints,
+                         .samplerAnisotropy = features.samplerAnisotropy,
+                         .fragmentStoresAndAtomics = features.fragmentStoresAndAtomics,
+                         .shaderStorageImageMultisample = features.shaderStorageImageMultisample,
+                         .shaderClipDistance = features.shaderClipDistance}},
+        feature_chain.get<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>(),
+        feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
         feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>(),
-        //feature_chain.get<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>()
-    };
+        feature_chain.get<vk::PhysicalDeviceCustomBorderColorFeaturesEXT>()
+    };
     // Create logical device
     try {
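Note the pattern above: instead of hardcoding `indexTypeUint8 = true`, the device now forwards whatever the feature query reported. A sketch of the same structure-chain query in isolation (the `getFeatures2` template and `StructureChain::get` are real Vulkan-Hpp APIs; the wrapper function name is illustrative):

```cpp
#include <vulkan/vulkan.hpp>

// Sketch: query an optional feature through a vulkan.hpp structure chain.
// Passing the queried struct back into vk::DeviceCreateInfo (as the diff now
// does with feature_chain.get<...>()) requests the feature only where the
// driver actually reports it, avoiding VK_ERROR_FEATURE_NOT_PRESENT on
// drivers that lack VK_EXT_index_type_uint8.
bool QueryUint8IndexSupport(vk::PhysicalDevice physical_device) {
    const auto chain =
        physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                     vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>();
    return chain.get<vk::PhysicalDeviceIndexTypeUint8FeaturesEXT>().indexTypeUint8;
}
```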

View File

@@ -125,6 +125,11 @@ public:
         return custom_border_color;
     }
+    /// Returns true when VK_EXT_index_type_uint8 is supported
+    bool IsIndexTypeUint8Supported() const {
+        return index_type_uint8;
+    }
     /// Returns the vendor ID of the physical device
     u32 GetVendorID() const {
         return properties.vendorID;
@@ -210,12 +215,14 @@ private:
     std::vector<std::string> available_extensions;
     u32 present_queue_family_index{0};
     u32 graphics_queue_family_index{0};
-    bool timeline_semaphores{false};
-    bool extended_dynamic_state{false};
-    bool push_descriptors{false};
-    bool custom_border_color{false};
-    bool enable_validation{false};
-    bool dump_command_buffers{false};
+    bool timeline_semaphores{};
+    bool extended_dynamic_state{};
+    bool push_descriptors{};
+    bool custom_border_color{};
+    bool index_type_uint8{};
+    bool enable_validation{};
+    bool dump_command_buffers{};
 };
 } // namespace Vulkan

View File

@@ -5,6 +5,8 @@
 #include <atomic>
 #include <limits>
 #include <deque>
+#include <vector>
+#include <thread>
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_common.h"

View File

@@ -43,6 +43,12 @@ constexpr vk::ImageUsageFlags NULL_USAGE = vk::ImageUsageFlagBits::eSampled |
                                            vk::ImageUsageFlagBits::eTransferDst;
 constexpr vk::ImageUsageFlags NULL_STORAGE_USAGE = NULL_USAGE | vk::ImageUsageFlagBits::eStorage;
+struct DrawParams {
+    u32 vertex_count;
+    s32 vertex_offset;
+    bool is_indexed;
+};
 RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instance& instance,
                                    Scheduler& scheduler, DescriptorManager& desc_manager,
                                    TextureRuntime& runtime, RenderpassCache& renderpass_cache)
@@ -62,6 +68,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
       texture_lf_buffer{instance, scheduler, TEXTURE_BUFFER_SIZE,
                         vk::BufferUsageFlagBits::eUniformTexelBuffer, TEXTURE_BUFFER_LF_FORMATS} {
+    vertex_buffers.fill(vertex_buffer.GetHandle());
     uniform_buffer_alignment = instance.UniformMinAlignment();
     uniform_size_aligned_vs =
         Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
@@ -257,11 +265,9 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
     SetupFixedAttribs();
     // Bind the generated bindings
-    scheduler.Record([this, layout = pipeline_info.vertex_layout, offsets = binding_offsets](
+    scheduler.Record([this, vertex_offsets = binding_offsets](
                          vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-        std::array<vk::Buffer, 16> buffers;
-        buffers.fill(vertex_buffer.GetHandle());
-        render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(), offsets.data());
+        render_cmdbuf.bindVertexBuffers(0, vertex_buffers, vertex_offsets);
     });
 }
@@ -373,6 +379,9 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
     }
     SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max);
+    if (is_indexed) {
+        SetupIndexArray();
+    }
     if (!SetupVertexShader()) {
         return false;
@@ -385,42 +394,53 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
     pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
     pipeline_cache.BindPipeline(pipeline_info);
-    if (is_indexed) {
-        bool index_u16 = regs.pipeline.index_array.format != 0;
-        const u32 index_buffer_size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
-
-        if (index_buffer_size > INDEX_BUFFER_SIZE) {
-            LOG_WARNING(Render_Vulkan, "Too large index input size {}", index_buffer_size);
-            return false;
-        }
-
-        const u8* index_data = VideoCore::g_memory->GetPhysicalPointer(
-            regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
-            regs.pipeline.index_array.offset);
-
-        // Upload index buffer data to the GPU
-        auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
-        std::memcpy(index_ptr, index_data, index_buffer_size);
-        index_buffer.Commit(index_buffer_size);
-
-        scheduler.Record([this, offset = index_offset, num_vertices = regs.pipeline.num_vertices,
-                          index_u16, vertex_offset = vs_input_index_min](
-                             vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-            const vk::IndexType index_type =
-                index_u16 ? vk::IndexType::eUint16 : vk::IndexType::eUint8EXT;
-            render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), offset, index_type);
-            render_cmdbuf.drawIndexed(num_vertices, 1, 0, -vertex_offset, 0);
-        });
-    } else {
-        scheduler.Record([num_vertices = regs.pipeline.num_vertices](
-                             vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-            render_cmdbuf.draw(num_vertices, 1, 0, 0);
-        });
-    }
+    const DrawParams params = {
+        .vertex_count = regs.pipeline.num_vertices,
+        .vertex_offset = -static_cast<s32>(vs_input_index_min),
+        .is_indexed = is_indexed,
+    };
+
+    scheduler.Record([params](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
+        if (params.is_indexed) {
+            render_cmdbuf.drawIndexed(params.vertex_count, 1, 0, params.vertex_offset, 0);
+        } else {
+            render_cmdbuf.draw(params.vertex_count, 1, 0, 0);
+        }
+    });
     return true;
 }
+void RasterizerVulkan::SetupIndexArray() {
+    const auto& regs = Pica::g_state.regs;
+    const bool index_u8 = regs.pipeline.index_array.format == 0;
+    const bool native_u8 = index_u8 && instance.IsIndexTypeUint8Supported();
+    const vk::IndexType index_type = native_u8 ? vk::IndexType::eUint8EXT : vk::IndexType::eUint16;
+    const u32 index_buffer_size = regs.pipeline.num_vertices * (native_u8 ? 1 : 2);
+
+    const u8* index_data = VideoCore::g_memory->GetPhysicalPointer(
+        regs.pipeline.vertex_attributes.GetPhysicalBaseAddress() +
+        regs.pipeline.index_array.offset);
+
+    auto [index_ptr, index_offset, _] = index_buffer.Map(index_buffer_size);
+    if (index_u8 && !native_u8) {
+        u16* index_ptr_u16 = reinterpret_cast<u16*>(index_ptr);
+        for (u32 i = 0; i < regs.pipeline.num_vertices; i++) {
+            index_ptr_u16[i] = index_data[i];
+        }
+    } else {
+        std::memcpy(index_ptr, index_data, index_buffer_size);
+    }
+    index_buffer.Commit(index_buffer_size);
+
+    scheduler.Record([this, index_offset = index_offset, index_type = index_type](
+                         vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
+        render_cmdbuf.bindIndexBuffer(index_buffer.GetHandle(), index_offset, index_type);
+    });
+}
 void RasterizerVulkan::DrawTriangles() {
     if (vertex_batch.empty()) {
         return;
@@ -707,20 +727,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
         .clear = {},
     };
     renderpass_cache.ExitRenderpass();
-    scheduler.Record([](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
-        const vk::MemoryBarrier memory_write_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-            .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
-        };
-        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::DependencyFlagBits::eByRegion,
-                                      memory_write_barrier, {}, {});
-    });
     renderpass_cache.EnterRenderpass(renderpass_info);
     // Draw the vertex batch
@@ -769,11 +775,11 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
                            depth_surface);
     }
-    static int submit_threshold = 120;
+    static int submit_threshold = 80;
     submit_threshold--;
     if (!submit_threshold) {
-        submit_threshold = 120;
-        scheduler.Flush();
+        submit_threshold = 80;
+        scheduler.DispatchWork();
     }
     return succeeded;
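A note on the vertex_offset negation in the draw path above: vkCmdDrawIndexed fetches vertex `indexBuffer[i] + vertexOffset`, and the uploaded vertex range starts at guest index vs_input_index_min, so the draw passes -vs_input_index_min to rebase guest indices to zero. A tiny worked example (the values are made up):

```cpp
#include <cassert>
#include <cstdint>

int main() {
    const std::uint32_t vs_input_index_min = 100; // hypothetical smallest guest index
    const std::uint32_t guest_index = 103;        // an index the game submitted
    const std::int32_t vertex_offset = -static_cast<std::int32_t>(vs_input_index_min);

    // The GPU fetches vertex (index + vertexOffset): the 4th vertex of the upload.
    assert(static_cast<std::int32_t>(guest_index) + vertex_offset == 3);
}
```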

View File

@@ -156,6 +156,9 @@ private:
     /// Internal implementation for AccelerateDrawBatch
     bool AccelerateDrawBatchInternal(bool is_indexed);
+    /// Setup index array for AccelerateDrawBatch
+    void SetupIndexArray();
     /// Setup vertex array for AccelerateDrawBatch
     void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
@@ -189,6 +192,7 @@ private:
     VertexLayout software_layout;
     std::array<u64, 16> binding_offsets{};
     std::array<bool, 16> enable_attributes{};
+    std::array<vk::Buffer, 16> vertex_buffers;
     vk::Sampler default_sampler;
     Surface null_surface;
     Surface null_storage_surface;

View File

@@ -80,7 +80,7 @@ void Scheduler::WaitWorker() {
 }
 void Scheduler::DispatchWork() {
-    if (chunk->Empty()) {
+    if (!use_worker_thread || chunk->Empty()) {
         return;
     }
@@ -133,26 +133,26 @@ void Scheduler::AllocateWorkerCommandBuffers() {
 MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255));
 void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
-    renderer.FlushBuffers();
+    const auto handle = master_semaphore.Handle();
     const u64 signal_value = master_semaphore.NextTick();
     state = StateFlags::AllDirty;
+    renderer.FlushBuffers();
     renderpass_cache.ExitRenderpass();
-    Record([signal_semaphore, wait_semaphore, signal_value, this](vk::CommandBuffer render_cmdbuf,
-                                                                  vk::CommandBuffer upload_cmdbuf) {
+    Record([signal_semaphore, wait_semaphore,
+            handle, signal_value, this](vk::CommandBuffer render_cmdbuf,
+                                        vk::CommandBuffer upload_cmdbuf) {
         MICROPROFILE_SCOPE(Vulkan_Submit);
         upload_cmdbuf.end();
         render_cmdbuf.end();
-        const vk::Semaphore timeline_semaphore = master_semaphore.Handle();
         const u32 num_signal_semaphores = signal_semaphore ? 2U : 1U;
         const std::array signal_values{signal_value, u64(0)};
-        const std::array signal_semaphores{timeline_semaphore, signal_semaphore};
+        const std::array signal_semaphores{handle, signal_semaphore};
         const u32 num_wait_semaphores = wait_semaphore ? 2U : 1U;
         const std::array wait_values{signal_value - 1, u64(1)};
-        const std::array wait_semaphores{timeline_semaphore, wait_semaphore};
+        const std::array wait_semaphores{handle, wait_semaphore};
         static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks = {
             vk::PipelineStageFlagBits::eAllCommands,
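For context on the semaphore arrays above: the scheduler mixes one timeline semaphore (waited at signal_value - 1, signaled at signal_value) with optional binary swapchain semaphores, whose slots in the value arrays exist only to keep the arrays parallel; the driver ignores values for binary semaphores. A hedged sketch of how such a submit is assembled with Vulkan-Hpp designated initializers (the function, its parameters, and the second wait stage are illustrative):

```cpp
#define VULKAN_HPP_NO_CONSTRUCTORS // enables designated initializers for vk:: structs
#include <array>
#include <cstdint>
#include <vulkan/vulkan.hpp>

// Illustrative submit combining one timeline semaphore (`handle`) with
// optional binary swapchain semaphores.
void SubmitMixed(vk::Queue queue, vk::CommandBuffer cmdbuf, vk::Semaphore handle,
                 vk::Semaphore wait_semaphore, vk::Semaphore signal_semaphore,
                 std::uint64_t signal_value) {
    const std::array signal_values{signal_value, std::uint64_t(0)};
    const std::array signal_semaphores{handle, signal_semaphore};
    const std::array wait_values{signal_value - 1, std::uint64_t(1)};
    const std::array wait_semaphores{handle, wait_semaphore};
    static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
        vk::PipelineStageFlagBits::eAllCommands,
        vk::PipelineStageFlagBits::eColorAttachmentOutput,
    };

    const std::uint32_t num_waits = wait_semaphore ? 2u : 1u;
    const std::uint32_t num_signals = signal_semaphore ? 2u : 1u;
    const vk::TimelineSemaphoreSubmitInfoKHR timeline_info = {
        .waitSemaphoreValueCount = num_waits,
        .pWaitSemaphoreValues = wait_values.data(),
        .signalSemaphoreValueCount = num_signals,
        .pSignalSemaphoreValues = signal_values.data(),
    };
    const vk::SubmitInfo submit_info = {
        .pNext = &timeline_info,
        .waitSemaphoreCount = num_waits,
        .pWaitSemaphores = wait_semaphores.data(),
        .pWaitDstStageMask = wait_stage_masks.data(),
        .commandBufferCount = 1u,
        .pCommandBuffers = &cmdbuf,
        .signalSemaphoreCount = num_signals,
        .pSignalSemaphores = signal_semaphores.data(),
    };
    queue.submit(submit_info, {}); // no fence; the timeline value plays that role
}
```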

View File

@@ -663,16 +663,6 @@ bool TextureRuntime::CopyTextures(Surface& source, Surface& dest,
         render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                                       vk::PipelineStageFlagBits::eAllCommands,
                                       vk::DependencyFlagBits::eByRegion, {}, {}, post_barriers);
-        const vk::MemoryBarrier memory_write_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-            .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
-        };
-        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::DependencyFlagBits::eByRegion,
-                                      memory_write_barrier, {}, {});
     });
     return true;
@@ -790,17 +780,6 @@ bool TextureRuntime::BlitTextures(Surface& source, Surface& dest,
         render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                                       vk::PipelineStageFlagBits::eAllCommands,
                                       vk::DependencyFlagBits::eByRegion, {}, {}, write_barriers);
-        const vk::MemoryBarrier memory_write_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-            .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
-        };
-        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::DependencyFlagBits::eByRegion,
-                                      memory_write_barrier, {}, {});
     });
     return true;
@@ -983,16 +962,6 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
         render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                                       vk::PipelineStageFlagBits::eAllCommands,
                                       vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
-        const vk::MemoryBarrier memory_write_barrier = {
-            .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
-            .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
-        };
-        render_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::PipelineStageFlagBits::eAllCommands,
-                                      vk::DependencyFlagBits::eByRegion,
-                                      memory_write_barrier, {}, {});
     });
     runtime.upload_buffer.Commit(staging.size);
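The blocks removed above were full flush barriers (all writes made visible to all reads and writes, across eAllCommands on both sides), recorded immediately after the targeted image barriers each path already emits, so they added synchronization cost without adding correctness. For contrast, a sketch of the kind of scoped barrier these paths keep; the image, layouts, and stages are illustrative, not taken from the diff:

```cpp
#define VULKAN_HPP_NO_CONSTRUCTORS // enables designated initializers for vk:: structs
#include <vulkan/vulkan.hpp>

// Illustrative scoped barrier: make one image's transfer write visible to
// later shader reads, rather than flushing all memory across all stages.
void RecordPostUploadBarrier(vk::CommandBuffer cmdbuf, vk::Image image) {
    const vk::ImageMemoryBarrier write_barrier = {
        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
        .dstAccessMask = vk::AccessFlagBits::eShaderRead,
        .oldLayout = vk::ImageLayout::eTransferDstOptimal,
        .newLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1},
    };
    cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
                           vk::PipelineStageFlagBits::eFragmentShader,
                           vk::DependencyFlagBits::eByRegion, {}, {}, write_barrier);
}
```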