renderer_vulkan: Emulate 3-component vertex formats when unsupported

* This fixes the crashes on AMD
This commit is contained in:
GPUCode
2022-11-02 21:46:51 +02:00
parent 6a4ff8fa24
commit 6057b18172
14 changed files with 145 additions and 68 deletions

View File

@ -7,7 +7,7 @@ buildscript {
jcenter()
}
dependencies {
classpath 'com.android.tools.build:gradle:7.2.0'
classpath 'com.android.tools.build:gradle:7.3.1'
// NOTE: Do not place your application dependencies here; they belong
// in the individual module build.gradle files

View File

@ -181,7 +181,8 @@ static std::array<float, 3 * 2> MakeOrthographicMatrix(float width, float height
}
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
: RendererBase{window}, instance{window, Settings::values.physical_device},
scheduler{instance, renderpass_cache, *this},
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
runtime{instance, scheduler, renderpass_cache, desc_manager},
swapchain{instance, scheduler, renderpass_cache},
@ -919,7 +920,6 @@ void RendererVulkan::SwapBuffers() {
PrepareRendertarget();
const auto RecreateSwapchain = [&] {
renderpass_cache.ExitRenderpass();
scheduler.Finish();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
swapchain.Create(layout.width, layout.height);

View File

@ -17,21 +17,21 @@
namespace Vulkan {
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
switch (format) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
return sizeof(float) * attrib.size;
return sizeof(float) * size;
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
return sizeof(u16) * attrib.size;
return sizeof(u16) * size;
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
return sizeof(u8) * attrib.size;
return sizeof(u8) * size;
}
return 0;
}
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
constexpr std::array attribute_formats = {
std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
vk::Format::eR8G8B8A8Sint},
@ -42,8 +42,8 @@ vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
vk::Format::eR32G32B32A32Sfloat}};
ASSERT(attrib.size <= 4);
return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
ASSERT(size <= 4);
return attribute_formats[static_cast<u32>(format)][size - 1];
}
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
@ -62,6 +62,13 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
return vk::ShaderStageFlagBits::eVertex;
}
/**
 * Checks whether the physical device accepts the Vulkan format matching the given
 * attribute as a vertex buffer format.
 *
 * @param attrib   Vertex attribute whose type and component count select the Vulkan format
 * @param instance Instance wrapper used to obtain the physical device to query
 * @return true when the format's buffer features include the eVertexBuffer bit
 */
[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, const Instance& instance) {
    const vk::PhysicalDevice physical_device = instance.GetPhysicalDevice();
    const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);

    // Vertex input support is reported through the buffer features of the format
    const vk::FormatFeatureFlags features =
        physical_device.getFormatProperties(format).bufferFeatures;
    return (features & vk::FormatFeatureFlagBits::eVertexBuffer) ==
           vk::FormatFeatureFlagBits::eVertexBuffer;
}
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
@ -179,21 +186,26 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
desc_manager.BindDescriptorSets();
}
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
MICROPROFILE_SCOPE(Vulkan_VS);
PicaVSConfig config{regs.vs, setup};
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) {
const auto& attrib = layout.attributes[i];
config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
const u32 location = attrib.location.Value();
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT(is_supported || attrib.size == 3);
config.state.attrib_types[location] = attrib.type.Value();
config.state.emulated_attrib_locations[location] =
is_supported ? 0 : emulated_attrib_loc++;
}
auto [handle, result] =
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
instance.GetDevice(), ShaderOptimization::High);
if (!handle) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
return false;
@ -410,20 +422,36 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
: vk::VertexInputRate::eVertex};
}
// Populate vertex attribute structures
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
u32 emulated_attrib_count = 0;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES * 2> attributes;
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
const auto& attr = info.vertex_layout.attributes[i];
attributes[i] = vk::VertexInputAttributeDescription{.location = attr.location,
.binding = attr.binding,
.format = ToVkAttributeFormat(attr),
.offset = attr.offset};
const VertexAttribute& attrib = info.vertex_layout.attributes[i];
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT_MSG(is_supported || attrib.size == 3);
attributes[i] = vk::VertexInputAttributeDescription{.location = attrib.location,
.binding = attrib.binding,
.format = is_supported ? format
: ToVkAttributeFormat(attrib.type, 2),
.offset = attrib.offset};
// When the requested 3-component vertex format is unsupported by the hardware
// it is emulated by breaking it into a vec2 + vec1. These are combined into a vec3
// by the vertex shader.
if (!is_supported) {
const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++;
attributes[location] = vk::VertexInputAttributeDescription{.location = location,
.binding = attrib.binding,
.format = ToVkAttributeFormat(attrib.type, 1),
.offset = attrib.offset + AttribBytes(attrib.type, 2)};
}
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count + emulated_attrib_count,
.pVertexAttributeDescriptions = attributes.data()};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {

View File

@ -126,7 +126,6 @@ class DescriptorManager;
/**
* Stores a collection of rasterizer pipelines used during rendering.
* In addition handles descriptor set management.
*/
class PipelineCache {
public:

View File

@ -109,7 +109,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
}
RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass();
scheduler.Finish();
vk::Device device = instance.GetDevice();
@ -178,21 +177,24 @@ void RasterizerVulkan::SyncFixedState() {
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
u32 vs_input_index_max) {
const u32 vertex_size = vs_input_size + sizeof(Common::Vec4f) * 16;
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vertex_size, 4);
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
// The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
// how to interpret vertex data. The program firsts sets GPUREG_ATTR_BUF_BASE to the base
// address containing the vertex array data. The data for each attribute loader (i) can be found
// by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought
// as something analogous to Vulkan bindings. The user can store attributes in separate loaders
// or interleave them in the same loader.
/**
* The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
* how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
* address containing the vertex array data. The data for each attribute loader (i) can be found
* by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought of
* as something analogous to Vulkan bindings. The user can store attributes in separate loaders
* or interleave them in the same loader.
**/
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
std::array<bool, 16> enable_attributes{};
VertexLayout layout{};
VertexLayout& layout = pipeline_info.vertex_layout;
layout.attribute_count = 0;
layout.binding_count = 0;
enable_attributes.fill(false);
u32 buffer_offset = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
@ -250,12 +252,33 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
buffer_offset += Common::AlignUp(data_size, 16);
}
array_ptr += buffer_offset;
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
vertex_buffer.Commit(buffer_offset);
// Assign the rest of the attributes to the last binding
SetupFixedAttribs();
// Bind the generated bindings
scheduler.Record([this, layout = pipeline_info.vertex_layout,
offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
offsets.data());
});
}
void RasterizerVulkan::SetupFixedAttribs() {
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
VertexLayout& layout = pipeline_info.vertex_layout;
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
// Reserve the last binding for fixed and default attributes
// Place the default attrib at offset zero for easy access
const Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
// Find all fixed attributes and assign them to the last binding
u32 offset = sizeof(Common::Vec4f);
@ -268,7 +291,7 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
attr.w.ToFloat32()};
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
std::memcpy(array_ptr + offset, data.data(), data_size);
std::memcpy(fixed_ptr + offset, data.data(), data_size);
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
attribute.binding.Assign(layout.binding_count);
@ -299,26 +322,16 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
// Define the fixed+default binding
VertexBinding& binding = layout.bindings[layout.binding_count];
binding.binding.Assign(layout.binding_count);
binding.binding.Assign(layout.binding_count++);
binding.fixed.Assign(1);
binding.stride.Assign(offset);
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
ASSERT(buffer_offset + offset <= vertex_size);
vertex_buffer.Commit(buffer_offset + offset);
// Update the pipeline vertex layout
pipeline_info.vertex_layout = layout;
scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
offsets.data());
});
vertex_buffer.Commit(offset);
}
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool RasterizerVulkan::SetupVertexShader() {
MICROPROFILE_SCOPE(Vulkan_VS);
return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs,
pipeline_info.vertex_layout);
}
@ -354,7 +367,7 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
const auto& regs = Pica::g_state.regs;
auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
if (vs_input_size > VERTEX_BUFFER_SIZE) {
LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size);

View File

@ -158,12 +158,12 @@ private:
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
/// Copies vertex data performing needed convertions and casts
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
/// Setup the fixed attribute emulation in vulkan
void SetupFixedAttribs();
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
@ -190,6 +190,7 @@ private:
VertexLayout software_layout;
std::array<u64, 16> binding_offsets{};
std::array<bool, 16> enable_attributes{};
vk::Sampler default_sampler;
Surface null_surface;
Surface null_storage_surface;

View File

@ -121,11 +121,12 @@ void DescriptorPool::RefreshTick() {
}
void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
LOG_INFO(Render_Vulkan, "Allocating new descriptor pool");
vk::DescriptorPool& pool = pools.emplace_back();
// Choose a sane pool size good for most games
static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
{vk::DescriptorType::eUniformBuffer, 2048},
{vk::DescriptorType::eUniformBuffer, 4096},
{vk::DescriptorType::eSampledImage, 4096},
{vk::DescriptorType::eSampler, 4096},
{vk::DescriptorType::eUniformTexelBuffer, 2048},

View File

@ -25,9 +25,9 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
last = nullptr;
}
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
: instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore},
use_worker_thread{Settings::values.async_command_recording} {
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, RendererVulkan& renderer)
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, master_semaphore{instance},
command_pool{instance, master_semaphore}, use_worker_thread{Settings::values.async_command_recording} {
AllocateWorkerCommandBuffers();
if (use_worker_thread) {
AcquireNewChunk();
@ -120,6 +120,7 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
const u64 signal_value = master_semaphore.NextTick();
state = StateFlags::AllDirty;
renderpass_cache.ExitRenderpass();
Record([signal_semaphore, wait_semaphore, signal_value, this]
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
MICROPROFILE_SCOPE(Vulkan_Submit);

View File

@ -27,13 +27,15 @@ enum class StateFlags {
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
class Instance;
class RenderpassCache;
class RendererVulkan;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
RendererVulkan& renderer);
~Scheduler();
/// Sends the current execution context to the GPU.
@ -193,6 +195,7 @@ private:
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
RendererVulkan& renderer;
MasterSemaphore master_semaphore;
CommandPool command_pool;

View File

@ -1667,7 +1667,7 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
prefix = "u";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
UNREACHABLE();
}
@ -1675,12 +1675,42 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
}
}
// Some 3-component attributes might be emulated by breaking them to vec2 + scalar.
// Define them here and combine them below
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (const u32 location = config.state.emulated_attrib_locations[i]; location != 0 && used_regs[i]) {
std::string_view type;
switch (config.state.attrib_types[i]) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
type = "float";
break;
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
type = "int";
break;
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
type = "uint";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
UNREACHABLE();
}
out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, type, i);
}
}
out += '\n';
// cast input registers to float to avoid computational errors
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (used_regs[i]) {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
if (config.state.emulated_attrib_locations[i] != 0) {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), float(vs_in_typed_reg{0}_part2), 0.f);\n", i);
} else {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
}
}
}
out += '\n';

View File

@ -157,6 +157,7 @@ struct PicaShaderConfigCommon {
u32 main_offset;
bool sanitize_mul;
std::array<Pica::PipelineRegs::VertexAttributeFormat, 16> attrib_types;
std::array<u8, 16> emulated_attrib_locations;
u32 num_outputs;

View File

@ -5,6 +5,7 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"

View File

@ -114,7 +114,7 @@ void Swapchain::Present() {
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &index};
MICROPROFILE_SCOPE(Vulkan_Present);
vk::Queue present_queue = instance.GetPresentQueue();
try {
[[maybe_unused]] vk::Result result = present_queue.presentKHR(present_info);

View File

@ -60,7 +60,7 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
return depth_offset;
}
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u32 UPLOAD_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(const Instance& instance, Scheduler& scheduler,
@ -124,7 +124,6 @@ void TextureRuntime::FlushBuffers() {
MICROPROFILE_DEFINE(Vulkan_Finish, "Vulkan", "Scheduler Finish", MP_RGB(52, 192, 235));
void TextureRuntime::Finish() {
MICROPROFILE_SCOPE(Vulkan_Finish);
renderpass_cache.ExitRenderpass();
scheduler.Finish();
download_buffer.Invalidate();
}