renderer_vulkan: Complete hardware shader support
* With these changes all commercial games I tested work fine and get a massive performance boost
This commit is contained in:
@ -252,6 +252,7 @@ bool Instance::CreateDevice() {
|
||||
|
||||
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
|
||||
AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
|
||||
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
|
||||
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);
|
||||
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
|
||||
@ -322,6 +323,7 @@ bool Instance::CreateDevice() {
|
||||
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
|
||||
.shaderClipDistance = available.shaderClipDistance}},
|
||||
vk::PhysicalDeviceDepthClipControlFeaturesEXT{.depthClipControl = true},
|
||||
vk::PhysicalDeviceIndexTypeUint8FeaturesEXT{.indexTypeUint8 = true},
|
||||
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
|
||||
feature_chain.get<vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR>()};
|
||||
|
||||
|
@ -78,33 +78,20 @@ u32 AttribBytes(VertexAttribute attrib) {
|
||||
}
|
||||
|
||||
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
|
||||
switch (attrib.type) {
|
||||
case AttribType::Float:
|
||||
switch (attrib.size) {
|
||||
case 1:
|
||||
return vk::Format::eR32Sfloat;
|
||||
case 2:
|
||||
return vk::Format::eR32G32Sfloat;
|
||||
case 3:
|
||||
return vk::Format::eR32G32B32Sfloat;
|
||||
case 4:
|
||||
return vk::Format::eR32G32B32A32Sfloat;
|
||||
}
|
||||
case AttribType::Ubyte:
|
||||
switch (attrib.size) {
|
||||
case 4:
|
||||
return vk::Format::eR8G8B8A8Uint;
|
||||
default:
|
||||
fmt::print("{}\n", attrib.size.Value());
|
||||
UNREACHABLE();
|
||||
}
|
||||
constexpr std::array attribute_formats = {
|
||||
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
|
||||
vk::Format::eR32G32B32A32Sfloat},
|
||||
std::array{vk::Format::eR32Sint, vk::Format::eR32G32Sint, vk::Format::eR32G32B32Sint,
|
||||
vk::Format::eR32G32B32A32Sint},
|
||||
std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint,
|
||||
vk::Format::eR16G16B16A16Sint},
|
||||
std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
|
||||
vk::Format::eR8G8B8A8Sint},
|
||||
std::array{vk::Format::eR8Uint, vk::Format::eR8G8Uint, vk::Format::eR8G8B8Uint,
|
||||
vk::Format::eR8G8B8A8Uint}};
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute type {}", attrib.type.Value());
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return vk::Format::eR32Sfloat;
|
||||
ASSERT(attrib.size <= 4);
|
||||
return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
|
||||
}
|
||||
|
||||
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
|
||||
@ -197,8 +184,14 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
|
||||
}
|
||||
|
||||
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
|
||||
Pica::Shader::ShaderSetup& setup) {
|
||||
const PicaVSConfig config{regs.vs, setup};
|
||||
Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout) {
|
||||
PicaVSConfig config{regs.vs, setup};
|
||||
for (u32 i = 0; i < layout.attribute_count; i++) {
|
||||
const auto& attrib = layout.attributes[i];
|
||||
config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
|
||||
}
|
||||
|
||||
auto [handle, result] =
|
||||
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
|
@ -22,8 +22,6 @@ constexpr u32 MAX_VERTEX_BINDINGS = 16;
|
||||
constexpr u32 MAX_DESCRIPTORS = 8;
|
||||
constexpr u32 MAX_DESCRIPTOR_SETS = 6;
|
||||
|
||||
enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 };
|
||||
|
||||
/**
|
||||
* The pipeline state is tightly packed with bitfields to reduce
|
||||
* the overhead of hashing as much as possible
|
||||
@ -154,7 +152,8 @@ public:
|
||||
void BindPipeline(const PipelineInfo& info);
|
||||
|
||||
/// Binds a PICA decompiled vertex shader
|
||||
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
bool UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout);
|
||||
|
||||
/// Binds a passthrough vertex shader
|
||||
void UseTrivialVertexShader();
|
||||
|
@ -328,7 +328,7 @@ RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_i
|
||||
|
||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||
u32 vs_input_index_max) {
|
||||
auto [array_ptr, array_offset, _] = vertex_buffer.Map(vs_input_size, 4);
|
||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
||||
|
||||
// The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
|
||||
// how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
|
||||
@ -340,9 +340,8 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
|
||||
|
||||
VertexLayout layout{};
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
std::array<u64, 16> binding_offsets{};
|
||||
VertexLayout layout{};
|
||||
|
||||
u32 buffer_offset = array_offset;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
@ -387,26 +386,32 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
const PAddr data_addr =
|
||||
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
|
||||
const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
|
||||
const u32 data_size = loader.byte_count * vertex_num;
|
||||
u32 data_size = loader.byte_count * vertex_num;
|
||||
|
||||
res_cache.FlushRegion(data_addr, data_size, nullptr);
|
||||
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
|
||||
|
||||
// Create the binding associated with this loader
|
||||
VertexBinding& binding = layout.bindings.at(layout.binding_count);
|
||||
VertexBinding& binding = layout.bindings[layout.binding_count];
|
||||
binding.binding.Assign(layout.binding_count);
|
||||
binding.fixed.Assign(0);
|
||||
binding.stride.Assign(loader.byte_count);
|
||||
|
||||
// Keep track of the binding offsets so we can bind the vertex buffer later
|
||||
binding_offsets[layout.binding_count++] = buffer_offset;
|
||||
data_size = Common::AlignUp(data_size, 16);
|
||||
array_ptr += data_size;
|
||||
buffer_offset += data_size;
|
||||
}
|
||||
|
||||
// Reserve the last binding for fixed attributes
|
||||
u32 offset = 0;
|
||||
bool has_fixed_binding = false;
|
||||
// Reserve the last binding for fixed and default attributes
|
||||
// Place the default attrib at offset zero for easy access
|
||||
constexpr Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
|
||||
u32 offset = sizeof(Common::Vec4f);
|
||||
std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
|
||||
array_ptr += sizeof(Common::Vec4f);
|
||||
|
||||
// Find all fixed attributes and assign them to the last binding
|
||||
for (std::size_t i = 0; i < 16; i++) {
|
||||
if (vertex_attributes.IsDefaultAttribute(i)) {
|
||||
const u32 reg = regs.vs.GetRegisterForAttribute(i);
|
||||
@ -415,11 +420,10 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
const std::array data = {attr.x.ToFloat32(), attr.y.ToFloat32(), attr.z.ToFloat32(),
|
||||
attr.w.ToFloat32()};
|
||||
|
||||
// Copy the data to the end of the buffer
|
||||
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
|
||||
std::memcpy(array_ptr, data.data(), data_size);
|
||||
|
||||
VertexAttribute& attribute = layout.attributes.at(layout.attribute_count++);
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
attribute.location.Assign(reg);
|
||||
attribute.offset.Assign(offset);
|
||||
@ -428,21 +432,36 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
|
||||
offset += data_size;
|
||||
array_ptr += data_size;
|
||||
has_fixed_binding = true;
|
||||
enable_attributes[reg] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (has_fixed_binding) {
|
||||
VertexBinding& binding = layout.bindings.at(layout.binding_count);
|
||||
binding.binding.Assign(layout.binding_count);
|
||||
binding.fixed.Assign(1);
|
||||
binding.stride.Assign(offset);
|
||||
|
||||
binding_offsets[layout.binding_count++] = buffer_offset;
|
||||
buffer_offset += offset;
|
||||
// Loop one more time to find unused attributes and assign them to the default one
|
||||
// This needs to happen because i = 2 might be assigned to location = 3 so the loop
|
||||
// above would skip setting it
|
||||
for (std::size_t i = 0; i < 16; i++) {
|
||||
// If the attribute is just disabled, shove the default attribute to avoid
|
||||
// errors if the shader ever decides to use it. The pipeline cache can discard
|
||||
// this if needed since it has access to the usage mask from the code generator
|
||||
if (!enable_attributes[i]) {
|
||||
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
|
||||
attribute.binding.Assign(layout.binding_count);
|
||||
attribute.location.Assign(i);
|
||||
attribute.offset.Assign(0);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(4);
|
||||
}
|
||||
}
|
||||
|
||||
// Define the fixed+default binding
|
||||
VertexBinding& binding = layout.bindings[layout.binding_count];
|
||||
binding.binding.Assign(layout.binding_count);
|
||||
binding.fixed.Assign(1);
|
||||
binding.stride.Assign(offset);
|
||||
binding_offsets[layout.binding_count++] = buffer_offset;
|
||||
buffer_offset += offset;
|
||||
|
||||
pipeline_info.vertex_layout = layout;
|
||||
vertex_buffer.Commit(buffer_offset - array_offset);
|
||||
|
||||
@ -457,7 +476,8 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
|
||||
|
||||
bool RasterizerVulkan::SetupVertexShader() {
|
||||
MICROPROFILE_SCOPE(OpenGL_VS);
|
||||
return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs);
|
||||
return pipeline_cache.UseProgrammableVertexShader(Pica::g_state.regs, Pica::g_state.vs,
|
||||
pipeline_info.vertex_layout);
|
||||
}
|
||||
|
||||
bool RasterizerVulkan::SetupGeometryShader() {
|
||||
@ -484,14 +504,6 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!SetupVertexShader()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!SetupGeometryShader()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return Draw(true, is_indexed);
|
||||
}
|
||||
|
||||
@ -506,6 +518,15 @@ bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
}
|
||||
|
||||
SetupVertexArray(vs_input_size, vs_input_index_min, vs_input_index_max);
|
||||
|
||||
if (!SetupVertexShader()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!SetupGeometryShader()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
pipeline_info.rasterization.topology.Assign(regs.pipeline.triangle_topology);
|
||||
pipeline_cache.BindPipeline(pipeline_info);
|
||||
|
||||
@ -848,6 +869,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
||||
} else {
|
||||
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
|
||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
||||
pipeline_cache.UseTrivialVertexShader();
|
||||
pipeline_cache.UseTrivialGeometryShader();
|
||||
pipeline_cache.BindPipeline(pipeline_info);
|
||||
|
@ -273,6 +273,7 @@ private:
|
||||
};
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
std::array<u64, 16> binding_offsets{};
|
||||
ImageAlloc default_texture;
|
||||
vk::Sampler default_sampler;
|
||||
|
||||
@ -289,8 +290,6 @@ private:
|
||||
bool dirty = true;
|
||||
} uniform_block_data = {};
|
||||
|
||||
std::array<bool, 16> hw_enabled_attributes{};
|
||||
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
SamplerInfo texture_cube_sampler;
|
||||
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
|
||||
|
@ -1625,7 +1625,24 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
// input attributes declaration
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (used_regs[i]) {
|
||||
out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, i == 3 ? "" : "");
|
||||
std::string_view prefix;
|
||||
switch (config.state.attrib_types[i]) {
|
||||
case AttribType::Float:
|
||||
prefix = "";
|
||||
break;
|
||||
case AttribType::Byte:
|
||||
case AttribType::Short:
|
||||
prefix = "i";
|
||||
break;
|
||||
case AttribType::Ubyte:
|
||||
prefix = "u";
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
out += fmt::format("layout(location = {0}) in {1}vec4 vs_in_reg{0};\n", i, prefix);
|
||||
}
|
||||
}
|
||||
out += '\n';
|
||||
|
@ -12,6 +12,8 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
/// Component data type of a vertex attribute. The numeric value is used directly as the row
/// index of the format lookup table in ToVkAttributeFormat, so the enumerator order
/// (Float, Int, Short, Byte, Ubyte) must stay in sync with that table.
enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 };
|
||||
|
||||
enum Attributes {
|
||||
ATTRIBUTE_POSITION,
|
||||
ATTRIBUTE_COLOR,
|
||||
@ -147,6 +149,7 @@ struct PicaShaderConfigCommon {
|
||||
u64 swizzle_hash;
|
||||
u32 main_offset;
|
||||
bool sanitize_mul;
|
||||
std::array<AttribType, 16> attrib_types;
|
||||
|
||||
u32 num_outputs;
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
@ -115,6 +116,11 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
|
||||
|
||||
const u32 current_bucket = scheduler.GetCurrentSlotIndex();
|
||||
auto& bucket = buckets[current_bucket];
|
||||
|
||||
if (alignment > 0) {
|
||||
bucket.offset = Common::AlignUp(bucket.offset, alignment);
|
||||
}
|
||||
|
||||
if (bucket.offset + size > bucket_size) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
Reference in New Issue
Block a user