video_core: Move HardwareVertex to RasterizerAccelerated
This commit is contained in:
@ -20,10 +20,73 @@ static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
|
||||
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
|
||||
}
|
||||
|
||||
RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
||||
bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
RasterizerAccelerated::RasterizerAccelerated() {
|
||||
uniform_block_data.lighting_lut_dirty.fill(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
|
||||
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
|
||||
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
|
||||
@ -116,6 +179,44 @@ void RasterizerAccelerated::ClearAll(bool flush) {
|
||||
cached_pages = {};
|
||||
}
|
||||
|
||||
RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
FlushRegion(address, size);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerAccelerated::SyncDepthScale() {
|
||||
float depth_scale =
|
||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||
|
@ -16,8 +16,11 @@ public:
|
||||
RasterizerAccelerated();
|
||||
virtual ~RasterizerAccelerated() = default;
|
||||
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
||||
void ClearAll(bool flush) override;
|
||||
|
||||
protected:
|
||||
@ -79,7 +82,8 @@ protected:
|
||||
/// Syncs the shadow texture bias to match the PICA register
|
||||
void SyncShadowTextureBias();
|
||||
|
||||
private:
|
||||
protected:
|
||||
/// Structure that keeps tracks of the uniform state
|
||||
struct UniformBlockData {
|
||||
Pica::Shader::UniformData data{};
|
||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
||||
@ -93,8 +97,34 @@ private:
|
||||
bool dirty = true;
|
||||
};
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
protected:
|
||||
std::array<u16, 0x30000> cached_pages{};
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
bool shader_dirty = true;
|
||||
|
||||
UniformBlockData uniform_block_data{};
|
||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||
|
@ -202,39 +202,6 @@ void RasterizerOpenGL::SyncEntireState() {
|
||||
SyncShadowTextureBias();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||
GL_BYTE, // VertexAttributeFormat::BYTE
|
||||
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
|
||||
@ -242,50 +209,6 @@ static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||
GL_FLOAT // VertexAttributeFormat::FLOAT
|
||||
};
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
res_cache.FlushRegion(address, size, nullptr);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
GLuint vs_input_index_min, GLuint vs_input_index_max) {
|
||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||
|
@ -5,13 +5,11 @@
|
||||
#pragma once
|
||||
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/pica_types.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
@ -32,8 +30,6 @@ public:
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
void DrawTriangles() override;
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
@ -77,48 +73,6 @@ private:
|
||||
bool supress_mipmap_for_cube = false;
|
||||
};
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
/// Syncs the clip enabled status to match the PICA register
|
||||
void SyncClipEnabled();
|
||||
|
||||
@ -171,15 +125,6 @@ private:
|
||||
/// Internal implementation for AccelerateDrawBatch
|
||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
|
||||
GLuint vs_input_index_max);
|
||||
|
@ -20,74 +20,6 @@
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
||||
bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
position[3] = v.pos.w.ToFloat32();
|
||||
color[0] = v.color.x.ToFloat32();
|
||||
color[1] = v.color.y.ToFloat32();
|
||||
color[2] = v.color.z.ToFloat32();
|
||||
color[3] = v.color.w.ToFloat32();
|
||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
normquat = -normquat;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This maps to the following layout in GLSL code:
|
||||
* layout(location = 0) in vec4 vert_position;
|
||||
* layout(location = 1) in vec4 vert_color;
|
||||
* layout(location = 2) in vec2 vert_texcoord0;
|
||||
* layout(location = 3) in vec2 vert_texcoord1;
|
||||
* layout(location = 4) in vec2 vert_texcoord2;
|
||||
* layout(location = 5) in float vert_texcoord0_w;
|
||||
* layout(location = 6) in vec4 vert_normquat;
|
||||
* layout(location = 7) in vec3 vert_view;
|
||||
*/
|
||||
constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
|
||||
VertexLayout layout{};
|
||||
layout.attribute_count = 8;
|
||||
layout.binding_count = 1;
|
||||
|
||||
// Define binding
|
||||
layout.bindings[0].binding.Assign(0);
|
||||
layout.bindings[0].fixed.Assign(0);
|
||||
layout.bindings[0].stride.Assign(sizeof(HardwareVertex));
|
||||
|
||||
// Define attributes
|
||||
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
||||
u32 offset = 0;
|
||||
|
||||
for (u32 loc = 0; loc < 8; loc++) {
|
||||
VertexAttribute& attribute = layout.attributes[loc];
|
||||
attribute.binding.Assign(0);
|
||||
attribute.location.Assign(loc);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(sizes[loc]);
|
||||
offset += sizes[loc] * sizeof(float);
|
||||
}
|
||||
|
||||
return layout;
|
||||
}
|
||||
|
||||
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||
@ -139,7 +71,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
||||
|
||||
// Define vertex layout for software shaders
|
||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
||||
MakeSoftwareVertexLayout();
|
||||
pipeline_info.vertex_layout = software_layout;
|
||||
|
||||
const SamplerInfo default_sampler_info = {
|
||||
.mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear,
|
||||
@ -242,39 +175,6 @@ void RasterizerVulkan::SyncFixedState() {
|
||||
SyncDepthWriteMask();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||
* for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(Q1, -Q2) positive.
|
||||
*
|
||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||
* between each step would work for those cases at the cost of being more complex to implement.
|
||||
*
|
||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||
|
||||
return (Common::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
static constexpr std::array vs_attrib_types = {
|
||||
AttribType::Byte, // VertexAttributeFormat::BYTE
|
||||
AttribType::Ubyte, // VertexAttributeFormat::UBYTE
|
||||
@ -282,50 +182,6 @@ static constexpr std::array vs_attrib_types = {
|
||||
AttribType::Float // VertexAttributeFormat::FLOAT
|
||||
};
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||
|
||||
u32 vertex_min;
|
||||
u32 vertex_max;
|
||||
if (is_indexed) {
|
||||
const auto& index_info = regs.pipeline.index_array;
|
||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||
const bool index_u16 = index_info.format != 0;
|
||||
|
||||
vertex_min = 0xFFFF;
|
||||
vertex_max = 0;
|
||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||
res_cache.FlushRegion(address, size, nullptr);
|
||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||
vertex_min = std::min(vertex_min, vertex);
|
||||
vertex_max = std::max(vertex_max, vertex);
|
||||
}
|
||||
} else {
|
||||
vertex_min = regs.pipeline.vertex_offset;
|
||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||
}
|
||||
|
||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||
u32 vs_input_size = 0;
|
||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||
if (loader.component_count != 0) {
|
||||
vs_input_size += loader.byte_count * vertex_num;
|
||||
}
|
||||
}
|
||||
|
||||
return {vertex_min, vertex_max, vs_input_size};
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||
u32 vs_input_index_max) {
|
||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
||||
@ -877,7 +733,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
||||
} else {
|
||||
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
|
||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
||||
pipeline_info.vertex_layout = software_layout;
|
||||
pipeline_cache.UseTrivialVertexShader();
|
||||
pipeline_cache.UseTrivialGeometryShader();
|
||||
pipeline_cache.BindPipeline(pipeline_info);
|
||||
@ -1604,6 +1460,33 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerVulkan::MakeSoftwareVertexLayout() {
|
||||
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
||||
|
||||
software_layout = VertexLayout{
|
||||
.binding_count = 1,
|
||||
.attribute_count = 8
|
||||
};
|
||||
|
||||
for (u32 i = 0; i < software_layout.binding_count; i++) {
|
||||
VertexBinding& binding = software_layout.bindings[i];
|
||||
binding.binding.Assign(i);
|
||||
binding.fixed.Assign(0);
|
||||
binding.stride.Assign(sizeof(HardwareVertex));
|
||||
}
|
||||
|
||||
u32 offset = 0;
|
||||
for (u32 i = 0; i < 8; i++) {
|
||||
VertexAttribute& attribute = software_layout.attributes[i];
|
||||
attribute.binding.Assign(0);
|
||||
attribute.location.Assign(i);
|
||||
attribute.offset.Assign(offset);
|
||||
attribute.type.Assign(AttribType::Float);
|
||||
attribute.size.Assign(sizes[i]);
|
||||
offset += sizes[i] * sizeof(float);
|
||||
}
|
||||
}
|
||||
|
||||
vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
|
||||
const bool use_border_color = instance.IsCustomBorderColorSupported() &&
|
||||
(info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder ||
|
||||
|
@ -9,7 +9,6 @@
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
#include "video_core/shader/shader.h"
|
||||
|
||||
namespace Frontend {
|
||||
class EmuWindow;
|
||||
@ -84,8 +83,6 @@ public:
|
||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) override;
|
||||
void DrawTriangles() override;
|
||||
void NotifyPicaRegisterChanged(u32 id) override;
|
||||
void FlushAll() override;
|
||||
@ -164,15 +161,6 @@ private:
|
||||
/// Copies vertex data performing needed convertions and casts
|
||||
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
|
||||
|
||||
struct VertexArrayInfo {
|
||||
u32 vs_input_index_min;
|
||||
u32 vs_input_index_max;
|
||||
u32 vs_input_size;
|
||||
};
|
||||
|
||||
/// Retrieve the range and the size of the input vertex
|
||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||
|
||||
/// Setup vertex array for AccelerateDrawBatch
|
||||
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
||||
|
||||
@ -182,6 +170,9 @@ private:
|
||||
/// Setup geometry shader for AccelerateDrawBatch
|
||||
bool SetupGeometryShader();
|
||||
|
||||
/// Creates the vertex layout struct used for software shader pipelines
|
||||
void MakeSoftwareVertexLayout();
|
||||
|
||||
/// Creates a new sampler object
|
||||
vk::Sampler CreateSampler(const SamplerInfo& info);
|
||||
|
||||
@ -196,26 +187,8 @@ private:
|
||||
DescriptorManager& desc_manager;
|
||||
RasterizerCache res_cache;
|
||||
PipelineCache pipeline_cache;
|
||||
bool shader_dirty = true;
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex() = default;
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||
|
||||
constexpr static VertexLayout GetVertexLayout();
|
||||
|
||||
Common::Vec4f position;
|
||||
Common::Vec4f color;
|
||||
Common::Vec2f tex_coord0;
|
||||
Common::Vec2f tex_coord1;
|
||||
Common::Vec2f tex_coord2;
|
||||
float tex_coord0_w;
|
||||
Common::Vec4f normquat;
|
||||
Common::Vec3f view;
|
||||
};
|
||||
|
||||
std::vector<HardwareVertex> vertex_batch;
|
||||
VertexLayout software_layout;
|
||||
std::array<u64, 16> binding_offsets{};
|
||||
vk::Sampler default_sampler;
|
||||
Surface null_surface;
|
||||
|
Reference in New Issue
Block a user