video_core: Move HardwareVertex to RasterizerAccelerated
This commit is contained in:
@ -20,10 +20,73 @@ static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
|
|||||||
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
|
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
||||||
|
bool flip_quaternion) {
|
||||||
|
position[0] = v.pos.x.ToFloat32();
|
||||||
|
position[1] = v.pos.y.ToFloat32();
|
||||||
|
position[2] = v.pos.z.ToFloat32();
|
||||||
|
position[3] = v.pos.w.ToFloat32();
|
||||||
|
color[0] = v.color.x.ToFloat32();
|
||||||
|
color[1] = v.color.y.ToFloat32();
|
||||||
|
color[2] = v.color.z.ToFloat32();
|
||||||
|
color[3] = v.color.w.ToFloat32();
|
||||||
|
tex_coord0[0] = v.tc0.x.ToFloat32();
|
||||||
|
tex_coord0[1] = v.tc0.y.ToFloat32();
|
||||||
|
tex_coord1[0] = v.tc1.x.ToFloat32();
|
||||||
|
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||||
|
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||||
|
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||||
|
tex_coord0_w = v.tc0_w.ToFloat32();
|
||||||
|
normquat[0] = v.quat.x.ToFloat32();
|
||||||
|
normquat[1] = v.quat.y.ToFloat32();
|
||||||
|
normquat[2] = v.quat.z.ToFloat32();
|
||||||
|
normquat[3] = v.quat.w.ToFloat32();
|
||||||
|
view[0] = v.view.x.ToFloat32();
|
||||||
|
view[1] = v.view.y.ToFloat32();
|
||||||
|
view[2] = v.view.z.ToFloat32();
|
||||||
|
|
||||||
|
if (flip_quaternion) {
|
||||||
|
normquat = -normquat;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RasterizerAccelerated::RasterizerAccelerated() {
|
RasterizerAccelerated::RasterizerAccelerated() {
|
||||||
uniform_block_data.lighting_lut_dirty.fill(true);
|
uniform_block_data.lighting_lut_dirty.fill(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
||||||
|
* for a detailed description of this issue (yuriks):
|
||||||
|
*
|
||||||
|
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||||
|
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||||
|
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||||
|
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
||||||
|
* making Dot(Q1, -Q2) positive.
|
||||||
|
*
|
||||||
|
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
||||||
|
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
||||||
|
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
||||||
|
* between each step would work for those cases at the cost of being more complex to implement.
|
||||||
|
*
|
||||||
|
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
||||||
|
* these issues, making this basic implementation actually more accurate to the hardware.
|
||||||
|
*/
|
||||||
|
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
||||||
|
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
||||||
|
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
||||||
|
|
||||||
|
return (Common::Dot(a, b) < 0.f);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||||
|
const Pica::Shader::OutputVertex& v1,
|
||||||
|
const Pica::Shader::OutputVertex& v2) {
|
||||||
|
vertex_batch.emplace_back(v0, false);
|
||||||
|
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||||
|
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
|
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
|
||||||
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
|
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
|
||||||
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
|
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
|
||||||
@ -116,6 +179,44 @@ void RasterizerAccelerated::ClearAll(bool flush) {
|
|||||||
cached_pages = {};
|
cached_pages = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) {
|
||||||
|
const auto& regs = Pica::g_state.regs;
|
||||||
|
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
||||||
|
|
||||||
|
u32 vertex_min;
|
||||||
|
u32 vertex_max;
|
||||||
|
if (is_indexed) {
|
||||||
|
const auto& index_info = regs.pipeline.index_array;
|
||||||
|
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
||||||
|
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
||||||
|
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
||||||
|
const bool index_u16 = index_info.format != 0;
|
||||||
|
|
||||||
|
vertex_min = 0xFFFF;
|
||||||
|
vertex_max = 0;
|
||||||
|
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
||||||
|
FlushRegion(address, size);
|
||||||
|
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
||||||
|
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
||||||
|
vertex_min = std::min(vertex_min, vertex);
|
||||||
|
vertex_max = std::max(vertex_max, vertex);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
vertex_min = regs.pipeline.vertex_offset;
|
||||||
|
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const u32 vertex_num = vertex_max - vertex_min + 1;
|
||||||
|
u32 vs_input_size = 0;
|
||||||
|
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
||||||
|
if (loader.component_count != 0) {
|
||||||
|
vs_input_size += loader.byte_count * vertex_num;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return {vertex_min, vertex_max, vs_input_size};
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerAccelerated::SyncDepthScale() {
|
void RasterizerAccelerated::SyncDepthScale() {
|
||||||
float depth_scale =
|
float depth_scale =
|
||||||
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
|
||||||
|
@ -16,8 +16,11 @@ public:
|
|||||||
RasterizerAccelerated();
|
RasterizerAccelerated();
|
||||||
virtual ~RasterizerAccelerated() = default;
|
virtual ~RasterizerAccelerated() = default;
|
||||||
|
|
||||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
void AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||||
|
const Pica::Shader::OutputVertex& v1,
|
||||||
|
const Pica::Shader::OutputVertex& v2) override;
|
||||||
|
|
||||||
|
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
||||||
void ClearAll(bool flush) override;
|
void ClearAll(bool flush) override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
@ -79,7 +82,8 @@ protected:
|
|||||||
/// Syncs the shadow texture bias to match the PICA register
|
/// Syncs the shadow texture bias to match the PICA register
|
||||||
void SyncShadowTextureBias();
|
void SyncShadowTextureBias();
|
||||||
|
|
||||||
private:
|
protected:
|
||||||
|
/// Structure that keeps tracks of the uniform state
|
||||||
struct UniformBlockData {
|
struct UniformBlockData {
|
||||||
Pica::Shader::UniformData data{};
|
Pica::Shader::UniformData data{};
|
||||||
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
|
||||||
@ -93,8 +97,34 @@ private:
|
|||||||
bool dirty = true;
|
bool dirty = true;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Structure that the hardware rendered vertices are composed of
|
||||||
|
struct HardwareVertex {
|
||||||
|
HardwareVertex() = default;
|
||||||
|
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
||||||
|
|
||||||
|
Common::Vec4f position;
|
||||||
|
Common::Vec4f color;
|
||||||
|
Common::Vec2f tex_coord0;
|
||||||
|
Common::Vec2f tex_coord1;
|
||||||
|
Common::Vec2f tex_coord2;
|
||||||
|
float tex_coord0_w;
|
||||||
|
Common::Vec4f normquat;
|
||||||
|
Common::Vec3f view;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct VertexArrayInfo {
|
||||||
|
u32 vs_input_index_min;
|
||||||
|
u32 vs_input_index_max;
|
||||||
|
u32 vs_input_size;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Retrieve the range and the size of the input vertex
|
||||||
|
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::array<u16, 0x30000> cached_pages{};
|
std::array<u16, 0x30000> cached_pages{};
|
||||||
|
std::vector<HardwareVertex> vertex_batch;
|
||||||
|
bool shader_dirty = true;
|
||||||
|
|
||||||
UniformBlockData uniform_block_data{};
|
UniformBlockData uniform_block_data{};
|
||||||
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
|
||||||
|
@ -202,39 +202,6 @@ void RasterizerOpenGL::SyncEntireState() {
|
|||||||
SyncShadowTextureBias();
|
SyncShadowTextureBias();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
|
||||||
* for a detailed description of this issue (yuriks):
|
|
||||||
*
|
|
||||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
|
||||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
|
||||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
|
||||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
|
||||||
* making Dot(Q1, -Q2) positive.
|
|
||||||
*
|
|
||||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
|
||||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
|
||||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
|
||||||
* between each step would work for those cases at the cost of being more complex to implement.
|
|
||||||
*
|
|
||||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
|
||||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
|
||||||
*/
|
|
||||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
|
||||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
|
||||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
|
||||||
|
|
||||||
return (Common::Dot(a, b) < 0.f);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
|
||||||
const Pica::Shader::OutputVertex& v1,
|
|
||||||
const Pica::Shader::OutputVertex& v2) {
|
|
||||||
vertex_batch.emplace_back(v0, false);
|
|
||||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
|
||||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr std::array<GLenum, 4> vs_attrib_types{
|
static constexpr std::array<GLenum, 4> vs_attrib_types{
|
||||||
GL_BYTE, // VertexAttributeFormat::BYTE
|
GL_BYTE, // VertexAttributeFormat::BYTE
|
||||||
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
|
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
|
||||||
@ -242,50 +209,6 @@ static constexpr std::array<GLenum, 4> vs_attrib_types{
|
|||||||
GL_FLOAT // VertexAttributeFormat::FLOAT
|
GL_FLOAT // VertexAttributeFormat::FLOAT
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VertexArrayInfo {
|
|
||||||
u32 vs_input_index_min;
|
|
||||||
u32 vs_input_index_max;
|
|
||||||
u32 vs_input_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
|
|
||||||
const auto& regs = Pica::g_state.regs;
|
|
||||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
|
||||||
|
|
||||||
u32 vertex_min;
|
|
||||||
u32 vertex_max;
|
|
||||||
if (is_indexed) {
|
|
||||||
const auto& index_info = regs.pipeline.index_array;
|
|
||||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
|
||||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
|
||||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
|
||||||
const bool index_u16 = index_info.format != 0;
|
|
||||||
|
|
||||||
vertex_min = 0xFFFF;
|
|
||||||
vertex_max = 0;
|
|
||||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
|
||||||
res_cache.FlushRegion(address, size, nullptr);
|
|
||||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
|
||||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
|
||||||
vertex_min = std::min(vertex_min, vertex);
|
|
||||||
vertex_max = std::max(vertex_max, vertex);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
vertex_min = regs.pipeline.vertex_offset;
|
|
||||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
|
||||||
u32 vs_input_size = 0;
|
|
||||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
|
||||||
if (loader.component_count != 0) {
|
|
||||||
vs_input_size += loader.byte_count * vertex_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {vertex_min, vertex_max, vs_input_size};
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||||
GLuint vs_input_index_min, GLuint vs_input_index_max) {
|
GLuint vs_input_index_min, GLuint vs_input_index_max) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_VAO);
|
MICROPROFILE_SCOPE(OpenGL_VAO);
|
||||||
|
@ -5,13 +5,11 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "core/hw/gpu.h"
|
#include "core/hw/gpu.h"
|
||||||
#include "video_core/pica_types.h"
|
|
||||||
#include "video_core/rasterizer_accelerated.h"
|
#include "video_core/rasterizer_accelerated.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_state.h"
|
#include "video_core/renderer_opengl/gl_state.h"
|
||||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||||
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
||||||
#include "video_core/shader/shader.h"
|
|
||||||
|
|
||||||
namespace Frontend {
|
namespace Frontend {
|
||||||
class EmuWindow;
|
class EmuWindow;
|
||||||
@ -32,8 +30,6 @@ public:
|
|||||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||||
|
|
||||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
|
||||||
const Pica::Shader::OutputVertex& v2) override;
|
|
||||||
void DrawTriangles() override;
|
void DrawTriangles() override;
|
||||||
void NotifyPicaRegisterChanged(u32 id) override;
|
void NotifyPicaRegisterChanged(u32 id) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
@ -77,48 +73,6 @@ private:
|
|||||||
bool supress_mipmap_for_cube = false;
|
bool supress_mipmap_for_cube = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Structure that the hardware rendered vertices are composed of
|
|
||||||
struct HardwareVertex {
|
|
||||||
HardwareVertex() = default;
|
|
||||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
|
||||||
position[0] = v.pos.x.ToFloat32();
|
|
||||||
position[1] = v.pos.y.ToFloat32();
|
|
||||||
position[2] = v.pos.z.ToFloat32();
|
|
||||||
position[3] = v.pos.w.ToFloat32();
|
|
||||||
color[0] = v.color.x.ToFloat32();
|
|
||||||
color[1] = v.color.y.ToFloat32();
|
|
||||||
color[2] = v.color.z.ToFloat32();
|
|
||||||
color[3] = v.color.w.ToFloat32();
|
|
||||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
|
||||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
|
||||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
|
||||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
|
||||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
|
||||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
|
||||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
|
||||||
normquat[0] = v.quat.x.ToFloat32();
|
|
||||||
normquat[1] = v.quat.y.ToFloat32();
|
|
||||||
normquat[2] = v.quat.z.ToFloat32();
|
|
||||||
normquat[3] = v.quat.w.ToFloat32();
|
|
||||||
view[0] = v.view.x.ToFloat32();
|
|
||||||
view[1] = v.view.y.ToFloat32();
|
|
||||||
view[2] = v.view.z.ToFloat32();
|
|
||||||
|
|
||||||
if (flip_quaternion) {
|
|
||||||
normquat = -normquat;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Common::Vec4f position;
|
|
||||||
Common::Vec4f color;
|
|
||||||
Common::Vec2f tex_coord0;
|
|
||||||
Common::Vec2f tex_coord1;
|
|
||||||
Common::Vec2f tex_coord2;
|
|
||||||
float tex_coord0_w;
|
|
||||||
Common::Vec4f normquat;
|
|
||||||
Common::Vec3f view;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Syncs the clip enabled status to match the PICA register
|
/// Syncs the clip enabled status to match the PICA register
|
||||||
void SyncClipEnabled();
|
void SyncClipEnabled();
|
||||||
|
|
||||||
@ -171,15 +125,6 @@ private:
|
|||||||
/// Internal implementation for AccelerateDrawBatch
|
/// Internal implementation for AccelerateDrawBatch
|
||||||
bool AccelerateDrawBatchInternal(bool is_indexed);
|
bool AccelerateDrawBatchInternal(bool is_indexed);
|
||||||
|
|
||||||
struct VertexArrayInfo {
|
|
||||||
u32 vs_input_index_min;
|
|
||||||
u32 vs_input_index_max;
|
|
||||||
u32 vs_input_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Retrieve the range and the size of the input vertex
|
|
||||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
|
||||||
|
|
||||||
/// Setup vertex array for AccelerateDrawBatch
|
/// Setup vertex array for AccelerateDrawBatch
|
||||||
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
|
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
|
||||||
GLuint vs_input_index_max);
|
GLuint vs_input_index_max);
|
||||||
|
@ -20,74 +20,6 @@
|
|||||||
|
|
||||||
namespace Vulkan {
|
namespace Vulkan {
|
||||||
|
|
||||||
RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
|
|
||||||
bool flip_quaternion) {
|
|
||||||
position[0] = v.pos.x.ToFloat32();
|
|
||||||
position[1] = v.pos.y.ToFloat32();
|
|
||||||
position[2] = v.pos.z.ToFloat32();
|
|
||||||
position[3] = v.pos.w.ToFloat32();
|
|
||||||
color[0] = v.color.x.ToFloat32();
|
|
||||||
color[1] = v.color.y.ToFloat32();
|
|
||||||
color[2] = v.color.z.ToFloat32();
|
|
||||||
color[3] = v.color.w.ToFloat32();
|
|
||||||
tex_coord0[0] = v.tc0.x.ToFloat32();
|
|
||||||
tex_coord0[1] = v.tc0.y.ToFloat32();
|
|
||||||
tex_coord1[0] = v.tc1.x.ToFloat32();
|
|
||||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
|
||||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
|
||||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
|
||||||
tex_coord0_w = v.tc0_w.ToFloat32();
|
|
||||||
normquat[0] = v.quat.x.ToFloat32();
|
|
||||||
normquat[1] = v.quat.y.ToFloat32();
|
|
||||||
normquat[2] = v.quat.z.ToFloat32();
|
|
||||||
normquat[3] = v.quat.w.ToFloat32();
|
|
||||||
view[0] = v.view.x.ToFloat32();
|
|
||||||
view[1] = v.view.y.ToFloat32();
|
|
||||||
view[2] = v.view.z.ToFloat32();
|
|
||||||
|
|
||||||
if (flip_quaternion) {
|
|
||||||
normquat = -normquat;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This maps to the following layout in GLSL code:
|
|
||||||
* layout(location = 0) in vec4 vert_position;
|
|
||||||
* layout(location = 1) in vec4 vert_color;
|
|
||||||
* layout(location = 2) in vec2 vert_texcoord0;
|
|
||||||
* layout(location = 3) in vec2 vert_texcoord1;
|
|
||||||
* layout(location = 4) in vec2 vert_texcoord2;
|
|
||||||
* layout(location = 5) in float vert_texcoord0_w;
|
|
||||||
* layout(location = 6) in vec4 vert_normquat;
|
|
||||||
* layout(location = 7) in vec3 vert_view;
|
|
||||||
*/
|
|
||||||
constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
|
|
||||||
VertexLayout layout{};
|
|
||||||
layout.attribute_count = 8;
|
|
||||||
layout.binding_count = 1;
|
|
||||||
|
|
||||||
// Define binding
|
|
||||||
layout.bindings[0].binding.Assign(0);
|
|
||||||
layout.bindings[0].fixed.Assign(0);
|
|
||||||
layout.bindings[0].stride.Assign(sizeof(HardwareVertex));
|
|
||||||
|
|
||||||
// Define attributes
|
|
||||||
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
|
||||||
u32 offset = 0;
|
|
||||||
|
|
||||||
for (u32 loc = 0; loc < 8; loc++) {
|
|
||||||
VertexAttribute& attribute = layout.attributes[loc];
|
|
||||||
attribute.binding.Assign(0);
|
|
||||||
attribute.location.Assign(loc);
|
|
||||||
attribute.offset.Assign(offset);
|
|
||||||
attribute.type.Assign(AttribType::Float);
|
|
||||||
attribute.size.Assign(sizes[loc]);
|
|
||||||
offset += sizes[loc] * sizeof(float);
|
|
||||||
}
|
|
||||||
|
|
||||||
return layout;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
|
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
|
||||||
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||||
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
|
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
|
||||||
@ -139,7 +71,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
|
|||||||
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
|
||||||
|
|
||||||
// Define vertex layout for software shaders
|
// Define vertex layout for software shaders
|
||||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
MakeSoftwareVertexLayout();
|
||||||
|
pipeline_info.vertex_layout = software_layout;
|
||||||
|
|
||||||
const SamplerInfo default_sampler_info = {
|
const SamplerInfo default_sampler_info = {
|
||||||
.mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear,
|
.mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear,
|
||||||
@ -242,39 +175,6 @@ void RasterizerVulkan::SyncFixedState() {
|
|||||||
SyncDepthWriteMask();
|
SyncDepthWriteMask();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
|
|
||||||
* for a detailed description of this issue (yuriks):
|
|
||||||
*
|
|
||||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
|
||||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
|
||||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
|
||||||
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
|
|
||||||
* making Dot(Q1, -Q2) positive.
|
|
||||||
*
|
|
||||||
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
|
|
||||||
* correct for most cases but can still rotate around the long way sometimes. An implementation
|
|
||||||
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
|
|
||||||
* between each step would work for those cases at the cost of being more complex to implement.
|
|
||||||
*
|
|
||||||
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
|
|
||||||
* these issues, making this basic implementation actually more accurate to the hardware.
|
|
||||||
*/
|
|
||||||
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
|
|
||||||
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
|
|
||||||
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
|
|
||||||
|
|
||||||
return (Common::Dot(a, b) < 0.f);
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
|
||||||
const Pica::Shader::OutputVertex& v1,
|
|
||||||
const Pica::Shader::OutputVertex& v2) {
|
|
||||||
vertex_batch.emplace_back(v0, false);
|
|
||||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
|
||||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
|
||||||
}
|
|
||||||
|
|
||||||
static constexpr std::array vs_attrib_types = {
|
static constexpr std::array vs_attrib_types = {
|
||||||
AttribType::Byte, // VertexAttributeFormat::BYTE
|
AttribType::Byte, // VertexAttributeFormat::BYTE
|
||||||
AttribType::Ubyte, // VertexAttributeFormat::UBYTE
|
AttribType::Ubyte, // VertexAttributeFormat::UBYTE
|
||||||
@ -282,50 +182,6 @@ static constexpr std::array vs_attrib_types = {
|
|||||||
AttribType::Float // VertexAttributeFormat::FLOAT
|
AttribType::Float // VertexAttributeFormat::FLOAT
|
||||||
};
|
};
|
||||||
|
|
||||||
struct VertexArrayInfo {
|
|
||||||
u32 vs_input_index_min;
|
|
||||||
u32 vs_input_index_max;
|
|
||||||
u32 vs_input_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) {
|
|
||||||
const auto& regs = Pica::g_state.regs;
|
|
||||||
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
|
|
||||||
|
|
||||||
u32 vertex_min;
|
|
||||||
u32 vertex_max;
|
|
||||||
if (is_indexed) {
|
|
||||||
const auto& index_info = regs.pipeline.index_array;
|
|
||||||
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
|
|
||||||
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
|
|
||||||
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
|
|
||||||
const bool index_u16 = index_info.format != 0;
|
|
||||||
|
|
||||||
vertex_min = 0xFFFF;
|
|
||||||
vertex_max = 0;
|
|
||||||
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
|
|
||||||
res_cache.FlushRegion(address, size, nullptr);
|
|
||||||
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
|
|
||||||
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
|
|
||||||
vertex_min = std::min(vertex_min, vertex);
|
|
||||||
vertex_max = std::max(vertex_max, vertex);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
vertex_min = regs.pipeline.vertex_offset;
|
|
||||||
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
const u32 vertex_num = vertex_max - vertex_min + 1;
|
|
||||||
u32 vs_input_size = 0;
|
|
||||||
for (const auto& loader : vertex_attributes.attribute_loaders) {
|
|
||||||
if (loader.component_count != 0) {
|
|
||||||
vs_input_size += loader.byte_count * vertex_num;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return {vertex_min, vertex_max, vs_input_size};
|
|
||||||
}
|
|
||||||
|
|
||||||
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
|
||||||
u32 vs_input_index_max) {
|
u32 vs_input_index_max) {
|
||||||
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
|
||||||
@ -877,7 +733,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
|||||||
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
||||||
} else {
|
} else {
|
||||||
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
|
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
|
||||||
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
|
pipeline_info.vertex_layout = software_layout;
|
||||||
pipeline_cache.UseTrivialVertexShader();
|
pipeline_cache.UseTrivialVertexShader();
|
||||||
pipeline_cache.UseTrivialGeometryShader();
|
pipeline_cache.UseTrivialGeometryShader();
|
||||||
pipeline_cache.BindPipeline(pipeline_info);
|
pipeline_cache.BindPipeline(pipeline_info);
|
||||||
@ -1604,6 +1460,33 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::MakeSoftwareVertexLayout() {
|
||||||
|
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
|
||||||
|
|
||||||
|
software_layout = VertexLayout{
|
||||||
|
.binding_count = 1,
|
||||||
|
.attribute_count = 8
|
||||||
|
};
|
||||||
|
|
||||||
|
for (u32 i = 0; i < software_layout.binding_count; i++) {
|
||||||
|
VertexBinding& binding = software_layout.bindings[i];
|
||||||
|
binding.binding.Assign(i);
|
||||||
|
binding.fixed.Assign(0);
|
||||||
|
binding.stride.Assign(sizeof(HardwareVertex));
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 offset = 0;
|
||||||
|
for (u32 i = 0; i < 8; i++) {
|
||||||
|
VertexAttribute& attribute = software_layout.attributes[i];
|
||||||
|
attribute.binding.Assign(0);
|
||||||
|
attribute.location.Assign(i);
|
||||||
|
attribute.offset.Assign(offset);
|
||||||
|
attribute.type.Assign(AttribType::Float);
|
||||||
|
attribute.size.Assign(sizes[i]);
|
||||||
|
offset += sizes[i] * sizeof(float);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
|
vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
|
||||||
const bool use_border_color = instance.IsCustomBorderColorSupported() &&
|
const bool use_border_color = instance.IsCustomBorderColorSupported() &&
|
||||||
(info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder ||
|
(info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder ||
|
||||||
|
@ -9,7 +9,6 @@
|
|||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||||
#include "video_core/shader/shader.h"
|
|
||||||
|
|
||||||
namespace Frontend {
|
namespace Frontend {
|
||||||
class EmuWindow;
|
class EmuWindow;
|
||||||
@ -84,8 +83,6 @@ public:
|
|||||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||||
|
|
||||||
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
|
|
||||||
const Pica::Shader::OutputVertex& v2) override;
|
|
||||||
void DrawTriangles() override;
|
void DrawTriangles() override;
|
||||||
void NotifyPicaRegisterChanged(u32 id) override;
|
void NotifyPicaRegisterChanged(u32 id) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
@ -164,15 +161,6 @@ private:
|
|||||||
/// Copies vertex data performing needed convertions and casts
|
/// Copies vertex data performing needed convertions and casts
|
||||||
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
|
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
|
||||||
|
|
||||||
struct VertexArrayInfo {
|
|
||||||
u32 vs_input_index_min;
|
|
||||||
u32 vs_input_index_max;
|
|
||||||
u32 vs_input_size;
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Retrieve the range and the size of the input vertex
|
|
||||||
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
|
|
||||||
|
|
||||||
/// Setup vertex array for AccelerateDrawBatch
|
/// Setup vertex array for AccelerateDrawBatch
|
||||||
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
|
||||||
|
|
||||||
@ -182,6 +170,9 @@ private:
|
|||||||
/// Setup geometry shader for AccelerateDrawBatch
|
/// Setup geometry shader for AccelerateDrawBatch
|
||||||
bool SetupGeometryShader();
|
bool SetupGeometryShader();
|
||||||
|
|
||||||
|
/// Creates the vertex layout struct used for software shader pipelines
|
||||||
|
void MakeSoftwareVertexLayout();
|
||||||
|
|
||||||
/// Creates a new sampler object
|
/// Creates a new sampler object
|
||||||
vk::Sampler CreateSampler(const SamplerInfo& info);
|
vk::Sampler CreateSampler(const SamplerInfo& info);
|
||||||
|
|
||||||
@ -196,26 +187,8 @@ private:
|
|||||||
DescriptorManager& desc_manager;
|
DescriptorManager& desc_manager;
|
||||||
RasterizerCache res_cache;
|
RasterizerCache res_cache;
|
||||||
PipelineCache pipeline_cache;
|
PipelineCache pipeline_cache;
|
||||||
bool shader_dirty = true;
|
|
||||||
|
|
||||||
/// Structure that the hardware rendered vertices are composed of
|
VertexLayout software_layout;
|
||||||
struct HardwareVertex {
|
|
||||||
HardwareVertex() = default;
|
|
||||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
|
|
||||||
|
|
||||||
constexpr static VertexLayout GetVertexLayout();
|
|
||||||
|
|
||||||
Common::Vec4f position;
|
|
||||||
Common::Vec4f color;
|
|
||||||
Common::Vec2f tex_coord0;
|
|
||||||
Common::Vec2f tex_coord1;
|
|
||||||
Common::Vec2f tex_coord2;
|
|
||||||
float tex_coord0_w;
|
|
||||||
Common::Vec4f normquat;
|
|
||||||
Common::Vec3f view;
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<HardwareVertex> vertex_batch;
|
|
||||||
std::array<u64, 16> binding_offsets{};
|
std::array<u64, 16> binding_offsets{};
|
||||||
vk::Sampler default_sampler;
|
vk::Sampler default_sampler;
|
||||||
Surface null_surface;
|
Surface null_surface;
|
||||||
|
Reference in New Issue
Block a user