diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index fd99abe6a..83a7f2d7c 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -20,10 +20,73 @@ static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) { return Common::Vec3u{color.r, color.g, color.b} / 255.0f; } +RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v, + bool flip_quaternion) { + position[0] = v.pos.x.ToFloat32(); + position[1] = v.pos.y.ToFloat32(); + position[2] = v.pos.z.ToFloat32(); + position[3] = v.pos.w.ToFloat32(); + color[0] = v.color.x.ToFloat32(); + color[1] = v.color.y.ToFloat32(); + color[2] = v.color.z.ToFloat32(); + color[3] = v.color.w.ToFloat32(); + tex_coord0[0] = v.tc0.x.ToFloat32(); + tex_coord0[1] = v.tc0.y.ToFloat32(); + tex_coord1[0] = v.tc1.x.ToFloat32(); + tex_coord1[1] = v.tc1.y.ToFloat32(); + tex_coord2[0] = v.tc2.x.ToFloat32(); + tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); + normquat[0] = v.quat.x.ToFloat32(); + normquat[1] = v.quat.y.ToFloat32(); + normquat[2] = v.quat.z.ToFloat32(); + normquat[3] = v.quat.w.ToFloat32(); + view[0] = v.view.x.ToFloat32(); + view[1] = v.view.y.ToFloat32(); + view[2] = v.view.z.ToFloat32(); + + if (flip_quaternion) { + normquat = -normquat; + } +} + RasterizerAccelerated::RasterizerAccelerated() { uniform_block_data.lighting_lut_dirty.fill(true); } +/** + * This is a helper function to resolve an issue when interpolating opposite quaternions. See below + * for a detailed description of this issue (yuriks): + * + * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you + * interpolate two quaternions that are opposite, instead of going from one rotation to another + * using the shortest path, you'll go around the longest path. You can test if two quaternions are + * opposite by checking if Dot(Q1, Q2) < 0. 
In that case, you can flip either of them, therefore + * making Dot(Q1, -Q2) positive. + * + * This solution corrects this issue per-vertex before passing the quaternions to the GPU. This is + * correct for most cases but can still rotate around the long way sometimes. An implementation + * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check + * between each step would work for those cases at the cost of being more complex to implement. + * + * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around + * these issues, making this basic implementation actually more accurate to the hardware. + */ +static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { + Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; + Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; + + return (Common::Dot(a, b) < 0.f); +} + +void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) { + vertex_batch.emplace_back(v0, false); + vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); + vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); +} + void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { const u32 page_start = addr >> Memory::CITRA_PAGE_BITS; const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1; @@ -116,6 +179,44 @@ void RasterizerAccelerated::ClearAll(bool flush) { cached_pages = {}; } +RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) { + const auto& regs = Pica::g_state.regs; + const auto& vertex_attributes = regs.pipeline.vertex_attributes; + + u32 vertex_min; + u32 vertex_max; + if (is_indexed) { + const auto& index_info = regs.pipeline.index_array; + const PAddr address = 
vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; + const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); + const u16* index_address_16 = reinterpret_cast(index_address_8); + const bool index_u16 = index_info.format != 0; + + vertex_min = 0xFFFF; + vertex_max = 0; + const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); + FlushRegion(address, size); + for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { + const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; + vertex_min = std::min(vertex_min, vertex); + vertex_max = std::max(vertex_max, vertex); + } + } else { + vertex_min = regs.pipeline.vertex_offset; + vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; + } + + const u32 vertex_num = vertex_max - vertex_min + 1; + u32 vs_input_size = 0; + for (const auto& loader : vertex_attributes.attribute_loaders) { + if (loader.component_count != 0) { + vs_input_size += loader.byte_count * vertex_num; + } + } + + return {vertex_min, vertex_max, vs_input_size}; +} + void RasterizerAccelerated::SyncDepthScale() { float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32(); diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 184b74e00..114132701 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -16,8 +16,11 @@ public: RasterizerAccelerated(); virtual ~RasterizerAccelerated() = default; - void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override; + void AddTriangle(const Pica::Shader::OutputVertex& v0, + const Pica::Shader::OutputVertex& v1, + const Pica::Shader::OutputVertex& v2) override; + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override; void ClearAll(bool flush) override; protected: @@ -79,7 +82,8 @@ protected: /// Syncs the shadow texture bias to match the PICA register void 
SyncShadowTextureBias(); -private: +protected: + /// Structure that keeps track of the uniform state struct UniformBlockData { Pica::Shader::UniformData data{}; std::array lighting_lut_dirty{}; @@ -93,8 +97,34 @@ private: bool dirty = true; }; + /// Structure that the hardware rendered vertices are composed of + struct HardwareVertex { + HardwareVertex() = default; + HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion); + + Common::Vec4f position; + Common::Vec4f color; + Common::Vec2f tex_coord0; + Common::Vec2f tex_coord1; + Common::Vec2f tex_coord2; + float tex_coord0_w; + Common::Vec4f normquat; + Common::Vec3f view; + }; + + struct VertexArrayInfo { + u32 vs_input_index_min; + u32 vs_input_index_max; + u32 vs_input_size; + }; + + /// Retrieve the range and the size of the input vertex + VertexArrayInfo AnalyzeVertexArray(bool is_indexed); + protected: std::array cached_pages{}; + std::vector vertex_batch; + bool shader_dirty = true; UniformBlockData uniform_block_data{}; std::array, Pica::LightingRegs::NumLightingSampler> diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cc9b6b543..25b87abf2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -202,39 +202,6 @@ void RasterizerOpenGL::SyncEntireState() { SyncShadowTextureBias(); } -/** - * This is a helper function to resolve an issue when interpolating opposite quaternions. See below - * for a detailed description of this issue (yuriks): - * - * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you - * interpolate two quaternions that are opposite, instead of going from one rotation to another - * using the shortest path, you'll go around the longest path. You can test if two quaternions are - * opposite by checking if Dot(Q1, Q2) < 0. 
In that case, you can flip either of them, therefore - * making Dot(Q1, -Q2) positive. - * - * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is - * correct for most cases but can still rotate around the long way sometimes. An implementation - * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check - * between each step would work for those cases at the cost of being more complex to implement. - * - * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around - * these issues, making this basic implementation actually more accurate to the hardware. - */ -static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { - Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; - Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; - - return (Common::Dot(a, b) < 0.f); -} - -void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0, false); - vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); - vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); -} - static constexpr std::array vs_attrib_types{ GL_BYTE, // VertexAttributeFormat::BYTE GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE @@ -242,50 +209,6 @@ static constexpr std::array vs_attrib_types{ GL_FLOAT // VertexAttributeFormat::FLOAT }; -struct VertexArrayInfo { - u32 vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; -}; - -RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Pica::g_state.regs; - const auto& vertex_attributes = regs.pipeline.vertex_attributes; - - u32 vertex_min; - u32 vertex_max; - if (is_indexed) { - const auto& index_info = regs.pipeline.index_array; - const 
PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; - const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); - const u16* index_address_16 = reinterpret_cast(index_address_8); - const bool index_u16 = index_info.format != 0; - - vertex_min = 0xFFFF; - vertex_max = 0; - const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); - res_cache.FlushRegion(address, size, nullptr); - for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { - const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; - vertex_min = std::min(vertex_min, vertex); - vertex_max = std::max(vertex_max, vertex); - } - } else { - vertex_min = regs.pipeline.vertex_offset; - vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; - } - - const u32 vertex_num = vertex_max - vertex_min + 1; - u32 vs_input_size = 0; - for (const auto& loader : vertex_attributes.attribute_loaders) { - if (loader.component_count != 0) { - vs_input_size += loader.byte_count * vertex_num; - } - } - - return {vertex_min, vertex_max, vs_input_size}; -} - void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min, GLuint vs_input_index_max) { MICROPROFILE_SCOPE(OpenGL_VAO); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b5d08a23a..efdefd80e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -5,13 +5,11 @@ #pragma once #include "core/hw/gpu.h" -#include "video_core/pica_types.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" -#include "video_core/shader/shader.h" namespace Frontend { class EmuWindow; @@ -32,8 +30,6 @@ 
public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; void DrawTriangles() override; void NotifyPicaRegisterChanged(u32 id) override; void FlushAll() override; @@ -77,48 +73,6 @@ private: bool supress_mipmap_for_cube = false; }; - /// Structure that the hardware rendered vertices are composed of - struct HardwareVertex { - HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) { - position[0] = v.pos.x.ToFloat32(); - position[1] = v.pos.y.ToFloat32(); - position[2] = v.pos.z.ToFloat32(); - position[3] = v.pos.w.ToFloat32(); - color[0] = v.color.x.ToFloat32(); - color[1] = v.color.y.ToFloat32(); - color[2] = v.color.z.ToFloat32(); - color[3] = v.color.w.ToFloat32(); - tex_coord0[0] = v.tc0.x.ToFloat32(); - tex_coord0[1] = v.tc0.y.ToFloat32(); - tex_coord1[0] = v.tc1.x.ToFloat32(); - tex_coord1[1] = v.tc1.y.ToFloat32(); - tex_coord2[0] = v.tc2.x.ToFloat32(); - tex_coord2[1] = v.tc2.y.ToFloat32(); - tex_coord0_w = v.tc0_w.ToFloat32(); - normquat[0] = v.quat.x.ToFloat32(); - normquat[1] = v.quat.y.ToFloat32(); - normquat[2] = v.quat.z.ToFloat32(); - normquat[3] = v.quat.w.ToFloat32(); - view[0] = v.view.x.ToFloat32(); - view[1] = v.view.y.ToFloat32(); - view[2] = v.view.z.ToFloat32(); - - if (flip_quaternion) { - normquat = -normquat; - } - } - - Common::Vec4f position; - Common::Vec4f color; - Common::Vec2f tex_coord0; - Common::Vec2f tex_coord1; - Common::Vec2f tex_coord2; - float tex_coord0_w; - Common::Vec4f normquat; - Common::Vec3f view; - }; - /// Syncs the clip enabled status to match the PICA register void SyncClipEnabled(); @@ -171,15 +125,6 @@ private: /// Internal implementation for AccelerateDrawBatch bool AccelerateDrawBatchInternal(bool is_indexed); - struct VertexArrayInfo { - u32 
vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; - }; - - /// Retrieve the range and the size of the input vertex - VertexArrayInfo AnalyzeVertexArray(bool is_indexed); - /// Setup vertex array for AccelerateDrawBatch void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min, GLuint vs_input_index_max); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index da839999d..74ccb7c98 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -20,74 +20,6 @@ namespace Vulkan { -RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v, - bool flip_quaternion) { - position[0] = v.pos.x.ToFloat32(); - position[1] = v.pos.y.ToFloat32(); - position[2] = v.pos.z.ToFloat32(); - position[3] = v.pos.w.ToFloat32(); - color[0] = v.color.x.ToFloat32(); - color[1] = v.color.y.ToFloat32(); - color[2] = v.color.z.ToFloat32(); - color[3] = v.color.w.ToFloat32(); - tex_coord0[0] = v.tc0.x.ToFloat32(); - tex_coord0[1] = v.tc0.y.ToFloat32(); - tex_coord1[0] = v.tc1.x.ToFloat32(); - tex_coord1[1] = v.tc1.y.ToFloat32(); - tex_coord2[0] = v.tc2.x.ToFloat32(); - tex_coord2[1] = v.tc2.y.ToFloat32(); - tex_coord0_w = v.tc0_w.ToFloat32(); - normquat[0] = v.quat.x.ToFloat32(); - normquat[1] = v.quat.y.ToFloat32(); - normquat[2] = v.quat.z.ToFloat32(); - normquat[3] = v.quat.w.ToFloat32(); - view[0] = v.view.x.ToFloat32(); - view[1] = v.view.y.ToFloat32(); - view[2] = v.view.z.ToFloat32(); - - if (flip_quaternion) { - normquat = -normquat; - } -} - -/** - * This maps to the following layout in GLSL code: - * layout(location = 0) in vec4 vert_position; - * layout(location = 1) in vec4 vert_color; - * layout(location = 2) in vec2 vert_texcoord0; - * layout(location = 3) in vec2 vert_texcoord1; - * layout(location = 4) in vec2 vert_texcoord2; - * layout(location = 5) in float vert_texcoord0_w; - * 
layout(location = 6) in vec4 vert_normquat; - * layout(location = 7) in vec3 vert_view; - */ -constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() { - VertexLayout layout{}; - layout.attribute_count = 8; - layout.binding_count = 1; - - // Define binding - layout.bindings[0].binding.Assign(0); - layout.bindings[0].fixed.Assign(0); - layout.bindings[0].stride.Assign(sizeof(HardwareVertex)); - - // Define attributes - constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; - u32 offset = 0; - - for (u32 loc = 0; loc < 8; loc++) { - VertexAttribute& attribute = layout.attributes[loc]; - attribute.binding.Assign(0); - attribute.location.Assign(loc); - attribute.offset.Assign(offset); - attribute.type.Assign(AttribType::Float); - attribute.size.Assign(sizes[loc]); - offset += sizes[loc] * sizeof(float); - } - - return layout; -} - constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024; constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024; constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024; @@ -139,7 +71,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan Common::AlignUp(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment); // Define vertex layout for software shaders - pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout(); + MakeSoftwareVertexLayout(); + pipeline_info.vertex_layout = software_layout; const SamplerInfo default_sampler_info = { .mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear, @@ -242,39 +175,6 @@ void RasterizerVulkan::SyncFixedState() { SyncDepthWriteMask(); } -/** - * This is a helper function to resolve an issue when interpolating opposite quaternions. See below - * for a detailed description of this issue (yuriks): - * - * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. 
If you - * interpolate two quaternions that are opposite, instead of going from one rotation to another - * using the shortest path, you'll go around the longest path. You can test if two quaternions are - * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore - * making Dot(Q1, -Q2) positive. - * - * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is - * correct for most cases but can still rotate around the long way sometimes. An implementation - * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check - * between each step would work for those cases at the cost of being more complex to implement. - * - * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around - * these issues, making this basic implementation actually more accurate to the hardware. - */ -static bool AreQuaternionsOpposite(Common::Vec4 qa, Common::Vec4 qb) { - Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; - Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()}; - - return (Common::Dot(a, b) < 0.f); -} - -void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0, - const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) { - vertex_batch.emplace_back(v0, false); - vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat)); - vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat)); -} - static constexpr std::array vs_attrib_types = { AttribType::Byte, // VertexAttributeFormat::BYTE AttribType::Ubyte, // VertexAttributeFormat::UBYTE @@ -282,50 +182,6 @@ static constexpr std::array vs_attrib_types = { AttribType::Float // VertexAttributeFormat::FLOAT }; -struct VertexArrayInfo { - u32 vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; -}; - -RasterizerVulkan::VertexArrayInfo 
RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) { - const auto& regs = Pica::g_state.regs; - const auto& vertex_attributes = regs.pipeline.vertex_attributes; - - u32 vertex_min; - u32 vertex_max; - if (is_indexed) { - const auto& index_info = regs.pipeline.index_array; - const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset; - const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address); - const u16* index_address_16 = reinterpret_cast(index_address_8); - const bool index_u16 = index_info.format != 0; - - vertex_min = 0xFFFF; - vertex_max = 0; - const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1); - res_cache.FlushRegion(address, size, nullptr); - for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) { - const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index]; - vertex_min = std::min(vertex_min, vertex); - vertex_max = std::max(vertex_max, vertex); - } - } else { - vertex_min = regs.pipeline.vertex_offset; - vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1; - } - - const u32 vertex_num = vertex_max - vertex_min + 1; - u32 vs_input_size = 0; - for (const auto& loader : vertex_attributes.attribute_loaders) { - if (loader.component_count != 0) { - vs_input_size += loader.byte_count * vertex_num; - } - } - - return {vertex_min, vertex_max, vs_input_size}; -} - void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max) { auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4); @@ -877,7 +733,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { succeeded = AccelerateDrawBatchInternal(is_indexed); } else { pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List); - pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout(); + pipeline_info.vertex_layout = software_layout; pipeline_cache.UseTrivialVertexShader(); 
pipeline_cache.UseTrivialGeometryShader(); pipeline_cache.BindPipeline(pipeline_info); @@ -1604,6 +1460,33 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con return true; } +void RasterizerVulkan::MakeSoftwareVertexLayout() { + constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3}; + + software_layout = VertexLayout{ + .binding_count = 1, + .attribute_count = 8 + }; + + for (u32 i = 0; i < software_layout.binding_count; i++) { + VertexBinding& binding = software_layout.bindings[i]; + binding.binding.Assign(i); + binding.fixed.Assign(0); + binding.stride.Assign(sizeof(HardwareVertex)); + } + + u32 offset = 0; + for (u32 i = 0; i < 8; i++) { + VertexAttribute& attribute = software_layout.attributes[i]; + attribute.binding.Assign(0); + attribute.location.Assign(i); + attribute.offset.Assign(offset); + attribute.type.Assign(AttribType::Float); + attribute.size.Assign(sizes[i]); + offset += sizes[i] * sizeof(float); + } +} + vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) { const bool use_border_color = instance.IsCustomBorderColorSupported() && (info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder || diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 4df96fa03..968559f25 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -9,7 +9,6 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/vk_texture_runtime.h" -#include "video_core/shader/shader.h" namespace Frontend { class EmuWindow; @@ -84,8 +83,6 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1, - const Pica::Shader::OutputVertex& v2) override; void 
DrawTriangles() override; void NotifyPicaRegisterChanged(u32 id) override; void FlushAll() override; @@ -164,15 +161,6 @@ private: /// Copies vertex data performing needed convertions and casts void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data); - struct VertexArrayInfo { - u32 vs_input_index_min; - u32 vs_input_index_max; - u32 vs_input_size; - }; - - /// Retrieve the range and the size of the input vertex - VertexArrayInfo AnalyzeVertexArray(bool is_indexed); - /// Setup vertex array for AccelerateDrawBatch void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max); @@ -182,6 +170,9 @@ private: /// Setup geometry shader for AccelerateDrawBatch bool SetupGeometryShader(); + /// Creates the vertex layout struct used for software shader pipelines + void MakeSoftwareVertexLayout(); + /// Creates a new sampler object vk::Sampler CreateSampler(const SamplerInfo& info); @@ -196,26 +187,8 @@ private: DescriptorManager& desc_manager; RasterizerCache res_cache; PipelineCache pipeline_cache; - bool shader_dirty = true; - /// Structure that the hardware rendered vertices are composed of - struct HardwareVertex { - HardwareVertex() = default; - HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion); - - constexpr static VertexLayout GetVertexLayout(); - - Common::Vec4f position; - Common::Vec4f color; - Common::Vec2f tex_coord0; - Common::Vec2f tex_coord1; - Common::Vec2f tex_coord2; - float tex_coord0_w; - Common::Vec4f normquat; - Common::Vec3f view; - }; - - std::vector vertex_batch; + VertexLayout software_layout; std::array binding_offsets{}; vk::Sampler default_sampler; Surface null_surface;