Merge pull request #2865 from wwylele/gs++
PICA: implemented geometry shader
This commit is contained in:
		| @@ -1,6 +1,7 @@ | ||||
| set(SRCS | ||||
|             command_processor.cpp | ||||
|             debug_utils/debug_utils.cpp | ||||
|             geometry_pipeline.cpp | ||||
|             pica.cpp | ||||
|             primitive_assembly.cpp | ||||
|             regs.cpp | ||||
| @@ -29,6 +30,7 @@ set(SRCS | ||||
| set(HEADERS | ||||
|             command_processor.h | ||||
|             debug_utils/debug_utils.h | ||||
|             geometry_pipeline.h | ||||
|             gpu_debugger.h | ||||
|             pica.h | ||||
|             pica_state.h | ||||
|   | ||||
| @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|  | ||||
|     case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): | ||||
|         g_state.immediate.current_attribute = 0; | ||||
|         g_state.immediate.reset_geometry_pipeline = true; | ||||
|         default_attr_counter = 0; | ||||
|         break; | ||||
|  | ||||
| @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|                     shader_engine->Run(g_state.vs, shader_unit); | ||||
|                     shader_unit.WriteOutput(regs.vs, output); | ||||
|  | ||||
|                     // Send to renderer | ||||
|                     using Pica::Shader::OutputVertex; | ||||
|                     auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||||
|                                           const OutputVertex& v2) { | ||||
|                         VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||||
|                     }; | ||||
|  | ||||
|                     g_state.primitive_assembler.SubmitVertex( | ||||
|                         Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), | ||||
|                         AddTriangle); | ||||
|                     // Send to geometry pipeline | ||||
|                     if (g_state.immediate.reset_geometry_pipeline) { | ||||
|                         g_state.geometry_pipeline.Reconfigure(); | ||||
|                         g_state.immediate.reset_geometry_pipeline = false; | ||||
|                     } | ||||
|                     ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); | ||||
|                     g_state.geometry_pipeline.Setup(shader_engine); | ||||
|                     g_state.geometry_pipeline.SubmitVertex(output); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|         // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||||
|         const size_t VERTEX_CACHE_SIZE = 32; | ||||
|         std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||||
|         std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; | ||||
|         Shader::OutputVertex output_vertex; | ||||
|         std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; | ||||
|         Shader::AttributeBuffer vs_output; | ||||
|  | ||||
|         unsigned int vertex_cache_pos = 0; | ||||
|         vertex_cache_ids.fill(-1); | ||||
| @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|  | ||||
|         shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||||
|  | ||||
|         g_state.geometry_pipeline.Reconfigure(); | ||||
|         g_state.geometry_pipeline.Setup(shader_engine); | ||||
|         if (g_state.geometry_pipeline.NeedIndexInput()) | ||||
|             ASSERT(is_indexed); | ||||
|  | ||||
|         for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { | ||||
|             // Indexed rendering doesn't use the start offset | ||||
|             unsigned int vertex = | ||||
| @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|             bool vertex_cache_hit = false; | ||||
|  | ||||
|             if (is_indexed) { | ||||
|                 if (g_state.geometry_pipeline.NeedIndexInput()) { | ||||
|                     g_state.geometry_pipeline.SubmitIndex(vertex); | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 if (g_debug_context && Pica::g_debug_context->recorder) { | ||||
|                     int size = index_u16 ? 2 : 1; | ||||
|                     memory_accesses.AddAccess(base_address + index_info.offset + size * index, | ||||
| @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|  | ||||
|                 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||||
|                     if (vertex == vertex_cache_ids[i]) { | ||||
|                         output_vertex = vertex_cache[i]; | ||||
|                         vs_output = vertex_cache[i]; | ||||
|                         vertex_cache_hit = true; | ||||
|                         break; | ||||
|                     } | ||||
| @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|  | ||||
|             if (!vertex_cache_hit) { | ||||
|                 // Initialize data for the current vertex | ||||
|                 Shader::AttributeBuffer input, output{}; | ||||
|                 Shader::AttributeBuffer input; | ||||
|                 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | ||||
|  | ||||
|                 // Send to vertex shader | ||||
| @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | ||||
|                                              (void*)&input); | ||||
|                 shader_unit.LoadInput(regs.vs, input); | ||||
|                 shader_engine->Run(g_state.vs, shader_unit); | ||||
|                 shader_unit.WriteOutput(regs.vs, output); | ||||
|  | ||||
|                 // Retrieve vertex from register data | ||||
|                 output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); | ||||
|                 shader_unit.WriteOutput(regs.vs, vs_output); | ||||
|  | ||||
|                 if (is_indexed) { | ||||
|                     vertex_cache[vertex_cache_pos] = output_vertex; | ||||
|                     vertex_cache[vertex_cache_pos] = vs_output; | ||||
|                     vertex_cache_ids[vertex_cache_pos] = vertex; | ||||
|                     vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // Send to renderer | ||||
|             using Pica::Shader::OutputVertex; | ||||
|             auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||||
|                                   const OutputVertex& v2) { | ||||
|                 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||||
|             }; | ||||
|  | ||||
|             primitive_assembler.SubmitVertex(output_vertex, AddTriangle); | ||||
|             // Send to geometry pipeline | ||||
|             g_state.geometry_pipeline.SubmitVertex(vs_output); | ||||
|         } | ||||
|  | ||||
|         for (auto& range : memory_accesses.ranges) { | ||||
|   | ||||
							
								
								
									
										274
									
								
								src/video_core/geometry_pipeline.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										274
									
								
								src/video_core/geometry_pipeline.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,274 @@ | ||||
| // Copyright 2017 Citra Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <memory> | ||||
| #include <utility> | ||||
| #include "video_core/geometry_pipeline.h" | ||||
| #include "video_core/pica_state.h" | ||||
| #include "video_core/regs.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/video_core.h" | ||||
|  | ||||
| namespace Pica { | ||||
|  | ||||
| /// Abstract attribute buffering stage; one implementation exists per geometry pipeline mode | ||||
| class GeometryPipelineBackend { | ||||
| public: | ||||
|     virtual ~GeometryPipelineBackend() = default; | ||||
|  | ||||
|     /// Returns true when the pipeline expects its next input directly from the index buffer | ||||
|     virtual bool NeedIndexInput() const = 0; | ||||
|  | ||||
|     /// Returns true when there is no incomplete data transfer pending | ||||
|     virtual bool IsEmpty() const = 0; | ||||
|  | ||||
|     /** | ||||
|      * Feeds the attributes of one vertex into the backend | ||||
|      * @param input attributes of a vertex output from vertex shader | ||||
|      * @return true if the buffer is full and the geometry shader should be invoked | ||||
|      */ | ||||
|     virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; | ||||
|  | ||||
|     /// Feeds one value taken from the index buffer into the backend | ||||
|     virtual void SubmitIndex(unsigned int val) = 0; | ||||
| }; | ||||
|  | ||||
| // Point mode: vertex shader outputs are collected in a staging buffer which is then loaded into | ||||
| // the input registers of the geometry shader unit. The number of vertex shader outputs and the | ||||
| // number of geometry shader inputs are both constant, and the geometry shader is invoked every | ||||
| // time the staging buffer has been filled. For example, with a geometry shader that takes 6 | ||||
| // inputs and a vertex shader that outputs 2 attributes, each invocation consumes 3 vertices. | ||||
| // TODO: what happens when the input size is not divisible by the output size? | ||||
| class GeometryPipeline_Point : public GeometryPipelineBackend { | ||||
| public: | ||||
|     GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { | ||||
|         ASSERT(regs.pipeline.variable_primitive == 0); | ||||
|         ASSERT(regs.gs.input_to_uniform == 0); | ||||
|  | ||||
|         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||
|         const size_t gs_input_total = regs.gs.max_input_attribute_index + 1; | ||||
|         // A whole number of vertices has to fit exactly into one set of geometry shader inputs | ||||
|         ASSERT(gs_input_total % vs_output_num == 0); | ||||
|  | ||||
|         buffer_end = attribute_buffer.attr + gs_input_total; | ||||
|         buffer_cur = attribute_buffer.attr; | ||||
|     } | ||||
|  | ||||
|     /// True while the write cursor sits at the start of the staging buffer | ||||
|     bool IsEmpty() const override { | ||||
|         return attribute_buffer.attr == buffer_cur; | ||||
|     } | ||||
|  | ||||
|     /// This mode never takes input from the index buffer | ||||
|     bool NeedIndexInput() const override { | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     /// Must not be called, since NeedIndexInput() is always false for this mode | ||||
|     void SubmitIndex(unsigned int val) override { | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
|  | ||||
|     /// Appends the vertex shader outputs; returns true once the geometry shader inputs are full | ||||
|     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||
|         buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||
|         if (buffer_cur != buffer_end) | ||||
|             return false; | ||||
|  | ||||
|         // The buffer is complete: rewind the cursor and load the inputs into the shader unit | ||||
|         buffer_cur = attribute_buffer.attr; | ||||
|         unit.LoadInput(regs.gs, attribute_buffer); | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     const Regs& regs; | ||||
|     Shader::GSUnitState& unit; | ||||
|     Shader::AttributeBuffer attribute_buffer; ///< Staging buffer for one set of shader inputs | ||||
|     Math::Vec4<float24>* buffer_cur;          ///< Next slot to write in attribute_buffer | ||||
|     Math::Vec4<float24>* buffer_end;          ///< One past the last slot used per invocation | ||||
|     unsigned int vs_output_num;               ///< Attributes copied from each submitted vertex | ||||
| }; | ||||
|  | ||||
| // VariablePrimitive mode: vertex attributes are buffered into the uniform registers of the | ||||
| // geometry shader unit. The number of vertices per invocation is variable and is given by the | ||||
| // first index value in the batch. This mode is usually used for subdivision. | ||||
| class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { | ||||
| public: | ||||
|     GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||||
|         : regs(regs), setup(setup) { | ||||
|         ASSERT(regs.pipeline.variable_primitive == 1); | ||||
|         ASSERT(regs.gs.input_to_uniform == 1); | ||||
|         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||
|     } | ||||
|  | ||||
|     /// The backend is idle exactly when it is waiting for the next vertex count | ||||
|     bool IsEmpty() const override { | ||||
|         return need_index; | ||||
|     } | ||||
|  | ||||
|     /// True until a vertex count has been submitted for the current primitive | ||||
|     bool NeedIndexInput() const override { | ||||
|         return need_index; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Starts a new primitive | ||||
|      * @param val index buffer value, interpreted as the number of vertices that will follow | ||||
|      */ | ||||
|     void SubmitIndex(unsigned int val) override { | ||||
|         DEBUG_ASSERT(need_index); | ||||
|  | ||||
|         // NOTE(review): a value of 0 is not handled specially here; total_vertex_num would wrap | ||||
|         // around on the next SubmitVertex call. Confirm what the hardware does in that case. | ||||
|  | ||||
|         // The number of vertex input is put to the uniform register | ||||
|         float24 vertex_num = float24::FromFloat32(static_cast<float>(val)); | ||||
|         setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); | ||||
|  | ||||
|         // The second uniform register and so on are used for receiving input vertices | ||||
|         buffer_cur = setup.uniforms.f + 1; | ||||
|  | ||||
|         main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; | ||||
|         total_vertex_num = val; | ||||
|         need_index = false; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Buffers one vertex of the current primitive into the uniform registers | ||||
|      * @param input attributes of a vertex output from vertex shader | ||||
|      * @return true once all vertices announced by SubmitIndex have been received | ||||
|      */ | ||||
|     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||
|         DEBUG_ASSERT(!need_index); | ||||
|         if (main_vertex_num != 0) { | ||||
|             // For main vertices, receive all attributes | ||||
|             buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||
|             --main_vertex_num; | ||||
|         } else { | ||||
|             // For other vertices, only receive the first attribute (usually the position) | ||||
|             *(buffer_cur++) = input.attr[0]; | ||||
|         } | ||||
|         --total_vertex_num; | ||||
|  | ||||
|         if (total_vertex_num == 0) { | ||||
|             // The primitive is complete; the next input has to be a new vertex count | ||||
|             need_index = true; | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     bool need_index = true; | ||||
|     const Regs& regs; | ||||
|     Shader::ShaderSetup& setup; | ||||
|     // The three members below are only meaningful after SubmitIndex; they are given defined | ||||
|     // values so that a call in the wrong order never reads an indeterminate value. | ||||
|     unsigned int main_vertex_num = 0; | ||||
|     unsigned int total_vertex_num = 0; | ||||
|     Math::Vec4<float24>* buffer_cur = nullptr; | ||||
|     unsigned int vs_output_num; | ||||
| }; | ||||
|  | ||||
| // FixedPrimitive mode: vertex attributes are buffered into the uniform registers of the geometry | ||||
| // shader unit, and every shader invocation consumes the same number of vertices. This is usually | ||||
| // used for particle systems. | ||||
| class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { | ||||
| public: | ||||
|     GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||||
|         : regs(regs), setup(setup) { | ||||
|         ASSERT(regs.pipeline.variable_primitive == 0); | ||||
|         ASSERT(regs.gs.input_to_uniform == 1); | ||||
|  | ||||
|         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||
|         ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); | ||||
|  | ||||
|         // The destination window starts at the configured uniform register and holds all | ||||
|         // attributes of every vertex of one invocation | ||||
|         const size_t vertices_per_invocation = | ||||
|             regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; | ||||
|         buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; | ||||
|         buffer_cur = buffer_begin; | ||||
|         buffer_end = buffer_begin + vs_output_num * vertices_per_invocation; | ||||
|     } | ||||
|  | ||||
|     /// True while the write cursor sits at the start of the destination window | ||||
|     bool IsEmpty() const override { | ||||
|         return buffer_begin == buffer_cur; | ||||
|     } | ||||
|  | ||||
|     /// This mode never takes input from the index buffer | ||||
|     bool NeedIndexInput() const override { | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     /// Must not be called, since NeedIndexInput() is always false for this mode | ||||
|     void SubmitIndex(unsigned int val) override { | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
|  | ||||
|     /// Appends the vertex shader outputs; returns true once the destination window is full | ||||
|     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||
|         buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||
|         if (buffer_cur != buffer_end) | ||||
|             return false; | ||||
|  | ||||
|         // All vertices of this invocation are in place; start over for the next one | ||||
|         buffer_cur = buffer_begin; | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     const Regs& regs; | ||||
|     Shader::ShaderSetup& setup; | ||||
|     Math::Vec4<float24>* buffer_begin; ///< First uniform register of the destination window | ||||
|     Math::Vec4<float24>* buffer_cur;   ///< Next uniform register to write | ||||
|     Math::Vec4<float24>* buffer_end;   ///< One past the last register of the window | ||||
|     unsigned int vs_output_num;        ///< Attributes copied from each submitted vertex | ||||
| }; | ||||
|  | ||||
| /// Binds the pipeline to the Pica state it reads registers from; no backend is selected yet | ||||
| GeometryPipeline::GeometryPipeline(State& state) | ||||
|     : state(state) { | ||||
| } | ||||
|  | ||||
| // Defaulted here, where GeometryPipelineBackend is a complete type, rather than in the header | ||||
| // where it is only forward declared | ||||
| GeometryPipeline::~GeometryPipeline() = default; | ||||
|  | ||||
| /// Stores the handler that receives vertices when no geometry shader backend is active | ||||
| void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { | ||||
|     // The parameter is a by-value sink, so move it into place instead of copying it again | ||||
|     this->vertex_handler = std::move(vertex_handler); | ||||
| } | ||||
|  | ||||
| /// Remembers the shader engine and prepares it for the geometry shader program. Does nothing | ||||
| /// when no backend is configured. | ||||
| void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { | ||||
|     if (backend) { | ||||
|         this->shader_engine = shader_engine; | ||||
|         shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Selects the backend that matches the current register settings. Any existing backend must | ||||
| /// not hold an incomplete data transfer when this is called. | ||||
| void GeometryPipeline::Reconfigure() { | ||||
|     ASSERT(!backend || backend->IsEmpty()); | ||||
|  | ||||
|     const auto& regs = state.regs; | ||||
|     const auto& pipeline = regs.pipeline; | ||||
|  | ||||
|     // Without a geometry shader there is nothing to buffer, so no backend is kept | ||||
|     if (pipeline.use_gs == PipelineRegs::UseGS::No) { | ||||
|         backend = nullptr; | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     ASSERT(pipeline.use_gs == PipelineRegs::UseGS::Yes); | ||||
|  | ||||
|     // The following assumes that when geometry shader is in use, the shader unit 3 is configured as | ||||
|     // a geometry shader unit. | ||||
|     // TODO: what happens if this is not true? | ||||
|     ASSERT(pipeline.gs_unit_exclusive_configuration == 1); | ||||
|     ASSERT(regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); | ||||
|  | ||||
|     state.gs_unit.ConfigOutput(regs.gs); | ||||
|  | ||||
|     // Both copies of the vertex shader output count are expected to agree | ||||
|     ASSERT(pipeline.vs_outmap_total_minus_1_a == pipeline.vs_outmap_total_minus_1_b); | ||||
|  | ||||
|     switch (pipeline.gs_config.mode) { | ||||
|     case PipelineRegs::GSMode::Point: | ||||
|         backend = std::make_unique<GeometryPipeline_Point>(regs, state.gs_unit); | ||||
|         break; | ||||
|     case PipelineRegs::GSMode::VariablePrimitive: | ||||
|         backend = std::make_unique<GeometryPipeline_VariablePrimitive>(regs, state.gs); | ||||
|         break; | ||||
|     case PipelineRegs::GSMode::FixedPrimitive: | ||||
|         backend = std::make_unique<GeometryPipeline_FixedPrimitive>(regs, state.gs); | ||||
|         break; | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Forwards the query to the backend; with no backend configured the answer is always false | ||||
| bool GeometryPipeline::NeedIndexInput() const { | ||||
|     return backend ? backend->NeedIndexInput() : false; | ||||
| } | ||||
|  | ||||
| /// Forwards an index buffer value to the backend. Only valid while NeedIndexInput() is true, | ||||
| /// which implies that a backend is configured. | ||||
| void GeometryPipeline::SubmitIndex(unsigned int val) { | ||||
|     // Catch violations of the precondition in debug builds instead of dereferencing null | ||||
|     DEBUG_ASSERT(backend != nullptr); | ||||
|     backend->SubmitIndex(val); | ||||
| } | ||||
|  | ||||
| /// Routes one vertex shader output either straight to the vertex handler or into the backend, | ||||
| /// running the geometry shader whenever the backend reports that its buffer is complete. | ||||
| void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { | ||||
|     if (!backend) { | ||||
|         // No backend means the geometry shader is disabled, so we send the vertex shader output | ||||
|         // directly to the primitive assembler. | ||||
|         vertex_handler(input); | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     const bool buffer_complete = backend->SubmitVertex(input); | ||||
|     if (!buffer_complete) | ||||
|         return; | ||||
|  | ||||
|     shader_engine->Run(state.gs, state.gs_unit); | ||||
|  | ||||
|     // The uniform b15 is set to true after every geometry shader invocation. This is useful | ||||
|     // for the shader to know if this is the first invocation in a batch, if the program set | ||||
|     // b15 to false first. | ||||
|     state.gs.uniforms.b[15] = true; | ||||
| } | ||||
|  | ||||
| } // namespace Pica | ||||
							
								
								
									
										49
									
								
								src/video_core/geometry_pipeline.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								src/video_core/geometry_pipeline.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | ||||
| // Copyright 2017 Citra Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <memory> | ||||
| #include "video_core/shader/shader.h" | ||||
|  | ||||
| namespace Pica { | ||||
|  | ||||
| struct State; | ||||
|  | ||||
| class GeometryPipelineBackend; | ||||
|  | ||||
| /// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler | ||||
| class GeometryPipeline { | ||||
| public: | ||||
|     /// Creates a pipeline that reads its configuration from the given Pica state | ||||
|     explicit GeometryPipeline(State& state); | ||||
|     ~GeometryPipeline(); | ||||
|  | ||||
|     /// Sets the handler for receiving vertex outputs from vertex shader | ||||
|     void SetVertexHandler(Shader::VertexHandler vertex_handler); | ||||
|  | ||||
|     /** | ||||
|      * Setup the geometry shader unit if it is in use | ||||
|      * @param shader_engine the shader engine for the geometry shader to run | ||||
|      */ | ||||
|     void Setup(Shader::ShaderEngine* shader_engine); | ||||
|  | ||||
|     /// Reconfigures the pipeline according to current register settings | ||||
|     void Reconfigure(); | ||||
|  | ||||
|     /// Checks if the pipeline needs a direct input from index buffer | ||||
|     bool NeedIndexInput() const; | ||||
|  | ||||
|     /// Submits an index from index buffer. Call this only when NeedIndexInput returns true | ||||
|     void SubmitIndex(unsigned int val); | ||||
|  | ||||
|     /// Submits vertex attributes output from vertex shader | ||||
|     void SubmitVertex(const Shader::AttributeBuffer& input); | ||||
|  | ||||
| private: | ||||
|     /// Receives vertices directly when no backend is configured | ||||
|     Shader::VertexHandler vertex_handler; | ||||
|     /// Engine used to run the geometry shader; assigned by Setup, null until then | ||||
|     Shader::ShaderEngine* shader_engine = nullptr; | ||||
|     /// Mode-specific buffering stage; null when the geometry shader is disabled | ||||
|     std::unique_ptr<GeometryPipelineBackend> backend; | ||||
|     State& state; | ||||
| }; | ||||
| } // namespace Pica | ||||
| @@ -3,9 +3,11 @@ | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <cstring> | ||||
| #include "video_core/geometry_pipeline.h" | ||||
| #include "video_core/pica.h" | ||||
| #include "video_core/pica_state.h" | ||||
| #include "video_core/regs_pipeline.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/video_core.h" | ||||
|  | ||||
| namespace Pica { | ||||
|  | ||||
| @@ -24,6 +26,23 @@ void Zero(T& o) { | ||||
|     memset(&o, 0, sizeof(o)); | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Constructs the Pica state and wires up the vertex output handlers. The geometry shader unit and | ||||
|  * the geometry pipeline both forward their output vertices to this state's primitive assembler, | ||||
|  * which passes each assembled triangle on to the rasterizer of the active renderer. | ||||
|  */ | ||||
| State::State() : geometry_pipeline(*this) { | ||||
|     auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { | ||||
|         using Pica::Shader::OutputVertex; | ||||
|         // Forwarding a triangle needs nothing from this object, so nothing is captured | ||||
|         auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, | ||||
|                               const OutputVertex& v2) { | ||||
|             VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||||
|         }; | ||||
|         primitive_assembler.SubmitVertex( | ||||
|             Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); | ||||
|     }; | ||||
|  | ||||
|     auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; | ||||
|  | ||||
|     // Register the handlers on the members of the object being constructed rather than going | ||||
|     // through the global g_state, so that the constructor is correct for any State instance. | ||||
|     gs_unit.SetVertexHandler(SubmitVertex, SetWinding); | ||||
|     geometry_pipeline.SetVertexHandler(SubmitVertex); | ||||
| } | ||||
|  | ||||
| void State::Reset() { | ||||
|     Zero(regs); | ||||
|     Zero(vs); | ||||
|   | ||||
| @@ -8,6 +8,7 @@ | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/vector_math.h" | ||||
| #include "video_core/geometry_pipeline.h" | ||||
| #include "video_core/primitive_assembly.h" | ||||
| #include "video_core/regs.h" | ||||
| #include "video_core/shader/shader.h" | ||||
| @@ -16,6 +17,7 @@ namespace Pica { | ||||
|  | ||||
| /// Struct used to describe current Pica state | ||||
| struct State { | ||||
|     State(); | ||||
|     void Reset(); | ||||
|  | ||||
|     /// Pica registers | ||||
| @@ -137,8 +139,17 @@ struct State { | ||||
|         Shader::AttributeBuffer input_vertex; | ||||
|         // Index of the next attribute to be loaded into `input_vertex`. | ||||
|         u32 current_attribute = 0; | ||||
|         // Indicates the immediate mode just started and the geometry pipeline needs to reconfigure | ||||
|         bool reset_geometry_pipeline = true; | ||||
|     } immediate; | ||||
|  | ||||
|     // The geometry shader unit needs to be kept in the global state because some shaders rely on | ||||
|     // register values being preserved across shader invocations. | ||||
|     // TODO: also bring the three vertex shader units here and implement the shader scheduler. | ||||
|     Shader::GSUnitState gs_unit; | ||||
|  | ||||
|     GeometryPipeline geometry_pipeline; | ||||
|  | ||||
|     // This is constructed with a dummy triangle topology | ||||
|     PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; | ||||
| }; | ||||
|   | ||||
| @@ -17,15 +17,18 @@ template <typename VertexType> | ||||
| void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | ||||
|                                                   TriangleHandler triangle_handler) { | ||||
|     switch (topology) { | ||||
|     // TODO: Figure out what's different with TriangleTopology::Shader. | ||||
|     case PipelineRegs::TriangleTopology::List: | ||||
|     case PipelineRegs::TriangleTopology::Shader: | ||||
|         if (buffer_index < 2) { | ||||
|             buffer[buffer_index++] = vtx; | ||||
|         } else { | ||||
|             buffer_index = 0; | ||||
|  | ||||
|             triangle_handler(buffer[0], buffer[1], vtx); | ||||
|             if (topology == PipelineRegs::TriangleTopology::Shader && winding) { | ||||
|                 triangle_handler(buffer[1], buffer[0], vtx); | ||||
|                 winding = false; | ||||
|             } else { | ||||
|                 triangle_handler(buffer[0], buffer[1], vtx); | ||||
|             } | ||||
|         } | ||||
|         break; | ||||
|  | ||||
| @@ -50,10 +53,16 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Requests that the next triangle assembled with TriangleTopology::Shader has its first two | ||||
| /// vertices swapped; the flag is cleared again once that triangle has been emitted. | ||||
| template <typename VertexType> | ||||
| void PrimitiveAssembler<VertexType>::SetWinding() { | ||||
|     this->winding = true; | ||||
| } | ||||
|  | ||||
| /// Discards any buffered vertices and clears the strip and winding flags. | ||||
| template <typename VertexType> | ||||
| void PrimitiveAssembler<VertexType>::Reset() { | ||||
|     winding = false; | ||||
|     strip_ready = false; | ||||
|     buffer_index = 0; | ||||
| } | ||||
|  | ||||
| template <typename VertexType> | ||||
|   | ||||
| @@ -29,6 +29,12 @@ struct PrimitiveAssembler { | ||||
|      */ | ||||
|     void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); | ||||
|  | ||||
|     /** | ||||
|      * Invert the vertex order of the next triangle. Called by geometry shader emitter. | ||||
|      * This only takes effect for TriangleTopology::Shader. | ||||
|      */ | ||||
|     void SetWinding(); | ||||
|  | ||||
|     /** | ||||
|      * Resets the internal state of the PrimitiveAssembler. | ||||
|      */ | ||||
| @@ -45,6 +51,7 @@ private: | ||||
|     int buffer_index; | ||||
|     VertexType buffer[2]; | ||||
|     bool strip_ready = false; | ||||
|     bool winding = false; | ||||
| }; | ||||
|  | ||||
| } // namespace | ||||
|   | ||||
| @@ -147,7 +147,15 @@ struct PipelineRegs { | ||||
|     // Number of vertices to render | ||||
|     u32 num_vertices; | ||||
|  | ||||
|     INSERT_PADDING_WORDS(0x1); | ||||
|     enum class UseGS : u32 { | ||||
|         No = 0, | ||||
|         Yes = 2, | ||||
|     }; | ||||
|  | ||||
|     union { | ||||
|         BitField<0, 2, UseGS> use_gs; | ||||
|         BitField<31, 1, u32> variable_primitive; | ||||
|     }; | ||||
|  | ||||
|     // The index of the first vertex to render | ||||
|     u32 vertex_offset; | ||||
| @@ -218,7 +226,29 @@ struct PipelineRegs { | ||||
|  | ||||
|     GPUMode gpu_mode; | ||||
|  | ||||
|     INSERT_PADDING_WORDS(0x18); | ||||
|     INSERT_PADDING_WORDS(0x4); | ||||
|     BitField<0, 4, u32> vs_outmap_total_minus_1_a; | ||||
|     INSERT_PADDING_WORDS(0x6); | ||||
|     BitField<0, 4, u32> vs_outmap_total_minus_1_b; | ||||
|  | ||||
|     enum class GSMode : u32 { | ||||
|         Point = 0, | ||||
|         VariablePrimitive = 1, | ||||
|         FixedPrimitive = 2, | ||||
|     }; | ||||
|  | ||||
|     union { | ||||
|         BitField<0, 8, GSMode> mode; | ||||
|         BitField<8, 4, u32> fixed_vertex_num_minus_1; | ||||
|         BitField<12, 4, u32> stride_minus_1; | ||||
|         BitField<16, 4, u32> start_index; | ||||
|     } gs_config; | ||||
|  | ||||
|     INSERT_PADDING_WORDS(0x1); | ||||
|  | ||||
|     u32 variable_vertex_main_num_minus_1; | ||||
|  | ||||
|     INSERT_PADDING_WORDS(0x9); | ||||
|  | ||||
|     enum class TriangleTopology : u32 { | ||||
|         List = 0, | ||||
|   | ||||
| @@ -24,9 +24,16 @@ struct ShaderRegs { | ||||
|  | ||||
|     INSERT_PADDING_WORDS(0x4); | ||||
|  | ||||
|     enum ShaderMode { | ||||
|         GS = 0x08, | ||||
|         VS = 0xA0, | ||||
|     }; | ||||
|  | ||||
|     union { | ||||
|         // Number of input attributes to shader unit - 1 | ||||
|         BitField<0, 4, u32> max_input_attribute_index; | ||||
|         BitField<8, 8, u32> input_to_uniform; | ||||
|         BitField<24, 8, ShaderMode> shader_mode; | ||||
|     }; | ||||
|  | ||||
|     // Offset to shader program entry point (in words) | ||||
|   | ||||
| @@ -21,7 +21,8 @@ namespace Pica { | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { | ||||
| OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, | ||||
|                                                const AttributeBuffer& input) { | ||||
|     // Setup output data | ||||
|     union { | ||||
|         OutputVertex ret{}; | ||||
| @@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Stores the given emitter pointer in the shader unit; the pointer is only saved here | ||||
| UnitState::UnitState(GSEmitter* emitter) | ||||
|     : emitter_ptr(emitter) { | ||||
| } | ||||
|  | ||||
| /// Allocates the handler storage, which is kept behind a raw pointer owned by the emitter | ||||
| GSEmitter::GSEmitter() : handlers(new Handlers) {} | ||||
|  | ||||
| /// Releases the handler storage allocated by the constructor | ||||
| GSEmitter::~GSEmitter() { | ||||
|     delete handlers; | ||||
| } | ||||
|  | ||||
| /** | ||||
|  * Stores the given output registers in the buffer slot selected by vertex_id. If prim_emit is | ||||
|  * set, all three buffered vertices are then passed to the vertex handler, each reduced to the | ||||
|  * registers enabled in output_mask; the winding setter is called first when winding is set. | ||||
|  * @param vertex the 16 output registers of the vertex being emitted | ||||
|  */ | ||||
| void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { | ||||
|     ASSERT(vertex_id < 3); | ||||
|     std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); | ||||
|  | ||||
|     // Without a primitive emit request the vertex is only buffered | ||||
|     if (!prim_emit) | ||||
|         return; | ||||
|  | ||||
|     if (winding) | ||||
|         handlers->winding_setter(); | ||||
|  | ||||
|     for (const auto& buffered_vertex : buffer) { | ||||
|         AttributeBuffer output; | ||||
|         unsigned int next_slot = 0; | ||||
|         // Pack the enabled output registers tightly, in ascending register order | ||||
|         for (unsigned int reg : Common::BitSet<u32>(output_mask)) { | ||||
|             output.attr[next_slot] = buffered_vertex[reg]; | ||||
|             ++next_slot; | ||||
|         } | ||||
|         handlers->vertex_handler(output); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Hands the address of this unit's own emitter member to the UnitState base | ||||
| GSUnitState::GSUnitState() | ||||
|     : UnitState(&emitter) { | ||||
| } | ||||
|  | ||||
| /// Installs the callbacks the emitter uses to output vertices and to request inverted winding | ||||
| void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { | ||||
|     auto& handlers = *emitter.handlers; | ||||
|     handlers.winding_setter = std::move(winding_setter); | ||||
|     handlers.vertex_handler = std::move(vertex_handler); | ||||
| } | ||||
|  | ||||
| /// Copies the output register mask from the shader registers into the emitter | ||||
| void GSUnitState::ConfigOutput(const ShaderRegs& config) { | ||||
|     const u32 enabled_outputs = config.output_mask; | ||||
|     emitter.output_mask = enabled_outputs; | ||||
| } | ||||
|  | ||||
| MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||||
|  | ||||
| #ifdef ARCHITECTURE_x86_64 | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
|  | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <functional> | ||||
| #include <type_traits> | ||||
| #include <nihstro/shader_bytecode.h> | ||||
| #include "common/assert.h" | ||||
| @@ -31,6 +32,12 @@ struct AttributeBuffer { | ||||
|     alignas(16) Math::Vec4<float24> attr[16]; | ||||
| }; | ||||
|  | ||||
| /// Handler type for receiving vertex outputs from vertex shader or geometry shader | ||||
| using VertexHandler = std::function<void(const AttributeBuffer&)>; | ||||
|  | ||||
| /// Handler type for signaling to invert the vertex order of the next triangle | ||||
| using WindingSetter = std::function<void()>; | ||||
|  | ||||
| struct OutputVertex { | ||||
|     Math::Vec4<float24> pos; | ||||
|     Math::Vec4<float24> quat; | ||||
| @@ -43,7 +50,8 @@ struct OutputVertex { | ||||
|     INSERT_PADDING_WORDS(1); | ||||
|     Math::Vec2<float24> tc2; | ||||
|  | ||||
|     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); | ||||
|     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, | ||||
|                                             const AttributeBuffer& output); | ||||
| }; | ||||
| #define ASSERT_POS(var, pos)                                                                       \ | ||||
|     static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ | ||||
| @@ -60,6 +68,29 @@ ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U); | ||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||
| static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | ||||
|  | ||||
| /** | ||||
|  * This structure contains state information for primitive emitting in geometry shader. | ||||
|  * | ||||
|  * The object owns the heap-allocated `handlers` table, which the destructor deletes. It is | ||||
|  * therefore non-copyable: a member-wise copy would leave two objects deleting the same table. | ||||
|  */ | ||||
| struct GSEmitter { | ||||
|     /// Storage for the three vertices of the primitive being assembled, 16 registers each. | ||||
|     std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; | ||||
|     /// Slot of `buffer` written by the next Emit() call; Emit() asserts that it is below 3. | ||||
|     u8 vertex_id; | ||||
|     /// When set, Emit() forwards the buffered vertices to the vertex handler. | ||||
|     bool prim_emit; | ||||
|     /// When set, Emit() calls the winding setter before forwarding the vertices. | ||||
|     bool winding; | ||||
|     /// Bit mask selecting which registers of a buffered vertex are passed to the vertex handler. | ||||
|     u32 output_mask; | ||||
|  | ||||
|     // Function objects are hidden behind a raw pointer to make the structure standard layout type, | ||||
|     // for JIT to use offsetof to access other members. | ||||
|     struct Handlers { | ||||
|         VertexHandler vertex_handler; | ||||
|         WindingSetter winding_setter; | ||||
|     } * handlers; | ||||
|  | ||||
|     GSEmitter(); | ||||
|     ~GSEmitter(); | ||||
|  | ||||
|     // `handlers` is an owning raw pointer; copying is forbidden to rule out a double delete. | ||||
|     GSEmitter(const GSEmitter&) = delete; | ||||
|     GSEmitter& operator=(const GSEmitter&) = delete; | ||||
|  | ||||
|     /// Buffers `vertex` at `vertex_id` and forwards the primitive when `prim_emit` is set. | ||||
|     void Emit(Math::Vec4<float24> (&vertex)[16]); | ||||
| }; | ||||
| static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); | ||||
|  | ||||
| /** | ||||
|  * This structure contains the state information that needs to be unique for a shader unit. The 3DS | ||||
|  * has four shader units that process shaders in parallel. At the present, Citra only implements a | ||||
| @@ -67,6 +98,7 @@ static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has inva | ||||
|  * here will make it easier for us to parallelize the shader processing later. | ||||
|  */ | ||||
| struct UnitState { | ||||
|     explicit UnitState(GSEmitter* emitter = nullptr); | ||||
|     struct Registers { | ||||
|         // The registers are accessed by the shader JIT using SSE instructions, and are therefore | ||||
|         // required to be 16-byte aligned. | ||||
| @@ -82,6 +114,8 @@ struct UnitState { | ||||
|     // TODO: How many bits do these actually have? | ||||
|     s32 address_registers[3]; | ||||
|  | ||||
|     GSEmitter* emitter_ptr; | ||||
|  | ||||
|     static size_t InputOffset(const SourceRegister& reg) { | ||||
|         switch (reg.GetRegisterType()) { | ||||
|         case RegisterType::Input: | ||||
| @@ -125,6 +159,19 @@ struct UnitState { | ||||
|     void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * Extended shader unit state for the special unit that is able to run both vertex shaders and | ||||
|  * geometry shaders. On top of the base unit state it carries a primitive emitter plus the helpers | ||||
|  * needed to configure it for geometry shader use. | ||||
|  */ | ||||
| struct GSUnitState : UnitState { | ||||
|     /// Primitive emitter of this unit; the base class is handed its address on construction. | ||||
|     GSEmitter emitter; | ||||
|  | ||||
|     GSUnitState(); | ||||
|  | ||||
|     /// Sets the callbacks that receive emitted vertices and winding requests. | ||||
|     void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); | ||||
|  | ||||
|     /// Takes over the output register mask from `config`. | ||||
|     void ConfigOutput(const ShaderRegs& config); | ||||
| }; | ||||
|  | ||||
| struct ShaderSetup { | ||||
|     struct { | ||||
|         // The float uniforms are accessed by the shader JIT using SSE instructions, and are | ||||
|   | ||||
| @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::EMIT: { | ||||
|                 GSEmitter* emitter = state.emitter_ptr; | ||||
|                 ASSERT_MSG(emitter, "Execute EMIT on VS"); | ||||
|                 emitter->Emit(state.registers.output); | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             case OpCode::Id::SETEMIT: { | ||||
|                 GSEmitter* emitter = state.emitter_ptr; | ||||
|                 ASSERT_MSG(emitter, "Execute SETEMIT on VS"); | ||||
|                 emitter->vertex_id = instr.setemit.vertex_id; | ||||
|                 emitter->prim_emit = instr.setemit.prim_emit != 0; | ||||
|                 emitter->winding = instr.setemit.winding != 0; | ||||
|                 break; | ||||
|             } | ||||
|  | ||||
|             default: | ||||
|                 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||
|                           (int)instr.opcode.Value().EffectiveOpCode(), | ||||
|   | ||||
| @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { | ||||
|     &JitShader::Compile_IF,    // ifu | ||||
|     &JitShader::Compile_IF,    // ifc | ||||
|     &JitShader::Compile_LOOP,  // loop | ||||
|     nullptr,                   // emit | ||||
|     nullptr,                   // sete | ||||
|     &JitShader::Compile_EMIT,  // emit | ||||
|     &JitShader::Compile_SETE,  // sete | ||||
|     &JitShader::Compile_JMP,   // jmpc | ||||
|     &JitShader::Compile_JMP,   // jmpu | ||||
|     &JitShader::Compile_CMP,   // cmp | ||||
| @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Free-function trampoline that lets generated code reach GSEmitter::Emit through a plain | ||||
| /// function call taking the emitter and a pointer to the 16 output registers. | ||||
| static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { | ||||
|     auto& output_registers = *output; | ||||
|     emitter->Emit(output_registers); | ||||
| } | ||||
|  | ||||
| /// Generates the code for the EMIT instruction. The generated code loads the unit's emitter | ||||
| /// pointer; if it is null it calls LogCritical with an error message, otherwise it calls the Emit | ||||
| /// trampoline with the emitter and the address of the unit's output registers. `instr` is unused. | ||||
| void JitShader::Compile_EMIT(Instruction instr) { | ||||
|     Label emitter_present; | ||||
|     Label done; | ||||
|  | ||||
|     // rax = state->emitter_ptr; branch away when it is non-null. | ||||
|     mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||||
|     test(rax, rax); | ||||
|     jnz(emitter_present); | ||||
|  | ||||
|     // Null emitter: report it and skip over the emitter call. | ||||
|     const auto error_text = reinterpret_cast<size_t>("Execute EMIT on VS"); | ||||
|     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     mov(ABI_PARAM1, error_text); | ||||
|     CallFarFunction(*this, LogCritical); | ||||
|     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     jmp(done); | ||||
|  | ||||
|     // Emitter available: Emit(emitter, &state->registers.output). | ||||
|     L(emitter_present); | ||||
|     const auto output_offset = static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output)); | ||||
|     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     mov(ABI_PARAM1, rax); | ||||
|     mov(ABI_PARAM2, STATE); | ||||
|     add(ABI_PARAM2, output_offset); | ||||
|     CallFarFunction(*this, Emit); | ||||
|     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|  | ||||
|     L(done); | ||||
| } | ||||
|  | ||||
| /// Generates the code for the SETEMIT instruction. The generated code loads the unit's emitter | ||||
| /// pointer; if it is null it calls LogCritical with an error message, otherwise it stores the | ||||
| /// instruction's vertex_id, prim_emit and winding fields into the emitter as single bytes. | ||||
| void JitShader::Compile_SETE(Instruction instr) { | ||||
|     Label emitter_present; | ||||
|     Label done; | ||||
|  | ||||
|     // rax = state->emitter_ptr; branch away when it is non-null. | ||||
|     mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||||
|     test(rax, rax); | ||||
|     jnz(emitter_present); | ||||
|  | ||||
|     // Null emitter: report it and skip over the stores. | ||||
|     const auto error_text = reinterpret_cast<size_t>("Execute SETEMIT on VS"); | ||||
|     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     mov(ABI_PARAM1, error_text); | ||||
|     CallFarFunction(*this, LogCritical); | ||||
|     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||
|     jmp(done); | ||||
|  | ||||
|     // Emitter available: the field values are encoded as immediates of the byte stores. | ||||
|     L(emitter_present); | ||||
|     mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); | ||||
|     mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); | ||||
|     mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); | ||||
|  | ||||
|     L(done); | ||||
| } | ||||
|  | ||||
| void JitShader::Compile_Block(unsigned end) { | ||||
|     while (program_counter < end) { | ||||
|         Compile_NextInstr(); | ||||
|   | ||||
| @@ -66,6 +66,8 @@ public: | ||||
|     void Compile_JMP(Instruction instr); | ||||
|     void Compile_CMP(Instruction instr); | ||||
|     void Compile_MAD(Instruction instr); | ||||
|     void Compile_EMIT(Instruction instr); | ||||
|     void Compile_SETE(Instruction instr); | ||||
|  | ||||
| private: | ||||
|     void Compile_Block(unsigned end); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user