Merge pull request #2865 from wwylele/gs++
PICA: implemented geometry shader
This commit is contained in:
		| @@ -1,6 +1,7 @@ | |||||||
| set(SRCS | set(SRCS | ||||||
|             command_processor.cpp |             command_processor.cpp | ||||||
|             debug_utils/debug_utils.cpp |             debug_utils/debug_utils.cpp | ||||||
|  |             geometry_pipeline.cpp | ||||||
|             pica.cpp |             pica.cpp | ||||||
|             primitive_assembly.cpp |             primitive_assembly.cpp | ||||||
|             regs.cpp |             regs.cpp | ||||||
| @@ -29,6 +30,7 @@ set(SRCS | |||||||
| set(HEADERS | set(HEADERS | ||||||
|             command_processor.h |             command_processor.h | ||||||
|             debug_utils/debug_utils.h |             debug_utils/debug_utils.h | ||||||
|  |             geometry_pipeline.h | ||||||
|             gpu_debugger.h |             gpu_debugger.h | ||||||
|             pica.h |             pica.h | ||||||
|             pica_state.h |             pica_state.h | ||||||
|   | |||||||
| @@ -161,6 +161,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|  |  | ||||||
|     case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): |     case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index): | ||||||
|         g_state.immediate.current_attribute = 0; |         g_state.immediate.current_attribute = 0; | ||||||
|  |         g_state.immediate.reset_geometry_pipeline = true; | ||||||
|         default_attr_counter = 0; |         default_attr_counter = 0; | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
| @@ -234,16 +235,14 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|                     shader_engine->Run(g_state.vs, shader_unit); |                     shader_engine->Run(g_state.vs, shader_unit); | ||||||
|                     shader_unit.WriteOutput(regs.vs, output); |                     shader_unit.WriteOutput(regs.vs, output); | ||||||
|  |  | ||||||
|                     // Send to renderer |                     // Send to geometry pipeline | ||||||
|                     using Pica::Shader::OutputVertex; |                     if (g_state.immediate.reset_geometry_pipeline) { | ||||||
|                     auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, |                         g_state.geometry_pipeline.Reconfigure(); | ||||||
|                                           const OutputVertex& v2) { |                         g_state.immediate.reset_geometry_pipeline = false; | ||||||
|                         VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); |                     } | ||||||
|                     }; |                     ASSERT(!g_state.geometry_pipeline.NeedIndexInput()); | ||||||
|  |                     g_state.geometry_pipeline.Setup(shader_engine); | ||||||
|                     g_state.primitive_assembler.SubmitVertex( |                     g_state.geometry_pipeline.SubmitVertex(output); | ||||||
|                         Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output), |  | ||||||
|                         AddTriangle); |  | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @@ -321,8 +320,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|         // The size has been tuned for optimal balance between hit-rate and the cost of lookup |         // The size has been tuned for optimal balance between hit-rate and the cost of lookup | ||||||
|         const size_t VERTEX_CACHE_SIZE = 32; |         const size_t VERTEX_CACHE_SIZE = 32; | ||||||
|         std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; |         std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; | ||||||
|         std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; |         std::array<Shader::AttributeBuffer, VERTEX_CACHE_SIZE> vertex_cache; | ||||||
|         Shader::OutputVertex output_vertex; |         Shader::AttributeBuffer vs_output; | ||||||
|  |  | ||||||
|         unsigned int vertex_cache_pos = 0; |         unsigned int vertex_cache_pos = 0; | ||||||
|         vertex_cache_ids.fill(-1); |         vertex_cache_ids.fill(-1); | ||||||
| @@ -332,6 +331,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|  |  | ||||||
|         shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); |         shader_engine->SetupBatch(g_state.vs, regs.vs.main_offset); | ||||||
|  |  | ||||||
|  |         g_state.geometry_pipeline.Reconfigure(); | ||||||
|  |         g_state.geometry_pipeline.Setup(shader_engine); | ||||||
|  |         if (g_state.geometry_pipeline.NeedIndexInput()) | ||||||
|  |             ASSERT(is_indexed); | ||||||
|  |  | ||||||
|         for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { |         for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { | ||||||
|             // Indexed rendering doesn't use the start offset |             // Indexed rendering doesn't use the start offset | ||||||
|             unsigned int vertex = |             unsigned int vertex = | ||||||
| @@ -345,6 +349,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|             bool vertex_cache_hit = false; |             bool vertex_cache_hit = false; | ||||||
|  |  | ||||||
|             if (is_indexed) { |             if (is_indexed) { | ||||||
|  |                 if (g_state.geometry_pipeline.NeedIndexInput()) { | ||||||
|  |                     g_state.geometry_pipeline.SubmitIndex(vertex); | ||||||
|  |                     continue; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|                 if (g_debug_context && Pica::g_debug_context->recorder) { |                 if (g_debug_context && Pica::g_debug_context->recorder) { | ||||||
|                     int size = index_u16 ? 2 : 1; |                     int size = index_u16 ? 2 : 1; | ||||||
|                     memory_accesses.AddAccess(base_address + index_info.offset + size * index, |                     memory_accesses.AddAccess(base_address + index_info.offset + size * index, | ||||||
| @@ -353,7 +362,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|  |  | ||||||
|                 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { |                 for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||||||
|                     if (vertex == vertex_cache_ids[i]) { |                     if (vertex == vertex_cache_ids[i]) { | ||||||
|                         output_vertex = vertex_cache[i]; |                         vs_output = vertex_cache[i]; | ||||||
|                         vertex_cache_hit = true; |                         vertex_cache_hit = true; | ||||||
|                         break; |                         break; | ||||||
|                     } |                     } | ||||||
| @@ -362,7 +371,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|  |  | ||||||
|             if (!vertex_cache_hit) { |             if (!vertex_cache_hit) { | ||||||
|                 // Initialize data for the current vertex |                 // Initialize data for the current vertex | ||||||
|                 Shader::AttributeBuffer input, output{}; |                 Shader::AttributeBuffer input; | ||||||
|                 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); |                 loader.LoadVertex(base_address, index, vertex, input, memory_accesses); | ||||||
|  |  | ||||||
|                 // Send to vertex shader |                 // Send to vertex shader | ||||||
| @@ -371,26 +380,17 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||||||
|                                              (void*)&input); |                                              (void*)&input); | ||||||
|                 shader_unit.LoadInput(regs.vs, input); |                 shader_unit.LoadInput(regs.vs, input); | ||||||
|                 shader_engine->Run(g_state.vs, shader_unit); |                 shader_engine->Run(g_state.vs, shader_unit); | ||||||
|                 shader_unit.WriteOutput(regs.vs, output); |                 shader_unit.WriteOutput(regs.vs, vs_output); | ||||||
|  |  | ||||||
|                 // Retrieve vertex from register data |  | ||||||
|                 output_vertex = Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, output); |  | ||||||
|  |  | ||||||
|                 if (is_indexed) { |                 if (is_indexed) { | ||||||
|                     vertex_cache[vertex_cache_pos] = output_vertex; |                     vertex_cache[vertex_cache_pos] = vs_output; | ||||||
|                     vertex_cache_ids[vertex_cache_pos] = vertex; |                     vertex_cache_ids[vertex_cache_pos] = vertex; | ||||||
|                     vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; |                     vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             // Send to renderer |             // Send to geometry pipeline | ||||||
|             using Pica::Shader::OutputVertex; |             g_state.geometry_pipeline.SubmitVertex(vs_output); | ||||||
|             auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, |  | ||||||
|                                   const OutputVertex& v2) { |  | ||||||
|                 VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); |  | ||||||
|             }; |  | ||||||
|  |  | ||||||
|             primitive_assembler.SubmitVertex(output_vertex, AddTriangle); |  | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         for (auto& range : memory_accesses.ranges) { |         for (auto& range : memory_accesses.ranges) { | ||||||
|   | |||||||
							
								
								
									
										274
									
								
								src/video_core/geometry_pipeline.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										274
									
								
								src/video_core/geometry_pipeline.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,274 @@ | |||||||
|  | // Copyright 2017 Citra Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #include "video_core/geometry_pipeline.h" | ||||||
|  | #include "video_core/pica_state.h" | ||||||
|  | #include "video_core/regs.h" | ||||||
|  | #include "video_core/renderer_base.h" | ||||||
|  | #include "video_core/video_core.h" | ||||||
|  |  | ||||||
|  | namespace Pica { | ||||||
|  |  | ||||||
|  | /// An attribute buffering interface for different pipeline modes | ||||||
|  | class GeometryPipelineBackend { | ||||||
|  | public: | ||||||
|  |     virtual ~GeometryPipelineBackend() = default; | ||||||
|  |  | ||||||
|  |     /// Checks if there is no incomplete data transfer | ||||||
|  |     virtual bool IsEmpty() const = 0; | ||||||
|  |  | ||||||
|  |     /// Checks if the pipeline needs a direct input from index buffer | ||||||
|  |     virtual bool NeedIndexInput() const = 0; | ||||||
|  |  | ||||||
|  |     /// Submits an index from index buffer | ||||||
|  |     virtual void SubmitIndex(unsigned int val) = 0; | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Submits vertex attributes | ||||||
|  |      * @param input attributes of a vertex output from vertex shader | ||||||
|  |      * @return true if the buffer is full and the geometry shader should be invoked | ||||||
|  |      */ | ||||||
|  |     virtual bool SubmitVertex(const Shader::AttributeBuffer& input) = 0; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // In the Point mode, vertex attributes are sent to the input registers in the geometry shader unit. | ||||||
|  | // The sizes of the vertex shader outputs and geometry shader inputs are constant. The geometry | ||||||
|  | // shader is invoked once the input buffer has been filled up by vertex shader outputs. For example, | ||||||
|  | // if we have a geometry shader that takes 6 inputs, and the vertex shader outputs 2 attributes, it | ||||||
|  | // would take 3 vertices for one geometry shader invocation. | ||||||
|  | // TODO: what happens when the input size is not divisible by the output size? | ||||||
|  | class GeometryPipeline_Point : public GeometryPipelineBackend { | ||||||
|  | public: | ||||||
|  |     GeometryPipeline_Point(const Regs& regs, Shader::GSUnitState& unit) : regs(regs), unit(unit) { | ||||||
|  |         ASSERT(regs.pipeline.variable_primitive == 0); | ||||||
|  |         ASSERT(regs.gs.input_to_uniform == 0); | ||||||
|  |         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||||
|  |         size_t gs_input_num = regs.gs.max_input_attribute_index + 1; | ||||||
|  |         ASSERT(gs_input_num % vs_output_num == 0); | ||||||
|  |         buffer_cur = attribute_buffer.attr; | ||||||
|  |         buffer_end = attribute_buffer.attr + gs_input_num; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool IsEmpty() const override { | ||||||
|  |         return buffer_cur == attribute_buffer.attr; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool NeedIndexInput() const override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void SubmitIndex(unsigned int val) override { | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||||
|  |         buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||||
|  |         if (buffer_cur == buffer_end) { | ||||||
|  |             buffer_cur = attribute_buffer.attr; | ||||||
|  |             unit.LoadInput(regs.gs, attribute_buffer); | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     const Regs& regs; | ||||||
|  |     Shader::GSUnitState& unit; | ||||||
|  |     Shader::AttributeBuffer attribute_buffer; | ||||||
|  |     Math::Vec4<float24>* buffer_cur; | ||||||
|  |     Math::Vec4<float24>* buffer_end; | ||||||
|  |     unsigned int vs_output_num; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // In VariablePrimitive mode, vertex attributes are buffered into the uniform registers in the | ||||||
|  | // geometry shader unit. The number of vertices is variable and is specified by the first index | ||||||
|  | // value in the batch. This mode is usually used for subdivision. | ||||||
|  | class GeometryPipeline_VariablePrimitive : public GeometryPipelineBackend { | ||||||
|  | public: | ||||||
|  |     GeometryPipeline_VariablePrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||||||
|  |         : regs(regs), setup(setup) { | ||||||
|  |         ASSERT(regs.pipeline.variable_primitive == 1); | ||||||
|  |         ASSERT(regs.gs.input_to_uniform == 1); | ||||||
|  |         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool IsEmpty() const override { | ||||||
|  |         return need_index; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool NeedIndexInput() const override { | ||||||
|  |         return need_index; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void SubmitIndex(unsigned int val) override { | ||||||
|  |         DEBUG_ASSERT(need_index); | ||||||
|  |  | ||||||
|  |         // The number of input vertices is written to the first uniform register | ||||||
|  |         float24 vertex_num = float24::FromFloat32(val); | ||||||
|  |         setup.uniforms.f[0] = Math::MakeVec(vertex_num, vertex_num, vertex_num, vertex_num); | ||||||
|  |  | ||||||
|  |         // The second and subsequent uniform registers receive the input vertices | ||||||
|  |         buffer_cur = setup.uniforms.f + 1; | ||||||
|  |  | ||||||
|  |         main_vertex_num = regs.pipeline.variable_vertex_main_num_minus_1 + 1; | ||||||
|  |         total_vertex_num = val; | ||||||
|  |         need_index = false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||||
|  |         DEBUG_ASSERT(!need_index); | ||||||
|  |         if (main_vertex_num != 0) { | ||||||
|  |             // For main vertices, receive all attributes | ||||||
|  |             buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||||
|  |             --main_vertex_num; | ||||||
|  |         } else { | ||||||
|  |             // For other vertices, only receive the first attribute (usually the position) | ||||||
|  |             *(buffer_cur++) = input.attr[0]; | ||||||
|  |         } | ||||||
|  |         --total_vertex_num; | ||||||
|  |  | ||||||
|  |         if (total_vertex_num == 0) { | ||||||
|  |             need_index = true; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     bool need_index = true; | ||||||
|  |     const Regs& regs; | ||||||
|  |     Shader::ShaderSetup& setup; | ||||||
|  |     unsigned int main_vertex_num; | ||||||
|  |     unsigned int total_vertex_num; | ||||||
|  |     Math::Vec4<float24>* buffer_cur; | ||||||
|  |     unsigned int vs_output_num; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | // In FixedPrimitive mode, vertex attributes are buffered into the uniform registers in the geometry | ||||||
|  | // shader unit. The number of vertices per shader invocation is constant. This is usually used for | ||||||
|  | // particle systems. | ||||||
|  | class GeometryPipeline_FixedPrimitive : public GeometryPipelineBackend { | ||||||
|  | public: | ||||||
|  |     GeometryPipeline_FixedPrimitive(const Regs& regs, Shader::ShaderSetup& setup) | ||||||
|  |         : regs(regs), setup(setup) { | ||||||
|  |         ASSERT(regs.pipeline.variable_primitive == 0); | ||||||
|  |         ASSERT(regs.gs.input_to_uniform == 1); | ||||||
|  |         vs_output_num = regs.pipeline.vs_outmap_total_minus_1_a + 1; | ||||||
|  |         ASSERT(vs_output_num == regs.pipeline.gs_config.stride_minus_1 + 1); | ||||||
|  |         size_t vertex_num = regs.pipeline.gs_config.fixed_vertex_num_minus_1 + 1; | ||||||
|  |         buffer_cur = buffer_begin = setup.uniforms.f + regs.pipeline.gs_config.start_index; | ||||||
|  |         buffer_end = buffer_begin + vs_output_num * vertex_num; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool IsEmpty() const override { | ||||||
|  |         return buffer_cur == buffer_begin; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool NeedIndexInput() const override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     void SubmitIndex(unsigned int val) override { | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     bool SubmitVertex(const Shader::AttributeBuffer& input) override { | ||||||
|  |         buffer_cur = std::copy(input.attr, input.attr + vs_output_num, buffer_cur); | ||||||
|  |         if (buffer_cur == buffer_end) { | ||||||
|  |             buffer_cur = buffer_begin; | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     const Regs& regs; | ||||||
|  |     Shader::ShaderSetup& setup; | ||||||
|  |     Math::Vec4<float24>* buffer_begin; | ||||||
|  |     Math::Vec4<float24>* buffer_cur; | ||||||
|  |     Math::Vec4<float24>* buffer_end; | ||||||
|  |     unsigned int vs_output_num; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | GeometryPipeline::GeometryPipeline(State& state) : state(state) {} | ||||||
|  |  | ||||||
|  | GeometryPipeline::~GeometryPipeline() = default; | ||||||
|  |  | ||||||
|  | void GeometryPipeline::SetVertexHandler(Shader::VertexHandler vertex_handler) { | ||||||
|  |     this->vertex_handler = vertex_handler; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GeometryPipeline::Setup(Shader::ShaderEngine* shader_engine) { | ||||||
|  |     if (!backend) | ||||||
|  |         return; | ||||||
|  |  | ||||||
|  |     this->shader_engine = shader_engine; | ||||||
|  |     shader_engine->SetupBatch(state.gs, state.regs.gs.main_offset); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GeometryPipeline::Reconfigure() { | ||||||
|  |     ASSERT(!backend || backend->IsEmpty()); | ||||||
|  |  | ||||||
|  |     if (state.regs.pipeline.use_gs == PipelineRegs::UseGS::No) { | ||||||
|  |         backend = nullptr; | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     ASSERT(state.regs.pipeline.use_gs == PipelineRegs::UseGS::Yes); | ||||||
|  |  | ||||||
|  |     // The following assumes that when geometry shader is in use, the shader unit 3 is configured as | ||||||
|  |     // a geometry shader unit. | ||||||
|  |     // TODO: what happens if this is not true? | ||||||
|  |     ASSERT(state.regs.pipeline.gs_unit_exclusive_configuration == 1); | ||||||
|  |     ASSERT(state.regs.gs.shader_mode == ShaderRegs::ShaderMode::GS); | ||||||
|  |  | ||||||
|  |     state.gs_unit.ConfigOutput(state.regs.gs); | ||||||
|  |  | ||||||
|  |     ASSERT(state.regs.pipeline.vs_outmap_total_minus_1_a == | ||||||
|  |            state.regs.pipeline.vs_outmap_total_minus_1_b); | ||||||
|  |  | ||||||
|  |     switch (state.regs.pipeline.gs_config.mode) { | ||||||
|  |     case PipelineRegs::GSMode::Point: | ||||||
|  |         backend = std::make_unique<GeometryPipeline_Point>(state.regs, state.gs_unit); | ||||||
|  |         break; | ||||||
|  |     case PipelineRegs::GSMode::VariablePrimitive: | ||||||
|  |         backend = std::make_unique<GeometryPipeline_VariablePrimitive>(state.regs, state.gs); | ||||||
|  |         break; | ||||||
|  |     case PipelineRegs::GSMode::FixedPrimitive: | ||||||
|  |         backend = std::make_unique<GeometryPipeline_FixedPrimitive>(state.regs, state.gs); | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         UNREACHABLE(); | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | bool GeometryPipeline::NeedIndexInput() const { | ||||||
|  |     if (!backend) | ||||||
|  |         return false; | ||||||
|  |     return backend->NeedIndexInput(); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GeometryPipeline::SubmitIndex(unsigned int val) { | ||||||
|  |     backend->SubmitIndex(val); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GeometryPipeline::SubmitVertex(const Shader::AttributeBuffer& input) { | ||||||
|  |     if (!backend) { | ||||||
|  |         // No backend means the geometry shader is disabled, so we send the vertex shader output | ||||||
|  |         // directly to the primitive assembler. | ||||||
|  |         vertex_handler(input); | ||||||
|  |     } else { | ||||||
|  |         if (backend->SubmitVertex(input)) { | ||||||
|  |             shader_engine->Run(state.gs, state.gs_unit); | ||||||
|  |  | ||||||
|  |             // The uniform b15 is set to true after every geometry shader invocation. This is useful | ||||||
|  |             // for the shader to know if this is the first invocation in a batch, provided the | ||||||
|  |             // program sets b15 to false first. | ||||||
|  |             state.gs.uniforms.b[15] = true; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | } // namespace Pica | ||||||
							
								
								
									
										49
									
								
								src/video_core/geometry_pipeline.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										49
									
								
								src/video_core/geometry_pipeline.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,49 @@ | |||||||
|  | // Copyright 2017 Citra Emulator Project | ||||||
|  | // Licensed under GPLv2 or any later version | ||||||
|  | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
|  | #pragma once | ||||||
|  |  | ||||||
|  | #include <memory> | ||||||
|  | #include "video_core/shader/shader.h" | ||||||
|  |  | ||||||
|  | namespace Pica { | ||||||
|  |  | ||||||
|  | struct State; | ||||||
|  |  | ||||||
|  | class GeometryPipelineBackend; | ||||||
|  |  | ||||||
|  | /// A pipeline receiving from vertex shader and sending to geometry shader and primitive assembler | ||||||
|  | class GeometryPipeline { | ||||||
|  | public: | ||||||
|  |     explicit GeometryPipeline(State& state); | ||||||
|  |     ~GeometryPipeline(); | ||||||
|  |  | ||||||
|  |     /// Sets the handler for receiving vertex outputs from vertex shader | ||||||
|  |     void SetVertexHandler(Shader::VertexHandler vertex_handler); | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Sets up the geometry shader unit if it is in use | ||||||
|  |      * @param shader_engine the shader engine for the geometry shader to run | ||||||
|  |      */ | ||||||
|  |     void Setup(Shader::ShaderEngine* shader_engine); | ||||||
|  |  | ||||||
|  |     /// Reconfigures the pipeline according to current register settings | ||||||
|  |     void Reconfigure(); | ||||||
|  |  | ||||||
|  |     /// Checks if the pipeline needs a direct input from index buffer | ||||||
|  |     bool NeedIndexInput() const; | ||||||
|  |  | ||||||
|  |     /// Submits an index from index buffer. Call this only when NeedIndexInput returns true | ||||||
|  |     void SubmitIndex(unsigned int val); | ||||||
|  |  | ||||||
|  |     /// Submits vertex attributes output from vertex shader | ||||||
|  |     void SubmitVertex(const Shader::AttributeBuffer& input); | ||||||
|  |  | ||||||
|  | private: | ||||||
|  |     Shader::VertexHandler vertex_handler; | ||||||
|  |     Shader::ShaderEngine* shader_engine; | ||||||
|  |     std::unique_ptr<GeometryPipelineBackend> backend; | ||||||
|  |     State& state; | ||||||
|  | }; | ||||||
|  | } // namespace Pica | ||||||
| @@ -3,9 +3,11 @@ | |||||||
| // Refer to the license.txt file included. | // Refer to the license.txt file included. | ||||||
|  |  | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include "video_core/geometry_pipeline.h" | ||||||
| #include "video_core/pica.h" | #include "video_core/pica.h" | ||||||
| #include "video_core/pica_state.h" | #include "video_core/pica_state.h" | ||||||
| #include "video_core/regs_pipeline.h" | #include "video_core/renderer_base.h" | ||||||
|  | #include "video_core/video_core.h" | ||||||
|  |  | ||||||
| namespace Pica { | namespace Pica { | ||||||
|  |  | ||||||
| @@ -24,6 +26,23 @@ void Zero(T& o) { | |||||||
|     memset(&o, 0, sizeof(o)); |     memset(&o, 0, sizeof(o)); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | State::State() : geometry_pipeline(*this) { | ||||||
|  |     auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) { | ||||||
|  |         using Pica::Shader::OutputVertex; | ||||||
|  |         auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1, | ||||||
|  |                                   const OutputVertex& v2) { | ||||||
|  |             VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); | ||||||
|  |         }; | ||||||
|  |         primitive_assembler.SubmitVertex( | ||||||
|  |             Shader::OutputVertex::FromAttributeBuffer(regs.rasterizer, vertex), AddTriangle); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     auto SetWinding = [this]() { primitive_assembler.SetWinding(); }; | ||||||
|  |  | ||||||
|  |     g_state.gs_unit.SetVertexHandler(SubmitVertex, SetWinding); | ||||||
|  |     g_state.geometry_pipeline.SetVertexHandler(SubmitVertex); | ||||||
|  | } | ||||||
|  |  | ||||||
| void State::Reset() { | void State::Reset() { | ||||||
|     Zero(regs); |     Zero(regs); | ||||||
|     Zero(vs); |     Zero(vs); | ||||||
|   | |||||||
| @@ -8,6 +8,7 @@ | |||||||
| #include "common/bit_field.h" | #include "common/bit_field.h" | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/vector_math.h" | #include "common/vector_math.h" | ||||||
|  | #include "video_core/geometry_pipeline.h" | ||||||
| #include "video_core/primitive_assembly.h" | #include "video_core/primitive_assembly.h" | ||||||
| #include "video_core/regs.h" | #include "video_core/regs.h" | ||||||
| #include "video_core/shader/shader.h" | #include "video_core/shader/shader.h" | ||||||
| @@ -16,6 +17,7 @@ namespace Pica { | |||||||
|  |  | ||||||
| /// Struct used to describe current Pica state | /// Struct used to describe current Pica state | ||||||
| struct State { | struct State { | ||||||
|  |     State(); | ||||||
|     void Reset(); |     void Reset(); | ||||||
|  |  | ||||||
|     /// Pica registers |     /// Pica registers | ||||||
| @@ -137,8 +139,17 @@ struct State { | |||||||
|         Shader::AttributeBuffer input_vertex; |         Shader::AttributeBuffer input_vertex; | ||||||
|         // Index of the next attribute to be loaded into `input_vertex`. |         // Index of the next attribute to be loaded into `input_vertex`. | ||||||
|         u32 current_attribute = 0; |         u32 current_attribute = 0; | ||||||
|  |         // Indicates that immediate mode has just started and the geometry pipeline needs to be | ||||||
|  |         // reconfigured | ||||||
|  |         bool reset_geometry_pipeline = true; | ||||||
|     } immediate; |     } immediate; | ||||||
|  |  | ||||||
|  |     // The geometry shader unit needs to be kept in the global state because some shaders rely on | ||||||
|  |     // register values being preserved across shader invocations. | ||||||
|  |     // TODO: also bring the three vertex shader units here and implement the shader scheduler. | ||||||
|  |     Shader::GSUnitState gs_unit; | ||||||
|  |  | ||||||
|  |     GeometryPipeline geometry_pipeline; | ||||||
|  |  | ||||||
|     // This is constructed with a dummy triangle topology |     // This is constructed with a dummy triangle topology | ||||||
|     PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; |     PrimitiveAssembler<Shader::OutputVertex> primitive_assembler; | ||||||
| }; | }; | ||||||
|   | |||||||
| @@ -17,16 +17,19 @@ template <typename VertexType> | |||||||
| void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | ||||||
|                                                   TriangleHandler triangle_handler) { |                                                   TriangleHandler triangle_handler) { | ||||||
|     switch (topology) { |     switch (topology) { | ||||||
|     // TODO: Figure out what's different with TriangleTopology::Shader. |  | ||||||
|     case PipelineRegs::TriangleTopology::List: |     case PipelineRegs::TriangleTopology::List: | ||||||
|     case PipelineRegs::TriangleTopology::Shader: |     case PipelineRegs::TriangleTopology::Shader: | ||||||
|         if (buffer_index < 2) { |         if (buffer_index < 2) { | ||||||
|             buffer[buffer_index++] = vtx; |             buffer[buffer_index++] = vtx; | ||||||
|         } else { |         } else { | ||||||
|             buffer_index = 0; |             buffer_index = 0; | ||||||
|  |             if (topology == PipelineRegs::TriangleTopology::Shader && winding) { | ||||||
|  |                 triangle_handler(buffer[1], buffer[0], vtx); | ||||||
|  |                 winding = false; | ||||||
|  |             } else { | ||||||
|                 triangle_handler(buffer[0], buffer[1], vtx); |                 triangle_handler(buffer[0], buffer[1], vtx); | ||||||
|             } |             } | ||||||
|  |         } | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|     case PipelineRegs::TriangleTopology::Strip: |     case PipelineRegs::TriangleTopology::Strip: | ||||||
| @@ -50,10 +53,16 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(const VertexType& vtx, | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | template <typename VertexType> | ||||||
|  | void PrimitiveAssembler<VertexType>::SetWinding() { | ||||||
|  |     winding = true; | ||||||
|  | } | ||||||
|  |  | ||||||
| template <typename VertexType> | template <typename VertexType> | ||||||
| void PrimitiveAssembler<VertexType>::Reset() { | void PrimitiveAssembler<VertexType>::Reset() { | ||||||
|     buffer_index = 0; |     buffer_index = 0; | ||||||
|     strip_ready = false; |     strip_ready = false; | ||||||
|  |     winding = false; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename VertexType> | template <typename VertexType> | ||||||
|   | |||||||
| @@ -29,6 +29,12 @@ struct PrimitiveAssembler { | |||||||
|      */ |      */ | ||||||
|     void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); |     void SubmitVertex(const VertexType& vtx, TriangleHandler triangle_handler); | ||||||
|  |  | ||||||
|  |     /** | ||||||
|  |      * Invert the vertex order of the next triangle. Called by geometry shader emitter. | ||||||
|  |      * This only takes effect for TriangleTopology::Shader. | ||||||
|  |      */ | ||||||
|  |     void SetWinding(); | ||||||
|  |  | ||||||
|     /** |     /** | ||||||
|      * Resets the internal state of the PrimitiveAssembler. |      * Resets the internal state of the PrimitiveAssembler. | ||||||
|      */ |      */ | ||||||
| @@ -45,6 +51,7 @@ private: | |||||||
|     int buffer_index; |     int buffer_index; | ||||||
|     VertexType buffer[2]; |     VertexType buffer[2]; | ||||||
|     bool strip_ready = false; |     bool strip_ready = false; | ||||||
|  |     bool winding = false; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -147,7 +147,15 @@ struct PipelineRegs { | |||||||
|     // Number of vertices to render |     // Number of vertices to render | ||||||
|     u32 num_vertices; |     u32 num_vertices; | ||||||
|  |  | ||||||
|     INSERT_PADDING_WORDS(0x1); |     enum class UseGS : u32 { | ||||||
|  |         No = 0, | ||||||
|  |         Yes = 2, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         BitField<0, 2, UseGS> use_gs; | ||||||
|  |         BitField<31, 1, u32> variable_primitive; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     // The index of the first vertex to render |     // The index of the first vertex to render | ||||||
|     u32 vertex_offset; |     u32 vertex_offset; | ||||||
| @@ -218,7 +226,29 @@ struct PipelineRegs { | |||||||
|  |  | ||||||
|     GPUMode gpu_mode; |     GPUMode gpu_mode; | ||||||
|  |  | ||||||
|     INSERT_PADDING_WORDS(0x18); |     INSERT_PADDING_WORDS(0x4); | ||||||
|  |     BitField<0, 4, u32> vs_outmap_total_minus_1_a; | ||||||
|  |     INSERT_PADDING_WORDS(0x6); | ||||||
|  |     BitField<0, 4, u32> vs_outmap_total_minus_1_b; | ||||||
|  |  | ||||||
|  |     enum class GSMode : u32 { | ||||||
|  |         Point = 0, | ||||||
|  |         VariablePrimitive = 1, | ||||||
|  |         FixedPrimitive = 2, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         BitField<0, 8, GSMode> mode; | ||||||
|  |         BitField<8, 4, u32> fixed_vertex_num_minus_1; | ||||||
|  |         BitField<12, 4, u32> stride_minus_1; | ||||||
|  |         BitField<16, 4, u32> start_index; | ||||||
|  |     } gs_config; | ||||||
|  |  | ||||||
|  |     INSERT_PADDING_WORDS(0x1); | ||||||
|  |  | ||||||
|  |     u32 variable_vertex_main_num_minus_1; | ||||||
|  |  | ||||||
|  |     INSERT_PADDING_WORDS(0x9); | ||||||
|  |  | ||||||
|     enum class TriangleTopology : u32 { |     enum class TriangleTopology : u32 { | ||||||
|         List = 0, |         List = 0, | ||||||
|   | |||||||
| @@ -24,9 +24,16 @@ struct ShaderRegs { | |||||||
|  |  | ||||||
|     INSERT_PADDING_WORDS(0x4); |     INSERT_PADDING_WORDS(0x4); | ||||||
|  |  | ||||||
|  |     enum ShaderMode { | ||||||
|  |         GS = 0x08, | ||||||
|  |         VS = 0xA0, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     union { |     union { | ||||||
|         // Number of input attributes to shader unit - 1 |         // Number of input attributes to shader unit - 1 | ||||||
|         BitField<0, 4, u32> max_input_attribute_index; |         BitField<0, 4, u32> max_input_attribute_index; | ||||||
|  |         BitField<8, 8, u32> input_to_uniform; | ||||||
|  |         BitField<24, 8, ShaderMode> shader_mode; | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // Offset to shader program entry point (in words) |     // Offset to shader program entry point (in words) | ||||||
|   | |||||||
| @@ -21,7 +21,8 @@ namespace Pica { | |||||||
|  |  | ||||||
| namespace Shader { | namespace Shader { | ||||||
|  |  | ||||||
| OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& input) { | OutputVertex OutputVertex::FromAttributeBuffer(const RasterizerRegs& regs, | ||||||
|  |                                                const AttributeBuffer& input) { | ||||||
|     // Setup output data |     // Setup output data | ||||||
|     union { |     union { | ||||||
|         OutputVertex ret{}; |         OutputVertex ret{}; | ||||||
| @@ -82,6 +83,44 @@ void UnitState::WriteOutput(const ShaderRegs& config, AttributeBuffer& output) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | UnitState::UnitState(GSEmitter* emitter) : emitter_ptr(emitter) {} | ||||||
|  |  | ||||||
|  | GSEmitter::GSEmitter() { | ||||||
|  |     handlers = new Handlers; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | GSEmitter::~GSEmitter() { | ||||||
|  |     delete handlers; | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GSEmitter::Emit(Math::Vec4<float24> (&vertex)[16]) { | ||||||
|  |     ASSERT(vertex_id < 3); | ||||||
|  |     std::copy(std::begin(vertex), std::end(vertex), buffer[vertex_id].begin()); | ||||||
|  |     if (prim_emit) { | ||||||
|  |         if (winding) | ||||||
|  |             handlers->winding_setter(); | ||||||
|  |         for (size_t i = 0; i < buffer.size(); ++i) { | ||||||
|  |             AttributeBuffer output; | ||||||
|  |             unsigned int output_i = 0; | ||||||
|  |             for (unsigned int reg : Common::BitSet<u32>(output_mask)) { | ||||||
|  |                 output.attr[output_i++] = buffer[i][reg]; | ||||||
|  |             } | ||||||
|  |             handlers->vertex_handler(output); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | GSUnitState::GSUnitState() : UnitState(&emitter) {} | ||||||
|  |  | ||||||
|  | void GSUnitState::SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter) { | ||||||
|  |     emitter.handlers->vertex_handler = std::move(vertex_handler); | ||||||
|  |     emitter.handlers->winding_setter = std::move(winding_setter); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void GSUnitState::ConfigOutput(const ShaderRegs& config) { | ||||||
|  |     emitter.output_mask = config.output_mask; | ||||||
|  | } | ||||||
|  |  | ||||||
| MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); | ||||||
|  |  | ||||||
| #ifdef ARCHITECTURE_x86_64 | #ifdef ARCHITECTURE_x86_64 | ||||||
|   | |||||||
| @@ -6,6 +6,7 @@ | |||||||
|  |  | ||||||
| #include <array> | #include <array> | ||||||
| #include <cstddef> | #include <cstddef> | ||||||
|  | #include <functional> | ||||||
| #include <type_traits> | #include <type_traits> | ||||||
| #include <nihstro/shader_bytecode.h> | #include <nihstro/shader_bytecode.h> | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| @@ -31,6 +32,12 @@ struct AttributeBuffer { | |||||||
|     alignas(16) Math::Vec4<float24> attr[16]; |     alignas(16) Math::Vec4<float24> attr[16]; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /// Handler type for receiving vertex outputs from vertex shader or geometry shader | ||||||
|  | using VertexHandler = std::function<void(const AttributeBuffer&)>; | ||||||
|  |  | ||||||
|  | /// Handler type for signaling to invert the vertex order of the next triangle | ||||||
|  | using WindingSetter = std::function<void()>; | ||||||
|  |  | ||||||
| struct OutputVertex { | struct OutputVertex { | ||||||
|     Math::Vec4<float24> pos; |     Math::Vec4<float24> pos; | ||||||
|     Math::Vec4<float24> quat; |     Math::Vec4<float24> quat; | ||||||
| @@ -43,7 +50,8 @@ struct OutputVertex { | |||||||
|     INSERT_PADDING_WORDS(1); |     INSERT_PADDING_WORDS(1); | ||||||
|     Math::Vec2<float24> tc2; |     Math::Vec2<float24> tc2; | ||||||
|  |  | ||||||
|     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, AttributeBuffer& output); |     static OutputVertex FromAttributeBuffer(const RasterizerRegs& regs, | ||||||
|  |                                             const AttributeBuffer& output); | ||||||
| }; | }; | ||||||
| #define ASSERT_POS(var, pos)                                                                       \ | #define ASSERT_POS(var, pos)                                                                       \ | ||||||
|     static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ |     static_assert(offsetof(OutputVertex, var) == pos * sizeof(float24), "Semantic at wrong "       \ | ||||||
| @@ -60,6 +68,29 @@ ASSERT_POS(tc2, RasterizerRegs::VSOutputAttributes::TEXCOORD2_U); | |||||||
| static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); | ||||||
| static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has invalid size"); | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * This structure contains state information for primitive emitting in geometry shader. | ||||||
|  |  */ | ||||||
|  | struct GSEmitter { | ||||||
|  |     std::array<std::array<Math::Vec4<float24>, 16>, 3> buffer; | ||||||
|  |     u8 vertex_id; | ||||||
|  |     bool prim_emit; | ||||||
|  |     bool winding; | ||||||
|  |     u32 output_mask; | ||||||
|  |  | ||||||
|  |     // Function objects are hidden behind a raw pointer to make the structure standard layout type, | ||||||
|  |     // for JIT to use offsetof to access other members. | ||||||
|  |     struct Handlers { | ||||||
|  |         VertexHandler vertex_handler; | ||||||
|  |         WindingSetter winding_setter; | ||||||
|  |     } * handlers; | ||||||
|  |  | ||||||
|  |     GSEmitter(); | ||||||
|  |     ~GSEmitter(); | ||||||
|  |     void Emit(Math::Vec4<float24> (&vertex)[16]); | ||||||
|  | }; | ||||||
|  | static_assert(std::is_standard_layout<GSEmitter>::value, "GSEmitter is not standard layout type"); | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * This structure contains the state information that needs to be unique for a shader unit. The 3DS |  * This structure contains the state information that needs to be unique for a shader unit. The 3DS | ||||||
|  * has four shader units that process shaders in parallel. At the present, Citra only implements a |  * has four shader units that process shaders in parallel. At the present, Citra only implements a | ||||||
| @@ -67,6 +98,7 @@ static_assert(sizeof(OutputVertex) == 24 * sizeof(float), "OutputVertex has inva | |||||||
|  * here will make it easier for us to parallelize the shader processing later. |  * here will make it easier for us to parallelize the shader processing later. | ||||||
|  */ |  */ | ||||||
| struct UnitState { | struct UnitState { | ||||||
|  |     explicit UnitState(GSEmitter* emitter = nullptr); | ||||||
|     struct Registers { |     struct Registers { | ||||||
|         // The registers are accessed by the shader JIT using SSE instructions, and are therefore |         // The registers are accessed by the shader JIT using SSE instructions, and are therefore | ||||||
|         // required to be 16-byte aligned. |         // required to be 16-byte aligned. | ||||||
| @@ -82,6 +114,8 @@ struct UnitState { | |||||||
|     // TODO: How many bits do these actually have? |     // TODO: How many bits do these actually have? | ||||||
|     s32 address_registers[3]; |     s32 address_registers[3]; | ||||||
|  |  | ||||||
|  |     GSEmitter* emitter_ptr; | ||||||
|  |  | ||||||
|     static size_t InputOffset(const SourceRegister& reg) { |     static size_t InputOffset(const SourceRegister& reg) { | ||||||
|         switch (reg.GetRegisterType()) { |         switch (reg.GetRegisterType()) { | ||||||
|         case RegisterType::Input: |         case RegisterType::Input: | ||||||
| @@ -125,6 +159,19 @@ struct UnitState { | |||||||
|     void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); |     void WriteOutput(const ShaderRegs& config, AttributeBuffer& output); | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | /** | ||||||
|  |  * This is an extended shader unit state that represents the special unit that can run both vertex | ||||||
|  |  * shader and geometry shader. It contains an additional primitive emitter and utilities for | ||||||
|  |  * geometry shader. | ||||||
|  |  */ | ||||||
|  | struct GSUnitState : public UnitState { | ||||||
|  |     GSUnitState(); | ||||||
|  |     void SetVertexHandler(VertexHandler vertex_handler, WindingSetter winding_setter); | ||||||
|  |     void ConfigOutput(const ShaderRegs& config); | ||||||
|  |  | ||||||
|  |     GSEmitter emitter; | ||||||
|  | }; | ||||||
|  |  | ||||||
| struct ShaderSetup { | struct ShaderSetup { | ||||||
|     struct { |     struct { | ||||||
|         // The float uniforms are accessed by the shader JIT using SSE instructions, and are |         // The float uniforms are accessed by the shader JIT using SSE instructions, and are | ||||||
|   | |||||||
| @@ -636,6 +636,22 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData | |||||||
|                 break; |                 break; | ||||||
|             } |             } | ||||||
|  |  | ||||||
|  |             case OpCode::Id::EMIT: { | ||||||
|  |                 GSEmitter* emitter = state.emitter_ptr; | ||||||
|  |                 ASSERT_MSG(emitter, "Execute EMIT on VS"); | ||||||
|  |                 emitter->Emit(state.registers.output); | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             case OpCode::Id::SETEMIT: { | ||||||
|  |                 GSEmitter* emitter = state.emitter_ptr; | ||||||
|  |                 ASSERT_MSG(emitter, "Execute SETEMIT on VS"); | ||||||
|  |                 emitter->vertex_id = instr.setemit.vertex_id; | ||||||
|  |                 emitter->prim_emit = instr.setemit.prim_emit != 0; | ||||||
|  |                 emitter->winding = instr.setemit.winding != 0; | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |  | ||||||
|             default: |             default: | ||||||
|                 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", |                 LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", | ||||||
|                           (int)instr.opcode.Value().EffectiveOpCode(), |                           (int)instr.opcode.Value().EffectiveOpCode(), | ||||||
|   | |||||||
| @@ -75,8 +75,8 @@ const JitFunction instr_table[64] = { | |||||||
|     &JitShader::Compile_IF,    // ifu |     &JitShader::Compile_IF,    // ifu | ||||||
|     &JitShader::Compile_IF,    // ifc |     &JitShader::Compile_IF,    // ifc | ||||||
|     &JitShader::Compile_LOOP,  // loop |     &JitShader::Compile_LOOP,  // loop | ||||||
|     nullptr,                   // emit |     &JitShader::Compile_EMIT,  // emit | ||||||
|     nullptr,                   // sete |     &JitShader::Compile_SETE,  // sete | ||||||
|     &JitShader::Compile_JMP,   // jmpc |     &JitShader::Compile_JMP,   // jmpc | ||||||
|     &JitShader::Compile_JMP,   // jmpu |     &JitShader::Compile_JMP,   // jmpu | ||||||
|     &JitShader::Compile_CMP,   // cmp |     &JitShader::Compile_CMP,   // cmp | ||||||
| @@ -772,6 +772,51 @@ void JitShader::Compile_JMP(Instruction instr) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | static void Emit(GSEmitter* emitter, Math::Vec4<float24> (*output)[16]) { | ||||||
|  |     emitter->Emit(*output); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void JitShader::Compile_EMIT(Instruction instr) { | ||||||
|  |     Label have_emitter, end; | ||||||
|  |     mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||||||
|  |     test(rax, rax); | ||||||
|  |     jnz(have_emitter); | ||||||
|  |  | ||||||
|  |     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute EMIT on VS")); | ||||||
|  |     CallFarFunction(*this, LogCritical); | ||||||
|  |     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     jmp(end); | ||||||
|  |  | ||||||
|  |     L(have_emitter); | ||||||
|  |     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     mov(ABI_PARAM1, rax); | ||||||
|  |     mov(ABI_PARAM2, STATE); | ||||||
|  |     add(ABI_PARAM2, static_cast<Xbyak::uint32>(offsetof(UnitState, registers.output))); | ||||||
|  |     CallFarFunction(*this, Emit); | ||||||
|  |     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     L(end); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | void JitShader::Compile_SETE(Instruction instr) { | ||||||
|  |     Label have_emitter, end; | ||||||
|  |     mov(rax, qword[STATE + offsetof(UnitState, emitter_ptr)]); | ||||||
|  |     test(rax, rax); | ||||||
|  |     jnz(have_emitter); | ||||||
|  |  | ||||||
|  |     ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     mov(ABI_PARAM1, reinterpret_cast<size_t>("Execute SETEMIT on VS")); | ||||||
|  |     CallFarFunction(*this, LogCritical); | ||||||
|  |     ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0); | ||||||
|  |     jmp(end); | ||||||
|  |  | ||||||
|  |     L(have_emitter); | ||||||
|  |     mov(byte[rax + offsetof(GSEmitter, vertex_id)], instr.setemit.vertex_id); | ||||||
|  |     mov(byte[rax + offsetof(GSEmitter, prim_emit)], instr.setemit.prim_emit); | ||||||
|  |     mov(byte[rax + offsetof(GSEmitter, winding)], instr.setemit.winding); | ||||||
|  |     L(end); | ||||||
|  | } | ||||||
|  |  | ||||||
| void JitShader::Compile_Block(unsigned end) { | void JitShader::Compile_Block(unsigned end) { | ||||||
|     while (program_counter < end) { |     while (program_counter < end) { | ||||||
|         Compile_NextInstr(); |         Compile_NextInstr(); | ||||||
|   | |||||||
| @@ -66,6 +66,8 @@ public: | |||||||
|     void Compile_JMP(Instruction instr); |     void Compile_JMP(Instruction instr); | ||||||
|     void Compile_CMP(Instruction instr); |     void Compile_CMP(Instruction instr); | ||||||
|     void Compile_MAD(Instruction instr); |     void Compile_MAD(Instruction instr); | ||||||
|  |     void Compile_EMIT(Instruction instr); | ||||||
|  |     void Compile_SETE(Instruction instr); | ||||||
|  |  | ||||||
| private: | private: | ||||||
|     void Compile_Block(unsigned end); |     void Compile_Block(unsigned end); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user