shader: Implement geometry shaders
This commit is contained in:
		| @@ -140,7 +140,27 @@ Id DefineVariable(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin | ||||
|     return id; | ||||
| } | ||||
|  | ||||
| u32 NumVertices(InputTopology input_topology) { | ||||
|     switch (input_topology) { | ||||
|     case InputTopology::Points: | ||||
|         return 1; | ||||
|     case InputTopology::Lines: | ||||
|         return 2; | ||||
|     case InputTopology::LinesAdjacency: | ||||
|         return 4; | ||||
|     case InputTopology::Triangles: | ||||
|         return 3; | ||||
|     case InputTopology::TrianglesAdjacency: | ||||
|         return 6; | ||||
|     } | ||||
|     throw InvalidArgument("Invalid input topology {}", input_topology); | ||||
| } | ||||
|  | ||||
| Id DefineInput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin = std::nullopt) { | ||||
|     if (ctx.stage == Stage::Geometry) { | ||||
|         const u32 num_vertices{NumVertices(ctx.profile.input_topology)}; | ||||
|         type = ctx.TypeArray(type, ctx.Constant(ctx.U32[1], num_vertices)); | ||||
|     } | ||||
|     return DefineVariable(ctx, type, builtin, spv::StorageClass::Input); | ||||
| } | ||||
|  | ||||
| @@ -455,12 +475,16 @@ void EmitContext::DefineSharedMemory(const IR::Program& program) { | ||||
|  | ||||
| void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|     const auto make_load{[&] { | ||||
|         const bool is_array{stage == Stage::Geometry}; | ||||
|         const Id end_block{OpLabel()}; | ||||
|         const Id default_label{OpLabel()}; | ||||
|  | ||||
|         const Id func_type_load{TypeFunction(F32[1], U32[1])}; | ||||
|         const Id func_type_load{is_array ? TypeFunction(F32[1], U32[1], U32[1]) | ||||
|                                          : TypeFunction(F32[1], U32[1])}; | ||||
|         const Id func{OpFunction(F32[1], spv::FunctionControlMask::MaskNone, func_type_load)}; | ||||
|         const Id offset{OpFunctionParameter(U32[1])}; | ||||
|         const Id vertex{is_array ? OpFunctionParameter(U32[1]) : Id{}}; | ||||
|  | ||||
|         AddLabel(); | ||||
|         const Id base_index{OpShiftRightArithmetic(U32[1], offset, Constant(U32[1], 2U))}; | ||||
|         const Id masked_index{OpBitwiseAnd(U32[1], base_index, Constant(U32[1], 3U))}; | ||||
| @@ -472,7 +496,7 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|             labels.push_back(OpLabel()); | ||||
|         } | ||||
|         const u32 base_attribute_value = static_cast<u32>(IR::Attribute::Generic0X) >> 2; | ||||
|         for (u32 i = 0; i < info.input_generics.size(); i++) { | ||||
|         for (u32 i = 0; i < info.input_generics.size(); ++i) { | ||||
|             if (!info.input_generics[i].used) { | ||||
|                 continue; | ||||
|             } | ||||
| @@ -486,7 +510,10 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|         size_t label_index{0}; | ||||
|         if (info.loads_position) { | ||||
|             AddLabel(labels[label_index]); | ||||
|             const Id result{OpLoad(F32[1], OpAccessChain(input_f32, input_position, masked_index))}; | ||||
|             const Id pointer{is_array | ||||
|                                  ? OpAccessChain(input_f32, input_position, vertex, masked_index) | ||||
|                                  : OpAccessChain(input_f32, input_position, masked_index)}; | ||||
|             const Id result{OpLoad(F32[1], pointer)}; | ||||
|             OpReturnValue(result); | ||||
|             ++label_index; | ||||
|         } | ||||
| @@ -502,7 +529,9 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|                 continue; | ||||
|             } | ||||
|             const Id generic_id{input_generics.at(i)}; | ||||
|             const Id pointer{OpAccessChain(type->pointer, generic_id, masked_index)}; | ||||
|             const Id pointer{is_array | ||||
|                                  ? OpAccessChain(type->pointer, generic_id, vertex, masked_index) | ||||
|                                  : OpAccessChain(type->pointer, generic_id, masked_index)}; | ||||
|             const Id value{OpLoad(type->id, pointer)}; | ||||
|             const Id result{type->needs_cast ? OpBitcast(F32[1], value) : value}; | ||||
|             OpReturnValue(result); | ||||
| @@ -910,13 +939,13 @@ void EmitContext::DefineOutputs(const Info& info) { | ||||
|     } | ||||
|     if (info.stores_point_size || profile.fixed_state_point_size) { | ||||
|         if (stage == Stage::Fragment) { | ||||
|             throw NotImplementedException("Storing PointSize in Fragment stage"); | ||||
|             throw NotImplementedException("Storing PointSize in fragment stage"); | ||||
|         } | ||||
|         output_point_size = DefineOutput(*this, F32[1], spv::BuiltIn::PointSize); | ||||
|     } | ||||
|     if (info.stores_clip_distance) { | ||||
|         if (stage == Stage::Fragment) { | ||||
|             throw NotImplementedException("Storing PointSize in Fragment stage"); | ||||
|             throw NotImplementedException("Storing ClipDistance in fragment stage"); | ||||
|         } | ||||
|         const Id type{TypeArray(F32[1], Constant(U32[1], 8U))}; | ||||
|         clip_distances = DefineOutput(*this, type, spv::BuiltIn::ClipDistance); | ||||
| @@ -924,7 +953,7 @@ void EmitContext::DefineOutputs(const Info& info) { | ||||
|     if (info.stores_viewport_index && | ||||
|         (profile.support_viewport_index_layer_non_geometry || stage == Shader::Stage::Geometry)) { | ||||
|         if (stage == Stage::Fragment) { | ||||
|             throw NotImplementedException("Storing ViewportIndex in Fragment stage"); | ||||
|             throw NotImplementedException("Storing ViewportIndex in fragment stage"); | ||||
|         } | ||||
|         viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex); | ||||
|     } | ||||
|   | ||||
| @@ -134,6 +134,44 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) { | ||||
|     case Shader::Stage::VertexB: | ||||
|         execution_model = spv::ExecutionModel::Vertex; | ||||
|         break; | ||||
|     case Shader::Stage::Geometry: | ||||
|         execution_model = spv::ExecutionModel::Geometry; | ||||
|         ctx.AddCapability(spv::Capability::Geometry); | ||||
|         ctx.AddCapability(spv::Capability::GeometryStreams); | ||||
|         switch (ctx.profile.input_topology) { | ||||
|         case InputTopology::Points: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::InputPoints); | ||||
|             break; | ||||
|         case InputTopology::Lines: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::InputLines); | ||||
|             break; | ||||
|         case InputTopology::LinesAdjacency: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::InputLinesAdjacency); | ||||
|             break; | ||||
|         case InputTopology::Triangles: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::Triangles); | ||||
|             break; | ||||
|         case InputTopology::TrianglesAdjacency: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::InputTrianglesAdjacency); | ||||
|             break; | ||||
|         } | ||||
|         switch (program.output_topology) { | ||||
|         case OutputTopology::PointList: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::OutputPoints); | ||||
|             break; | ||||
|         case OutputTopology::LineStrip: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::OutputLineStrip); | ||||
|             break; | ||||
|         case OutputTopology::TriangleStrip: | ||||
|             ctx.AddExecutionMode(main, spv::ExecutionMode::OutputTriangleStrip); | ||||
|             break; | ||||
|         } | ||||
|         if (program.info.stores_point_size) { | ||||
|             ctx.AddCapability(spv::Capability::GeometryPointSize); | ||||
|         } | ||||
|         ctx.AddExecutionMode(main, spv::ExecutionMode::OutputVertices, program.output_vertices); | ||||
|         ctx.AddExecutionMode(main, spv::ExecutionMode::Invocations, program.invocations); | ||||
|         break; | ||||
|     case Shader::Stage::Fragment: | ||||
|         execution_model = spv::ExecutionModel::Fragment; | ||||
|         ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft); | ||||
|   | ||||
| @@ -34,8 +34,8 @@ void EmitMemoryBarrierDeviceLevel(EmitContext& ctx); | ||||
| void EmitMemoryBarrierSystemLevel(EmitContext& ctx); | ||||
| void EmitPrologue(EmitContext& ctx); | ||||
| void EmitEpilogue(EmitContext& ctx); | ||||
| void EmitEmitVertex(EmitContext& ctx, Id stream); | ||||
| void EmitEndPrimitive(EmitContext& ctx, Id stream); | ||||
| void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream); | ||||
| void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream); | ||||
| void EmitGetRegister(EmitContext& ctx); | ||||
| void EmitSetRegister(EmitContext& ctx); | ||||
| void EmitGetPred(EmitContext& ctx); | ||||
| @@ -51,10 +51,10 @@ Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o | ||||
| Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); | ||||
| Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr); | ||||
| void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value); | ||||
| Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset); | ||||
| void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value); | ||||
| Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex); | ||||
| void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, Id vertex); | ||||
| Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex); | ||||
| void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, Id vertex); | ||||
| void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value); | ||||
| void EmitSetFragDepth(EmitContext& ctx, Id value); | ||||
| void EmitGetZFlag(EmitContext& ctx); | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <tuple> | ||||
| #include <utility> | ||||
|  | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
|  | ||||
| @@ -29,6 +30,15 @@ std::optional<AttrInfo> AttrTypes(EmitContext& ctx, u32 index) { | ||||
|     throw InvalidArgument("Invalid attribute type {}", type); | ||||
| } | ||||
|  | ||||
| template <typename... Args> | ||||
| Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&... args) { | ||||
|     if (ctx.stage == Stage::Geometry) { | ||||
|         return ctx.OpAccessChain(pointer_type, base, vertex, std::forward<Args>(args)...); | ||||
|     } else { | ||||
|         return ctx.OpAccessChain(pointer_type, base, std::forward<Args>(args)...); | ||||
|     } | ||||
| } | ||||
|  | ||||
| std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | ||||
|     const u32 element{static_cast<u32>(attr) % 4}; | ||||
|     const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; | ||||
| @@ -66,6 +76,31 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) { | ||||
|         throw NotImplementedException("Read attribute {}", attr); | ||||
|     } | ||||
| } | ||||
|  | ||||
| Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, u32 element_size, | ||||
|            const IR::Value& binding, const IR::Value& offset) { | ||||
|     if (!binding.IsImmediate()) { | ||||
|         throw NotImplementedException("Constant buffer indexing"); | ||||
|     } | ||||
|     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||||
|     const Id uniform_type{ctx.uniform_types.*member_ptr}; | ||||
|     if (!offset.IsImmediate()) { | ||||
|         Id index{ctx.Def(offset)}; | ||||
|         if (element_size > 1) { | ||||
|             const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||||
|             const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; | ||||
|             index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); | ||||
|         } | ||||
|         const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; | ||||
|         return ctx.OpLoad(result_type, access_chain); | ||||
|     } | ||||
|     if (offset.U32() % element_size != 0) { | ||||
|         throw NotImplementedException("Unaligned immediate constant buffer load"); | ||||
|     } | ||||
|     const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; | ||||
|     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; | ||||
|     return ctx.OpLoad(result_type, access_chain); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void EmitGetRegister(EmitContext&) { | ||||
| @@ -100,31 +135,6 @@ void EmitGetIndirectBranchVariable(EmitContext&) { | ||||
|     throw NotImplementedException("SPIR-V Instruction"); | ||||
| } | ||||
|  | ||||
| static Id GetCbuf(EmitContext& ctx, Id result_type, Id UniformDefinitions::*member_ptr, | ||||
|                   u32 element_size, const IR::Value& binding, const IR::Value& offset) { | ||||
|     if (!binding.IsImmediate()) { | ||||
|         throw NotImplementedException("Constant buffer indexing"); | ||||
|     } | ||||
|     const Id cbuf{ctx.cbufs[binding.U32()].*member_ptr}; | ||||
|     const Id uniform_type{ctx.uniform_types.*member_ptr}; | ||||
|     if (!offset.IsImmediate()) { | ||||
|         Id index{ctx.Def(offset)}; | ||||
|         if (element_size > 1) { | ||||
|             const u32 log2_element_size{static_cast<u32>(std::countr_zero(element_size))}; | ||||
|             const Id shift{ctx.Constant(ctx.U32[1], log2_element_size)}; | ||||
|             index = ctx.OpShiftRightArithmetic(ctx.U32[1], ctx.Def(offset), shift); | ||||
|         } | ||||
|         const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, index)}; | ||||
|         return ctx.OpLoad(result_type, access_chain); | ||||
|     } | ||||
|     if (offset.U32() % element_size != 0) { | ||||
|         throw NotImplementedException("Unaligned immediate constant buffer load"); | ||||
|     } | ||||
|     const Id imm_offset{ctx.Constant(ctx.U32[1], offset.U32() / element_size)}; | ||||
|     const Id access_chain{ctx.OpAccessChain(uniform_type, cbuf, ctx.u32_zero_value, imm_offset)}; | ||||
|     return ctx.OpLoad(result_type, access_chain); | ||||
| } | ||||
|  | ||||
| Id EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) { | ||||
|     const Id load{GetCbuf(ctx, ctx.U8, &UniformDefinitions::U8, sizeof(u8), binding, offset)}; | ||||
|     return ctx.OpUConvert(ctx.U32[1], load); | ||||
| @@ -157,7 +167,7 @@ Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& | ||||
|     return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset); | ||||
| } | ||||
|  | ||||
| Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { | ||||
| Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) { | ||||
|     const u32 element{static_cast<u32>(attr) % 4}; | ||||
|     const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }}; | ||||
|     if (IR::IsGeneric(attr)) { | ||||
| @@ -168,7 +178,7 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { | ||||
|             return ctx.Constant(ctx.F32[1], 0.0f); | ||||
|         } | ||||
|         const Id generic_id{ctx.input_generics.at(index)}; | ||||
|         const Id pointer{ctx.OpAccessChain(type->pointer, generic_id, element_id())}; | ||||
|         const Id pointer{AttrPointer(ctx, type->pointer, vertex, generic_id, element_id())}; | ||||
|         const Id value{ctx.OpLoad(type->id, pointer)}; | ||||
|         return type->needs_cast ? ctx.OpBitcast(ctx.F32[1], value) : value; | ||||
|     } | ||||
| @@ -177,8 +187,8 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { | ||||
|     case IR::Attribute::PositionY: | ||||
|     case IR::Attribute::PositionZ: | ||||
|     case IR::Attribute::PositionW: | ||||
|         return ctx.OpLoad(ctx.F32[1], | ||||
|                           ctx.OpAccessChain(ctx.input_f32, ctx.input_position, element_id())); | ||||
|         return ctx.OpLoad( | ||||
|             ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.input_position, element_id())); | ||||
|     case IR::Attribute::InstanceId: | ||||
|         if (ctx.profile.support_vertex_instance_id) { | ||||
|             return ctx.OpLoad(ctx.U32[1], ctx.instance_id); | ||||
| @@ -198,29 +208,32 @@ Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) { | ||||
|                             ctx.Constant(ctx.U32[1], std::numeric_limits<u32>::max()), | ||||
|                             ctx.u32_zero_value); | ||||
|     case IR::Attribute::PointSpriteS: | ||||
|         return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, | ||||
|                                                         ctx.Constant(ctx.U32[1], 0U))); | ||||
|         return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, | ||||
|                                                   ctx.u32_zero_value)); | ||||
|     case IR::Attribute::PointSpriteT: | ||||
|         return ctx.OpLoad(ctx.F32[1], ctx.OpAccessChain(ctx.input_f32, ctx.point_coord, | ||||
|                                                         ctx.Constant(ctx.U32[1], 1U))); | ||||
|         return ctx.OpLoad(ctx.F32[1], AttrPointer(ctx, ctx.input_f32, vertex, ctx.point_coord, | ||||
|                                                   ctx.Constant(ctx.U32[1], 1U))); | ||||
|     default: | ||||
|         throw NotImplementedException("Read attribute {}", attr); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value) { | ||||
| void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, [[maybe_unused]] Id vertex) { | ||||
|     const std::optional<Id> output{OutputAttrPointer(ctx, attr)}; | ||||
|     if (!output) { | ||||
|         return; | ||||
|     if (output) { | ||||
|         ctx.OpStore(*output, value); | ||||
|     } | ||||
|     ctx.OpStore(*output, value); | ||||
| } | ||||
|  | ||||
| Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset) { | ||||
|     return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); | ||||
| Id EmitGetAttributeIndexed(EmitContext& ctx, Id offset, Id vertex) { | ||||
|     if (ctx.stage == Stage::Geometry) { | ||||
|         return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset, vertex); | ||||
|     } else { | ||||
|         return ctx.OpFunctionCall(ctx.F32[1], ctx.indexed_load_func, offset); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value) { | ||||
| void EmitSetAttributeIndexed(EmitContext& ctx, Id offset, Id value, [[maybe_unused]] Id vertex) { | ||||
|     ctx.OpFunctionCall(ctx.void_id, ctx.indexed_store_func, offset, value); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,17 @@ | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| void ConvertDepthMode(EmitContext& ctx) { | ||||
|     const Id type{ctx.F32[1]}; | ||||
|     const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; | ||||
|     const Id z{ctx.OpCompositeExtract(type, position, 2u)}; | ||||
|     const Id w{ctx.OpCompositeExtract(type, position, 3u)}; | ||||
|     const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; | ||||
|     const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; | ||||
|     ctx.OpStore(ctx.output_position, vector); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void EmitPrologue(EmitContext& ctx) { | ||||
|     if (ctx.stage == Stage::VertexB) { | ||||
| @@ -25,23 +36,30 @@ void EmitPrologue(EmitContext& ctx) { | ||||
| } | ||||
|  | ||||
| void EmitEpilogue(EmitContext& ctx) { | ||||
|     if (ctx.profile.convert_depth_mode) { | ||||
|         const Id type{ctx.F32[1]}; | ||||
|         const Id position{ctx.OpLoad(ctx.F32[4], ctx.output_position)}; | ||||
|         const Id z{ctx.OpCompositeExtract(type, position, 2u)}; | ||||
|         const Id w{ctx.OpCompositeExtract(type, position, 3u)}; | ||||
|         const Id screen_depth{ctx.OpFMul(type, ctx.OpFAdd(type, z, w), ctx.Constant(type, 0.5f))}; | ||||
|         const Id vector{ctx.OpCompositeInsert(ctx.F32[4], screen_depth, position, 2u)}; | ||||
|         ctx.OpStore(ctx.output_position, vector); | ||||
|     if (ctx.stage == Stage::VertexB && ctx.profile.convert_depth_mode) { | ||||
|         ConvertDepthMode(ctx); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitEmitVertex(EmitContext& ctx, Id stream) { | ||||
|     ctx.OpEmitStreamVertex(stream); | ||||
| void EmitEmitVertex(EmitContext& ctx, const IR::Value& stream) { | ||||
|     if (ctx.profile.convert_depth_mode) { | ||||
|         ConvertDepthMode(ctx); | ||||
|     } | ||||
|     if (!stream.IsImmediate()) { | ||||
|         // LOG_WARNING(..., "EmitVertex's stream is not constant"); | ||||
|         ctx.OpEmitStreamVertex(ctx.u32_zero_value); | ||||
|         return; | ||||
|     } | ||||
|     ctx.OpEmitStreamVertex(ctx.Def(stream)); | ||||
| } | ||||
|  | ||||
| void EmitEndPrimitive(EmitContext& ctx, Id stream) { | ||||
|     ctx.OpEndStreamPrimitive(stream); | ||||
| void EmitEndPrimitive(EmitContext& ctx, const IR::Value& stream) { | ||||
|     if (!stream.IsImmediate()) { | ||||
|         // LOG_WARNING(..., "EndPrimitive's stream is not constant"); | ||||
|         ctx.OpEndStreamPrimitive(ctx.u32_zero_value); | ||||
|         return; | ||||
|     } | ||||
|     ctx.OpEndStreamPrimitive(ctx.Def(stream)); | ||||
| } | ||||
|  | ||||
| } // namespace Shader::Backend::SPIRV | ||||
|   | ||||
| @@ -308,19 +308,27 @@ U1 IREmitter::GetFlowTestResult(FlowTest test) { | ||||
| } | ||||
|  | ||||
| F32 IREmitter::GetAttribute(IR::Attribute attribute) { | ||||
|     return Inst<F32>(Opcode::GetAttribute, attribute); | ||||
|     return GetAttribute(attribute, Imm32(0)); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) { | ||||
|     Inst(Opcode::SetAttribute, attribute, value); | ||||
| F32 IREmitter::GetAttribute(IR::Attribute attribute, const U32& vertex) { | ||||
|     return Inst<F32>(Opcode::GetAttribute, attribute, vertex); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex) { | ||||
|     Inst(Opcode::SetAttribute, attribute, value, vertex); | ||||
| } | ||||
|  | ||||
| F32 IREmitter::GetAttributeIndexed(const U32& phys_address) { | ||||
|     return Inst<F32>(Opcode::GetAttributeIndexed, phys_address); | ||||
|     return GetAttributeIndexed(phys_address, Imm32(0)); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value) { | ||||
|     Inst(Opcode::SetAttributeIndexed, phys_address, value); | ||||
| F32 IREmitter::GetAttributeIndexed(const U32& phys_address, const U32& vertex) { | ||||
|     return Inst<F32>(Opcode::GetAttributeIndexed, phys_address, vertex); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex) { | ||||
|     Inst(Opcode::SetAttributeIndexed, phys_address, value, vertex); | ||||
| } | ||||
|  | ||||
| void IREmitter::SetFragColor(u32 index, u32 component, const F32& value) { | ||||
|   | ||||
| @@ -77,10 +77,12 @@ public: | ||||
|     [[nodiscard]] U1 GetFlowTestResult(FlowTest test); | ||||
|  | ||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute); | ||||
|     void SetAttribute(IR::Attribute attribute, const F32& value); | ||||
|     [[nodiscard]] F32 GetAttribute(IR::Attribute attribute, const U32& vertex); | ||||
|     void SetAttribute(IR::Attribute attribute, const F32& value, const U32& vertex); | ||||
|  | ||||
|     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address); | ||||
|     void SetAttributeIndexed(const U32& phys_address, const F32& value); | ||||
|     [[nodiscard]] F32 GetAttributeIndexed(const U32& phys_address, const U32& vertex); | ||||
|     void SetAttributeIndexed(const U32& phys_address, const F32& value, const U32& vertex); | ||||
|  | ||||
|     void SetFragColor(u32 index, u32 component, const F32& value); | ||||
|     void SetFragDepth(const F32& value); | ||||
|   | ||||
| @@ -44,10 +44,10 @@ OPCODE(GetCbufS16,                                          U32,            U32, | ||||
| OPCODE(GetCbufU32,                                          U32,            U32,            U32,                                                            ) | ||||
| OPCODE(GetCbufF32,                                          F32,            U32,            U32,                                                            ) | ||||
| OPCODE(GetCbufU32x2,                                        U32x2,          U32,            U32,                                                            ) | ||||
| OPCODE(GetAttribute,                                        F32,            Attribute,                                                                      ) | ||||
| OPCODE(SetAttribute,                                        Void,           Attribute,      F32,                                                            ) | ||||
| OPCODE(GetAttributeIndexed,                                 F32,            U32,                                                                            ) | ||||
| OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,                                                            ) | ||||
| OPCODE(GetAttribute,                                        F32,            Attribute,      U32,                                                            ) | ||||
| OPCODE(SetAttribute,                                        Void,           Attribute,      F32,            U32,                                            ) | ||||
| OPCODE(GetAttributeIndexed,                                 F32,            U32,            U32,                                                            ) | ||||
| OPCODE(SetAttributeIndexed,                                 Void,           U32,            F32,            U32,                                            ) | ||||
| OPCODE(SetFragColor,                                        Void,           U32,            U32,            F32,                                            ) | ||||
| OPCODE(SetFragDepth,                                        Void,           F32,                                                                            ) | ||||
| OPCODE(GetZFlag,                                            U1,             Void,                                                                           ) | ||||
|   | ||||
| @@ -10,6 +10,7 @@ | ||||
| #include <boost/container/small_vector.hpp> | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/program_header.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "shader_recompiler/stage.h" | ||||
|  | ||||
| @@ -21,6 +22,9 @@ struct Program { | ||||
|     Info info; | ||||
|     Stage stage{}; | ||||
|     std::array<u32, 3> workgroup_size{}; | ||||
|     OutputTopology output_topology{}; | ||||
|     u32 output_vertices{}; | ||||
|     u32 invocations{}; | ||||
|     u32 local_memory_size{}; | ||||
|     u32 shared_memory_size{}; | ||||
| }; | ||||
|   | ||||
| @@ -69,9 +69,20 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     program.post_order_blocks = PostOrder(program.blocks); | ||||
|     program.stage = env.ShaderStage(); | ||||
|     program.local_memory_size = env.LocalMemorySize(); | ||||
|     if (program.stage == Stage::Compute) { | ||||
|     switch (program.stage) { | ||||
|     case Stage::Geometry: { | ||||
|         const ProgramHeader& sph{env.SPH()}; | ||||
|         program.output_topology = sph.common3.output_topology; | ||||
|         program.output_vertices = sph.common4.max_output_vertices; | ||||
|         program.invocations = sph.common2.threads_per_input_primitive; | ||||
|         break; | ||||
|     } | ||||
|     case Stage::Compute: | ||||
|         program.workgroup_size = env.WorkgroupSize(); | ||||
|         program.shared_memory_size = env.SharedMemorySize(); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|     RemoveUnreachableBlocks(program); | ||||
|  | ||||
|   | ||||
| @@ -64,7 +64,7 @@ void TranslatorVisitor::ALD(u64 insn) { | ||||
|         BitField<8, 8, IR::Reg> index_reg; | ||||
|         BitField<20, 10, u64> absolute_offset; | ||||
|         BitField<20, 11, s64> relative_offset; | ||||
|         BitField<39, 8, IR::Reg> array_reg; | ||||
|         BitField<39, 8, IR::Reg> vertex_reg; | ||||
|         BitField<32, 1, u64> o; | ||||
|         BitField<31, 1, u64> patch; | ||||
|         BitField<47, 2, Size> size; | ||||
| @@ -80,15 +80,17 @@ void TranslatorVisitor::ALD(u64 insn) { | ||||
|     if (offset % 4 != 0) { | ||||
|         throw NotImplementedException("Unaligned absolute offset {}", offset); | ||||
|     } | ||||
|     const IR::U32 vertex{X(ald.vertex_reg)}; | ||||
|     const u32 num_elements{NumElements(ald.size)}; | ||||
|     if (ald.index_reg == IR::Reg::RZ) { | ||||
|         for (u32 element = 0; element < num_elements; ++element) { | ||||
|             F(ald.dest_reg + element, ir.GetAttribute(IR::Attribute{offset / 4 + element})); | ||||
|             const IR::Attribute attr{offset / 4 + element}; | ||||
|             F(ald.dest_reg + element, ir.GetAttribute(attr, vertex)); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     HandleIndexed(*this, ald.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||||
|         F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset)); | ||||
|         F(ald.dest_reg + element, ir.GetAttributeIndexed(final_offset, vertex)); | ||||
|     }); | ||||
| } | ||||
|  | ||||
| @@ -100,7 +102,7 @@ void TranslatorVisitor::AST(u64 insn) { | ||||
|         BitField<20, 10, u64> absolute_offset; | ||||
|         BitField<20, 11, s64> relative_offset; | ||||
|         BitField<31, 1, u64> patch; | ||||
|         BitField<39, 8, IR::Reg> array_reg; | ||||
|         BitField<39, 8, IR::Reg> vertex_reg; | ||||
|         BitField<47, 2, Size> size; | ||||
|     } const ast{insn}; | ||||
|  | ||||
| @@ -114,15 +116,17 @@ void TranslatorVisitor::AST(u64 insn) { | ||||
|     if (offset % 4 != 0) { | ||||
|         throw NotImplementedException("Unaligned absolute offset {}", offset); | ||||
|     } | ||||
|     const IR::U32 vertex{X(ast.vertex_reg)}; | ||||
|     const u32 num_elements{NumElements(ast.size)}; | ||||
|     if (ast.index_reg == IR::Reg::RZ) { | ||||
|         for (u32 element = 0; element < num_elements; ++element) { | ||||
|             ir.SetAttribute(IR::Attribute{offset / 4 + element}, F(ast.src_reg + element)); | ||||
|             const IR::Attribute attr{offset / 4 + element}; | ||||
|             ir.SetAttribute(attr, F(ast.src_reg + element), vertex); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     HandleIndexed(*this, ast.index_reg, num_elements, [&](u32 element, IR::U32 final_offset) { | ||||
|         ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element)); | ||||
|         ir.SetAttributeIndexed(final_offset, F(ast.src_reg + element), vertex); | ||||
|     }); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -18,6 +18,14 @@ enum class AttributeType : u8 { | ||||
|     Disabled, | ||||
| }; | ||||
|  | ||||
| enum class InputTopology { | ||||
|     Points, | ||||
|     Lines, | ||||
|     LinesAdjacency, | ||||
|     Triangles, | ||||
|     TrianglesAdjacency, | ||||
| }; | ||||
|  | ||||
| struct Profile { | ||||
|     u32 supported_spirv{0x00010000}; | ||||
|  | ||||
| @@ -46,6 +54,8 @@ struct Profile { | ||||
|     std::array<AttributeType, 32> generic_input_types{}; | ||||
|     bool convert_depth_mode{}; | ||||
|  | ||||
|     InputTopology input_topology{}; | ||||
|  | ||||
|     std::optional<float> fixed_state_point_size; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -769,7 +769,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | ||||
|         const size_t stage_index{index - 1}; | ||||
|         infos[stage_index] = &program.info; | ||||
|  | ||||
|         const Shader::Profile profile{MakeProfile(key, program.stage)}; | ||||
|         const Shader::Profile profile{MakeProfile(key, program)}; | ||||
|         const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; | ||||
|         device.SaveShader(code); | ||||
|         modules[stage_index] = BuildShader(device, code); | ||||
| @@ -880,15 +880,59 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA | ||||
| } | ||||
|  | ||||
| Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, | ||||
|                                            Shader::Stage stage) { | ||||
|                                            const Shader::IR::Program& program) { | ||||
|     Shader::Profile profile{base_profile}; | ||||
|     if (stage == Shader::Stage::VertexB) { | ||||
|         profile.convert_depth_mode = key.state.ndc_minus_one_to_one != 0; | ||||
|         if (key.state.topology == Maxwell::PrimitiveTopology::Points) { | ||||
|             profile.fixed_state_point_size = Common::BitCast<float>(key.state.point_size); | ||||
|  | ||||
|     const Shader::Stage stage{program.stage}; | ||||
|     const bool has_geometry{key.unique_hashes[4] != u128{}}; | ||||
|     const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; | ||||
|     const float point_size{Common::BitCast<float>(key.state.point_size)}; | ||||
|     switch (stage) { | ||||
|     case Shader::Stage::VertexB: | ||||
|         if (!has_geometry) { | ||||
|             if (key.state.topology == Maxwell::PrimitiveTopology::Points) { | ||||
|                 profile.fixed_state_point_size = point_size; | ||||
|             } | ||||
|             profile.convert_depth_mode = gl_ndc; | ||||
|         } | ||||
|         std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(), | ||||
|                                &CastAttributeType); | ||||
|         break; | ||||
|     case Shader::Stage::Geometry: | ||||
|         if (program.output_topology == Shader::OutputTopology::PointList) { | ||||
|             profile.fixed_state_point_size = point_size; | ||||
|         } | ||||
|         profile.convert_depth_mode = gl_ndc; | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|     switch (key.state.topology) { | ||||
|     case Maxwell::PrimitiveTopology::Points: | ||||
|         profile.input_topology = Shader::InputTopology::Points; | ||||
|         break; | ||||
|     case Maxwell::PrimitiveTopology::Lines: | ||||
|     case Maxwell::PrimitiveTopology::LineLoop: | ||||
|     case Maxwell::PrimitiveTopology::LineStrip: | ||||
|         profile.input_topology = Shader::InputTopology::Lines; | ||||
|         break; | ||||
|     case Maxwell::PrimitiveTopology::Triangles: | ||||
|     case Maxwell::PrimitiveTopology::TriangleStrip: | ||||
|     case Maxwell::PrimitiveTopology::TriangleFan: | ||||
|     case Maxwell::PrimitiveTopology::Quads: | ||||
|     case Maxwell::PrimitiveTopology::QuadStrip: | ||||
|     case Maxwell::PrimitiveTopology::Polygon: | ||||
|     case Maxwell::PrimitiveTopology::Patches: | ||||
|         profile.input_topology = Shader::InputTopology::Triangles; | ||||
|         break; | ||||
|     case Maxwell::PrimitiveTopology::LinesAdjacency: | ||||
|     case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||||
|         profile.input_topology = Shader::InputTopology::LinesAdjacency; | ||||
|         break; | ||||
|     case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||||
|     case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||||
|         profile.input_topology = Shader::InputTopology::TrianglesAdjacency; | ||||
|         break; | ||||
|     } | ||||
|     return profile; | ||||
| } | ||||
|   | ||||
| @@ -33,6 +33,10 @@ namespace Core { | ||||
| class System; | ||||
| } | ||||
|  | ||||
| namespace Shader::IR { | ||||
| struct Program; | ||||
| } | ||||
|  | ||||
| namespace Vulkan { | ||||
|  | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| @@ -160,7 +164,8 @@ private: | ||||
|                                                            Shader::Environment& env, | ||||
|                                                            bool build_in_parallel); | ||||
|  | ||||
|     Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); | ||||
|     Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, | ||||
|                                 const Shader::IR::Program& program); | ||||
|  | ||||
|     Tegra::GPU& gpu; | ||||
|     Tegra::Engines::Maxwell3D& maxwell3d; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user