shader: Add denorm flush support
This commit is contained in:
		| @@ -32,6 +32,7 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/ir/ir_emitter.h | ||||
|     frontend/ir/microinstruction.cpp | ||||
|     frontend/ir/microinstruction.h | ||||
|     frontend/ir/modifiers.h | ||||
|     frontend/ir/opcodes.cpp | ||||
|     frontend/ir/opcodes.h | ||||
|     frontend/ir/opcodes.inc | ||||
| @@ -94,9 +95,7 @@ add_library(shader_recompiler STATIC | ||||
|     shader_info.h | ||||
| ) | ||||
|  | ||||
| target_include_directories(shader_recompiler PRIVATE sirit) | ||||
| target_link_libraries(shader_recompiler PRIVATE fmt::fmt sirit) | ||||
| target_link_libraries(shader_recompiler INTERFACE fmt::fmt sirit) | ||||
| target_link_libraries(shader_recompiler PUBLIC fmt::fmt sirit) | ||||
|  | ||||
| add_executable(shader_util main.cpp) | ||||
| target_link_libraries(shader_util PRIVATE shader_recompiler) | ||||
|   | ||||
| @@ -14,8 +14,6 @@ | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/frontend/ir/program.h" | ||||
|  | ||||
| #pragma optimize("", off) | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
| namespace { | ||||
| template <class Func> | ||||
| @@ -113,9 +111,61 @@ Id TypeId(const EmitContext& ctx, IR::Type type) { | ||||
|         throw NotImplementedException("Phi node type {}", type); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx, | ||||
|                         Id main_func) { | ||||
|     if (!profile.support_float_controls) { | ||||
|         return; | ||||
|     } | ||||
|     const Info& info{program.info}; | ||||
|     if (!info.uses_fp32_denorms_flush && !info.uses_fp32_denorms_preserve && | ||||
|         !info.uses_fp16_denorms_flush && !info.uses_fp16_denorms_preserve) { | ||||
|         return; | ||||
|     } | ||||
|     ctx.AddExtension("SPV_KHR_float_controls"); | ||||
|  | ||||
|     if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { | ||||
|         // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); | ||||
|     } else if (info.uses_fp32_denorms_flush) { | ||||
|         if (profile.support_fp32_denorm_flush) { | ||||
|             ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||||
|             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U); | ||||
|         } else { | ||||
|             // Drivers will most likely flush denorms by default, no need to warn | ||||
|         } | ||||
|     } else if (info.uses_fp32_denorms_preserve) { | ||||
|         if (profile.support_fp32_denorm_preserve) { | ||||
|             ctx.AddCapability(spv::Capability::DenormPreserve); | ||||
|             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U); | ||||
|         } else { | ||||
|             // LOG_WARNING(HW_GPU, "Fp32 denorm preserve used in shader without host support"); | ||||
|         } | ||||
|     } | ||||
|     if (!profile.support_separate_denorm_behavior) { | ||||
|         // No separate denorm behavior | ||||
|         return; | ||||
|     } | ||||
|     if (info.uses_fp16_denorms_flush && info.uses_fp16_denorms_preserve) { | ||||
|         // LOG_ERROR(HW_GPU, "Fp16 denorm flush and preserve on the same shader"); | ||||
|     } else if (info.uses_fp16_denorms_flush) { | ||||
|         if (profile.support_fp16_denorm_flush) { | ||||
|             ctx.AddCapability(spv::Capability::DenormFlushToZero); | ||||
|             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||||
|         } else { | ||||
|             // Same as fp32, no need to warn as most drivers will flush by default | ||||
|         } | ||||
|     } else if (info.uses_fp32_denorms_preserve) { | ||||
|         if (profile.support_fp16_denorm_preserve) { | ||||
|             ctx.AddCapability(spv::Capability::DenormPreserve); | ||||
|             ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 16U); | ||||
|         } else { | ||||
|             // LOG_WARNING(HW_GPU, "Fp16 denorm preserve used in shader without host support"); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | ||||
| std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program& program) { | ||||
|     EmitContext ctx{program}; | ||||
|     const Id void_function{ctx.TypeFunction(ctx.void_id)}; | ||||
|     // FIXME: Forward declare functions (needs sirit support) | ||||
| @@ -131,10 +181,11 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | ||||
|         ctx.OpFunctionEnd(); | ||||
|     } | ||||
|     boost::container::small_vector<Id, 32> interfaces; | ||||
|     if (program.info.uses_workgroup_id) { | ||||
|     const Info& info{program.info}; | ||||
|     if (info.uses_workgroup_id) { | ||||
|         interfaces.push_back(ctx.workgroup_id); | ||||
|     } | ||||
|     if (program.info.uses_local_invocation_id) { | ||||
|     if (info.uses_local_invocation_id) { | ||||
|         interfaces.push_back(ctx.local_invocation_id); | ||||
|     } | ||||
|     const std::span interfaces_span(interfaces.data(), interfaces.size()); | ||||
| @@ -144,6 +195,8 @@ std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program) { | ||||
|     ctx.AddExecutionMode(func, spv::ExecutionMode::LocalSize, workgroup_size[0], workgroup_size[1], | ||||
|                          workgroup_size[2]); | ||||
|  | ||||
|     SetupDenormControl(profile, program, ctx, func); | ||||
|  | ||||
|     return ctx.Assemble(); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -11,10 +11,12 @@ | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/frontend/ir/program.h" | ||||
| #include "shader_recompiler/profile.h" | ||||
|  | ||||
| namespace Shader::Backend::SPIRV { | ||||
|  | ||||
| [[nodiscard]] std::vector<u32> EmitSPIRV(Environment& env, IR::Program& program); | ||||
| [[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, | ||||
|                                          IR::Program& program); | ||||
|  | ||||
| // Microinstruction emitters | ||||
| Id EmitPhi(EmitContext& ctx, IR::Inst* inst); | ||||
|   | ||||
| @@ -13,7 +13,10 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | ||||
|         ctx.Decorate(op, spv::Decoration::NoContraction); | ||||
|     } | ||||
|     switch (flags.rounding) { | ||||
|     case IR::FpRounding::DontCare: | ||||
|         break; | ||||
|     case IR::FpRounding::RN: | ||||
|         ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTE); | ||||
|         break; | ||||
|     case IR::FpRounding::RM: | ||||
|         ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTN); | ||||
| @@ -25,9 +28,6 @@ Id Decorate(EmitContext& ctx, IR::Inst* inst, Id op) { | ||||
|         ctx.Decorate(op, spv::Decoration::FPRoundingMode, spv::FPRoundingMode::RTZ); | ||||
|         break; | ||||
|     } | ||||
|     if (flags.fmz_mode != IR::FmzMode::FTZ) { | ||||
|         throw NotImplementedException("Denorm management not implemented"); | ||||
|     } | ||||
|     return op; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -558,53 +558,53 @@ F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) { | ||||
| F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value, FpControl control) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::F16: | ||||
|         return Inst<F16>(Opcode::FPRoundEven16, value); | ||||
|         return Inst<F16>(Opcode::FPRoundEven16, Flags{control}, value); | ||||
|     case Type::F32: | ||||
|         return Inst<F32>(Opcode::FPRoundEven32, value); | ||||
|         return Inst<F32>(Opcode::FPRoundEven32, Flags{control}, value); | ||||
|     case Type::F64: | ||||
|         return Inst<F64>(Opcode::FPRoundEven64, value); | ||||
|         return Inst<F64>(Opcode::FPRoundEven64, Flags{control}, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| F16F32F64 IREmitter::FPFloor(const F16F32F64& value) { | ||||
| F16F32F64 IREmitter::FPFloor(const F16F32F64& value, FpControl control) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::F16: | ||||
|         return Inst<F16>(Opcode::FPFloor16, value); | ||||
|         return Inst<F16>(Opcode::FPFloor16, Flags{control}, value); | ||||
|     case Type::F32: | ||||
|         return Inst<F32>(Opcode::FPFloor32, value); | ||||
|         return Inst<F32>(Opcode::FPFloor32, Flags{control}, value); | ||||
|     case Type::F64: | ||||
|         return Inst<F64>(Opcode::FPFloor64, value); | ||||
|         return Inst<F64>(Opcode::FPFloor64, Flags{control}, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| F16F32F64 IREmitter::FPCeil(const F16F32F64& value) { | ||||
| F16F32F64 IREmitter::FPCeil(const F16F32F64& value, FpControl control) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::F16: | ||||
|         return Inst<F16>(Opcode::FPCeil16, value); | ||||
|         return Inst<F16>(Opcode::FPCeil16, Flags{control}, value); | ||||
|     case Type::F32: | ||||
|         return Inst<F32>(Opcode::FPCeil32, value); | ||||
|         return Inst<F32>(Opcode::FPCeil32, Flags{control}, value); | ||||
|     case Type::F64: | ||||
|         return Inst<F64>(Opcode::FPCeil64, value); | ||||
|         return Inst<F64>(Opcode::FPCeil64, Flags{control}, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
| } | ||||
|  | ||||
| F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) { | ||||
| F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) { | ||||
|     switch (value.Type()) { | ||||
|     case Type::F16: | ||||
|         return Inst<F16>(Opcode::FPTrunc16, value); | ||||
|         return Inst<F16>(Opcode::FPTrunc16, Flags{control}, value); | ||||
|     case Type::F32: | ||||
|         return Inst<F32>(Opcode::FPTrunc32, value); | ||||
|         return Inst<F32>(Opcode::FPTrunc32, Flags{control}, value); | ||||
|     case Type::F64: | ||||
|         return Inst<F64>(Opcode::FPTrunc64, value); | ||||
|         return Inst<F64>(Opcode::FPTrunc64, Flags{control}, value); | ||||
|     default: | ||||
|         ThrowInvalidType(value.Type()); | ||||
|     } | ||||
|   | ||||
| @@ -129,10 +129,10 @@ public: | ||||
|     [[nodiscard]] F32 FPSinNotReduced(const F32& value); | ||||
|     [[nodiscard]] F32 FPSqrt(const F32& value); | ||||
|     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {}); | ||||
|  | ||||
|     [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); | ||||
|     [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); | ||||
|   | ||||
| @@ -4,15 +4,19 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include "common/common_types.h" | ||||
|  | ||||
| namespace Shader::IR { | ||||
|  | ||||
| enum class FmzMode : u8 { | ||||
|     None, // Denorms are not flushed, NAN is propagated (nouveau) | ||||
|     DontCare, // Not specified for this instruction | ||||
|     FTZ,      // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK) | ||||
|     FMZ,      // Flush denorms to zero, x * 0 == 0 (D3D9) | ||||
|     None,     // Denorms are not flushed, NAN is propagated (nouveau) | ||||
| }; | ||||
|  | ||||
| enum class FpRounding : u8 { | ||||
|     DontCare, // Not specified for this instruction | ||||
|     RN,       // Round to nearest even, | ||||
|     RM,       // Round towards negative infinity | ||||
|     RP,       // Round towards positive infinity | ||||
| @@ -21,8 +25,9 @@ enum class FpRounding : u8 { | ||||
|  | ||||
| struct FpControl { | ||||
|     bool no_contraction{false}; | ||||
|     FpRounding rounding{FpRounding::RN}; | ||||
|     FmzMode fmz_mode{FmzMode::FTZ}; | ||||
|     FpRounding rounding{FpRounding::DontCare}; | ||||
|     FmzMode fmz_mode{FmzMode::DontCare}; | ||||
| }; | ||||
| static_assert(sizeof(FpControl) <= sizeof(u32)); | ||||
|  | ||||
| } // namespace Shader::IR | ||||
|   | ||||
| @@ -81,17 +81,28 @@ void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) { | ||||
|     // F2I is used to convert from a floating point value to an integer | ||||
|     const F2I f2i{insn}; | ||||
|  | ||||
|     const bool denorm_cares{f2i.src_format != SrcFormat::F16 && f2i.src_format != SrcFormat::F64 && | ||||
|                             f2i.dest_format != DestFormat::I64}; | ||||
|     IR::FmzMode fmz_mode{IR::FmzMode::DontCare}; | ||||
|     if (denorm_cares) { | ||||
|         fmz_mode = f2i.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None; | ||||
|     } | ||||
|     const IR::FpControl fp_control{ | ||||
|         .no_contraction{true}, | ||||
|         .rounding{IR::FpRounding::DontCare}, | ||||
|         .fmz_mode{fmz_mode}, | ||||
|     }; | ||||
|     const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)}; | ||||
|     const IR::F16F32F64 rounded_value{[&] { | ||||
|         switch (f2i.rounding) { | ||||
|         case Rounding::Round: | ||||
|             return v.ir.FPRoundEven(op_a); | ||||
|             return v.ir.FPRoundEven(op_a, fp_control); | ||||
|         case Rounding::Floor: | ||||
|             return v.ir.FPFloor(op_a); | ||||
|             return v.ir.FPFloor(op_a, fp_control); | ||||
|         case Rounding::Ceil: | ||||
|             return v.ir.FPCeil(op_a); | ||||
|             return v.ir.FPCeil(op_a, fp_control); | ||||
|         case Rounding::Trunc: | ||||
|             return v.ir.FPTrunc(op_a); | ||||
|             return v.ir.FPTrunc(op_a, fp_control); | ||||
|         default: | ||||
|             throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); | ||||
|         } | ||||
|   | ||||
| @@ -2,23 +2,28 @@ | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include "shader_recompiler/frontend/ir/microinstruction.h" | ||||
| #include "shader_recompiler/frontend/ir/modifiers.h" | ||||
| #include "shader_recompiler/frontend/ir/program.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
|  | ||||
| namespace Shader::Optimization { | ||||
| namespace { | ||||
| void AddConstantBufferDescriptor(Info& info, u32 index) { | ||||
|     auto& descriptor{info.constant_buffers.at(index)}; | ||||
|     if (descriptor) { | ||||
| void AddConstantBufferDescriptor(Info& info, u32 index, u32 count) { | ||||
|     if (count != 1) { | ||||
|         throw NotImplementedException("Constant buffer descriptor indexing"); | ||||
|     } | ||||
|     if ((info.constant_buffer_mask & (1U << index)) != 0) { | ||||
|         return; | ||||
|     } | ||||
|     descriptor = &info.constant_buffer_descriptors.emplace_back(Info::ConstantBufferDescriptor{ | ||||
|     info.constant_buffer_mask |= 1U << index; | ||||
|     info.constant_buffer_descriptors.push_back({ | ||||
|         .index{index}, | ||||
|         .count{1}, | ||||
|     }); | ||||
| } | ||||
|  | ||||
| void Visit(Info& info, IR::Inst& inst) { | ||||
| void VisitUsages(Info& info, IR::Inst& inst) { | ||||
|     switch (inst.Opcode()) { | ||||
|     case IR::Opcode::WorkgroupId: | ||||
|         info.uses_workgroup_id = true; | ||||
| @@ -72,7 +77,7 @@ void Visit(Info& info, IR::Inst& inst) { | ||||
|         break; | ||||
|     case IR::Opcode::GetCbuf: | ||||
|         if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) { | ||||
|             AddConstantBufferDescriptor(info, index.U32()); | ||||
|             AddConstantBufferDescriptor(info, index.U32(), 1); | ||||
|         } else { | ||||
|             throw NotImplementedException("Constant buffer with non-immediate index"); | ||||
|         } | ||||
| @@ -81,6 +86,60 @@ void Visit(Info& info, IR::Inst& inst) { | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void VisitFpModifiers(Info& info, IR::Inst& inst) { | ||||
|     switch (inst.Opcode()) { | ||||
|     case IR::Opcode::FPAdd16: | ||||
|     case IR::Opcode::FPFma16: | ||||
|     case IR::Opcode::FPMul16: | ||||
|     case IR::Opcode::FPRoundEven16: | ||||
|     case IR::Opcode::FPFloor16: | ||||
|     case IR::Opcode::FPCeil16: | ||||
|     case IR::Opcode::FPTrunc16: { | ||||
|         const auto control{inst.Flags<IR::FpControl>()}; | ||||
|         switch (control.fmz_mode) { | ||||
|         case IR::FmzMode::DontCare: | ||||
|             break; | ||||
|         case IR::FmzMode::FTZ: | ||||
|         case IR::FmzMode::FMZ: | ||||
|             info.uses_fp16_denorms_flush = true; | ||||
|             break; | ||||
|         case IR::FmzMode::None: | ||||
|             info.uses_fp16_denorms_preserve = true; | ||||
|             break; | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     case IR::Opcode::FPAdd32: | ||||
|     case IR::Opcode::FPFma32: | ||||
|     case IR::Opcode::FPMul32: | ||||
|     case IR::Opcode::FPRoundEven32: | ||||
|     case IR::Opcode::FPFloor32: | ||||
|     case IR::Opcode::FPCeil32: | ||||
|     case IR::Opcode::FPTrunc32: { | ||||
|         const auto control{inst.Flags<IR::FpControl>()}; | ||||
|         switch (control.fmz_mode) { | ||||
|         case IR::FmzMode::DontCare: | ||||
|             break; | ||||
|         case IR::FmzMode::FTZ: | ||||
|         case IR::FmzMode::FMZ: | ||||
|             info.uses_fp32_denorms_flush = true; | ||||
|             break; | ||||
|         case IR::FmzMode::None: | ||||
|             info.uses_fp32_denorms_preserve = true; | ||||
|             break; | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
|  | ||||
| void Visit(Info& info, IR::Inst& inst) { | ||||
|     VisitUsages(info, inst); | ||||
|     VisitFpModifiers(info, inst); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void CollectShaderInfoPass(IR::Program& program) { | ||||
|   | ||||
| @@ -351,7 +351,6 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program) { | ||||
|             .cbuf_offset{storage_buffer.offset}, | ||||
|             .count{1}, | ||||
|         }); | ||||
|         info.storage_buffers[storage_index] = &info.storage_buffers_descriptors.back(); | ||||
|         ++storage_index; | ||||
|     } | ||||
|     for (const StorageInst& storage_inst : to_replace) { | ||||
|   | ||||
| @@ -60,6 +60,17 @@ void RunDatabase() { | ||||
|     fmt::print(stdout, "{} ms", duration_cast<milliseconds>(t - t0).count() / double(N)); | ||||
| } | ||||
|  | ||||
| static constexpr Profile PROFILE{ | ||||
|     .unified_descriptor_binding = true, | ||||
|     .support_float_controls = true, | ||||
|     .support_separate_denorm_behavior = true, | ||||
|     .support_separate_rounding_mode = true, | ||||
|     .support_fp16_denorm_preserve = true, | ||||
|     .support_fp32_denorm_preserve = true, | ||||
|     .support_fp16_denorm_flush = true, | ||||
|     .support_fp32_denorm_flush = true, | ||||
| }; | ||||
|  | ||||
| int main() { | ||||
|     // RunDatabase(); | ||||
|  | ||||
| @@ -76,7 +87,7 @@ int main() { | ||||
|     fmt::print(stdout, "{}\n", cfg.Dot()); | ||||
|     IR::Program program{TranslateProgram(inst_pool, block_pool, env, cfg)}; | ||||
|     fmt::print(stdout, "{}\n", IR::DumpProgram(program)); | ||||
|     const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(env, program)}; | ||||
|     const std::vector<u32> spirv{Backend::SPIRV::EmitSPIRV(PROFILE, env, program)}; | ||||
|     std::FILE* const file{std::fopen("D:\\shader.spv", "wb")}; | ||||
|     std::fwrite(spirv.data(), spirv.size(), sizeof(u32), file); | ||||
|     std::fclose(file); | ||||
|   | ||||
| @@ -7,7 +7,14 @@ | ||||
| namespace Shader { | ||||
|  | ||||
| struct Profile { | ||||
|     bool unified_descriptor_binding; | ||||
|     bool unified_descriptor_binding{}; | ||||
|     bool support_float_controls{}; | ||||
|     bool support_separate_denorm_behavior{}; | ||||
|     bool support_separate_rounding_mode{}; | ||||
|     bool support_fp16_denorm_preserve{}; | ||||
|     bool support_fp32_denorm_preserve{}; | ||||
|     bool support_fp16_denorm_flush{}; | ||||
|     bool support_fp32_denorm_flush{}; | ||||
| }; | ||||
|  | ||||
| } // namespace Shader | ||||
|   | ||||
| @@ -14,14 +14,15 @@ | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address) { | ||||
| std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, Environment& env, | ||||
|                                                  u32 start_address) { | ||||
|     ObjectPool<Maxwell::Flow::Block> flow_block_pool; | ||||
|     ObjectPool<IR::Inst> inst_pool; | ||||
|     ObjectPool<IR::Block> block_pool; | ||||
|  | ||||
|     Maxwell::Flow::CFG cfg{env, flow_block_pool, start_address}; | ||||
|     IR::Program program{Maxwell::TranslateProgram(inst_pool, block_pool, env, cfg)}; | ||||
|     return {std::move(program.info), Backend::SPIRV::EmitSPIRV(env, program)}; | ||||
|     return {std::move(program.info), Backend::SPIRV::EmitSPIRV(profile, env, program)}; | ||||
| } | ||||
|  | ||||
| } // namespace Shader | ||||
|   | ||||
| @@ -9,10 +9,12 @@ | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/environment.h" | ||||
| #include "shader_recompiler/profile.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| [[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(Environment& env, u32 start_address); | ||||
| [[nodiscard]] std::pair<Info, std::vector<u32>> RecompileSPIRV(const Profile& profile, | ||||
|                                                                Environment& env, u32 start_address); | ||||
|  | ||||
| } // namespace Shader | ||||
|   | ||||
| @@ -31,14 +31,15 @@ struct Info { | ||||
|     bool uses_local_invocation_id{}; | ||||
|     bool uses_fp16{}; | ||||
|     bool uses_fp64{}; | ||||
|     bool uses_fp16_denorms_flush{}; | ||||
|     bool uses_fp16_denorms_preserve{}; | ||||
|     bool uses_fp32_denorms_flush{}; | ||||
|     bool uses_fp32_denorms_preserve{}; | ||||
|  | ||||
|     u32 constant_buffer_mask{}; | ||||
|  | ||||
|     std::array<ConstantBufferDescriptor*, MAX_CBUFS> constant_buffers{}; | ||||
|     boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS> | ||||
|         constant_buffer_descriptors; | ||||
|  | ||||
|     std::array<StorageBufferDescriptor*, MAX_SSBOS> storage_buffers{}; | ||||
|     boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors; | ||||
| }; | ||||
|  | ||||
|   | ||||
| @@ -131,12 +131,7 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip | ||||
|       })} {} | ||||
|  | ||||
| void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { | ||||
|     u32 enabled_uniforms{}; | ||||
|     for (const auto& desc : info.constant_buffer_descriptors) { | ||||
|         enabled_uniforms |= ((1ULL << desc.count) - 1) << desc.index; | ||||
|     } | ||||
|     buffer_cache.SetEnabledComputeUniformBuffers(enabled_uniforms); | ||||
|  | ||||
|     buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | ||||
|     buffer_cache.UnbindComputeStorageBuffers(); | ||||
|     size_t index{}; | ||||
|     for (const auto& desc : info.storage_buffers_descriptors) { | ||||
|   | ||||
| @@ -177,7 +177,20 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderInfo* shader_info) { | ||||
|     if (const std::optional<u128> cached_hash{env.Analyze(qmd.program_start)}) { | ||||
|         // TODO: Load from cache | ||||
|     } | ||||
|     const auto [info, code]{Shader::RecompileSPIRV(env, qmd.program_start)}; | ||||
|     const auto& float_control{device.FloatControlProperties()}; | ||||
|     const Shader::Profile profile{ | ||||
|         .unified_descriptor_binding = true, | ||||
|         .support_float_controls = true, | ||||
|         .support_separate_denorm_behavior = float_control.denormBehaviorIndependence == | ||||
|                                             VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||||
|         .support_separate_rounding_mode = | ||||
|             float_control.roundingModeIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR, | ||||
|         .support_fp16_denorm_preserve = float_control.shaderDenormPreserveFloat16 != VK_FALSE, | ||||
|         .support_fp32_denorm_preserve = float_control.shaderDenormPreserveFloat32 != VK_FALSE, | ||||
|         .support_fp16_denorm_flush = float_control.shaderDenormFlushToZeroFloat16 != VK_FALSE, | ||||
|         .support_fp32_denorm_flush = float_control.shaderDenormFlushToZeroFloat32 != VK_FALSE, | ||||
|     }; | ||||
|     const auto [info, code]{Shader::RecompileSPIRV(profile, env, qmd.program_start)}; | ||||
|  | ||||
|     FILE* file = fopen("D:\\shader.spv", "wb"); | ||||
|     fwrite(code.data(), 4, code.size(), file); | ||||
|   | ||||
| @@ -43,6 +43,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ | ||||
|     VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, | ||||
|     VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, | ||||
|     VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, | ||||
|     VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME, | ||||
|     VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, | ||||
|     VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, | ||||
|     VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, | ||||
| @@ -200,6 +201,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|     CheckSuitability(surface != nullptr); | ||||
|     SetupFamilies(surface); | ||||
|     SetupFeatures(); | ||||
|     SetupProperties(); | ||||
|  | ||||
|     const auto queue_cis = GetDeviceQueueCreateInfos(); | ||||
|     const std::vector extensions = LoadExtensions(surface != nullptr); | ||||
| @@ -426,8 +428,6 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|  | ||||
|     graphics_queue = logical.GetQueue(graphics_family); | ||||
|     present_queue = logical.GetQueue(present_family); | ||||
|  | ||||
|     use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); | ||||
| } | ||||
|  | ||||
| Device::~Device() = default; | ||||
| @@ -600,7 +600,7 @@ void Device::CheckSuitability(bool requires_swapchain) const { | ||||
|     VkPhysicalDeviceRobustness2FeaturesEXT robustness2{}; | ||||
|     robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; | ||||
|  | ||||
|     VkPhysicalDeviceFeatures2 features2{}; | ||||
|     VkPhysicalDeviceFeatures2KHR features2{}; | ||||
|     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; | ||||
|     features2.pNext = &robustness2; | ||||
|  | ||||
| @@ -684,7 +684,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) { | ||||
|                  true); | ||||
|         } | ||||
|     } | ||||
|     VkPhysicalDeviceFeatures2KHR features; | ||||
|     VkPhysicalDeviceFeatures2KHR features{}; | ||||
|     features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR; | ||||
|  | ||||
|     VkPhysicalDeviceProperties2KHR physical_properties; | ||||
| @@ -806,11 +806,21 @@ void Device::SetupFamilies(VkSurfaceKHR surface) { | ||||
| } | ||||
|  | ||||
| void Device::SetupFeatures() { | ||||
|     const auto supported_features{physical.GetFeatures()}; | ||||
|     is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat; | ||||
|     is_shader_storage_image_multisample = supported_features.shaderStorageImageMultisample; | ||||
|     const VkPhysicalDeviceFeatures features{physical.GetFeatures()}; | ||||
|     is_formatless_image_load_supported = features.shaderStorageImageReadWithoutFormat; | ||||
|     is_shader_storage_image_multisample = features.shaderStorageImageMultisample; | ||||
|     is_blit_depth_stencil_supported = TestDepthStencilBlits(); | ||||
|     is_optimal_astc_supported = IsOptimalAstcSupported(supported_features); | ||||
|     is_optimal_astc_supported = IsOptimalAstcSupported(features); | ||||
| } | ||||
|  | ||||
| void Device::SetupProperties() { | ||||
|     float_controls.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR; | ||||
|  | ||||
|     VkPhysicalDeviceProperties2KHR properties2{}; | ||||
|     properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; | ||||
|     properties2.pNext = &float_controls; | ||||
|  | ||||
|     physical.GetProperties2KHR(properties2); | ||||
| } | ||||
|  | ||||
| void Device::CollectTelemetryParameters() { | ||||
|   | ||||
| @@ -128,6 +128,11 @@ public: | ||||
|         return properties.limits.maxComputeSharedMemorySize; | ||||
|     } | ||||
|  | ||||
|     /// Returns float control properties of the device. | ||||
|     const VkPhysicalDeviceFloatControlsPropertiesKHR& FloatControlProperties() const { | ||||
|         return float_controls; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if ASTC is natively supported. | ||||
|     bool IsOptimalAstcSupported() const { | ||||
|         return is_optimal_astc_supported; | ||||
| @@ -223,11 +228,6 @@ public: | ||||
|         return reported_extensions; | ||||
|     } | ||||
|  | ||||
|     /// Returns true if the setting for async shader compilation is enabled. | ||||
|     bool UseAsynchronousShaders() const { | ||||
|         return use_asynchronous_shaders; | ||||
|     } | ||||
|  | ||||
|     u64 GetDeviceLocalMemory() const { | ||||
|         return device_access_memory; | ||||
|     } | ||||
| @@ -245,6 +245,9 @@ private: | ||||
|     /// Sets up device features. | ||||
|     void SetupFeatures(); | ||||
|  | ||||
|     /// Sets up device properties. | ||||
|     void SetupProperties(); | ||||
|  | ||||
|     /// Collects telemetry information from the device. | ||||
|     void CollectTelemetryParameters(); | ||||
|  | ||||
| @@ -271,6 +274,7 @@ private: | ||||
|     vk::DeviceDispatch dld;                                      ///< Device function pointers. | ||||
|     vk::PhysicalDevice physical;                                 ///< Physical device. | ||||
|     VkPhysicalDeviceProperties properties;                       ///< Device properties. | ||||
|     VkPhysicalDeviceFloatControlsPropertiesKHR float_controls{}; ///< Float control properties. | ||||
|     vk::Device logical;                                          ///< Logical device. | ||||
|     vk::Queue graphics_queue;                                    ///< Main graphics queue. | ||||
|     vk::Queue present_queue;                                     ///< Main present queue. | ||||
| @@ -301,9 +305,6 @@ private: | ||||
|     bool has_renderdoc{};                       ///< Has RenderDoc attached | ||||
|     bool has_nsight_graphics{};                 ///< Has Nsight Graphics attached | ||||
|  | ||||
|     // Asynchronous Graphics Pipeline setting | ||||
|     bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline | ||||
|  | ||||
|     // Telemetry parameters | ||||
|     std::string vendor_name;                      ///< Device's driver name. | ||||
|     std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. | ||||
|   | ||||
| @@ -311,8 +311,6 @@ const char* ToString(VkResult result) noexcept { | ||||
|         return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; | ||||
|     case VkResult::VK_ERROR_UNKNOWN: | ||||
|         return "VK_ERROR_UNKNOWN"; | ||||
|     case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: | ||||
|         return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; | ||||
|     case VkResult::VK_THREAD_IDLE_KHR: | ||||
|         return "VK_THREAD_IDLE_KHR"; | ||||
|     case VkResult::VK_THREAD_DONE_KHR: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user