shader: Add support for native 16-bit floats
This commit is contained in:
		| @@ -211,6 +211,7 @@ add_library(shader_recompiler STATIC | ||||
|     frontend/maxwell/translate/translate.h | ||||
|     frontend/maxwell/translate_program.cpp | ||||
|     frontend/maxwell/translate_program.h | ||||
|     host_translate_info.h | ||||
|     ir_opt/collect_shader_info_pass.cpp | ||||
|     ir_opt/constant_propagation_pass.cpp | ||||
|     ir_opt/dead_code_elimination_pass.cpp | ||||
|   | ||||
| @@ -13,6 +13,7 @@ | ||||
| #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate_program.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/ir_opt/passes.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
| @@ -120,7 +121,7 @@ void AddNVNStorageBuffers(IR::Program& program) { | ||||
| } // Anonymous namespace | ||||
|  | ||||
| IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||||
|                              Environment& env, Flow::CFG& cfg) { | ||||
|                              Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | ||||
|     IR::Program program; | ||||
|     program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | ||||
|     program.blocks = GenerateBlocks(program.syntax_list); | ||||
| @@ -150,8 +151,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo | ||||
|     RemoveUnreachableBlocks(program); | ||||
|  | ||||
|     // Replace instructions before the SSA rewrite | ||||
|     Optimization::LowerFp16ToFp32(program); | ||||
|  | ||||
|     if (!host_info.support_float16) { | ||||
|         Optimization::LowerFp16ToFp32(program); | ||||
|     } | ||||
|     Optimization::SsaRewritePass(program); | ||||
|  | ||||
|     Optimization::GlobalMemoryToStorageBufferPass(program); | ||||
|   | ||||
| @@ -8,13 +8,14 @@ | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/program.h" | ||||
| #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
|  | ||||
| namespace Shader::Maxwell { | ||||
|  | ||||
| [[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, | ||||
|                                            ObjectPool<IR::Block>& block_pool, Environment& env, | ||||
|                                            Flow::CFG& cfg); | ||||
|                                            Flow::CFG& cfg, const HostTranslateInfo& host_info); | ||||
|  | ||||
| [[nodiscard]] IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b, | ||||
|                                                   Environment& env_vertex_b); | ||||
|   | ||||
							
								
								
									
										18
									
								
								src/shader_recompiler/host_translate_info.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								src/shader_recompiler/host_translate_info.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| // Copyright 2021 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| namespace Shader { | ||||
|  | ||||
| // Try to keep the entries here to a minimum: | ||||
| // each one can accidentally change the cached information in a shader | ||||
|  | ||||
| /// Misc information about the host | ||||
| /// | ||||
| /// Passed by const reference to Maxwell::TranslateProgram so translation can depend on | ||||
| /// what the host device supports. Both flags value-initialize to false. | ||||
| struct HostTranslateInfo { | ||||
|     /// When false, TranslateProgram runs Optimization::LowerFp16ToFp32 before the SSA rewrite | ||||
|     bool support_float16{}; ///< True when the device supports 16-bit floats | ||||
|     /// NOTE(review): no reader of this flag is visible in this change - confirm where it is used | ||||
|     bool support_int64{};   ///< True when the device supports 64-bit integers | ||||
| }; | ||||
|  | ||||
| } // namespace Shader | ||||
| @@ -201,6 +201,10 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | ||||
|           .has_gl_component_indexing_bug = device.HasComponentIndexingBug(), | ||||
|           .has_gl_precise_bug = device.HasPreciseBug(), | ||||
|           .ignore_nan_fp_comparisons = true, | ||||
|       }, | ||||
|       host_info{ | ||||
|           .support_float16 = false, | ||||
|           .support_int64 = true, | ||||
|       } { | ||||
|     if (use_asynchronous_shaders) { | ||||
|         workers = CreateWorkers(); | ||||
| @@ -373,15 +377,15 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline( | ||||
|         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||||
|         if (!uses_vertex_a || index != 1) { | ||||
|             // Normal path | ||||
|             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||||
|             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | ||||
|  | ||||
|             for (const auto& desc : programs[index].info.storage_buffers_descriptors) { | ||||
|                 total_storage_buffers += desc.count; | ||||
|             } | ||||
|         } else { | ||||
|             // VertexB path when VertexA is present. | ||||
|             Shader::IR::Program& program_va{programs[0]}; | ||||
|             Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|             auto& program_va{programs[0]}; | ||||
|             auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||||
|             for (const auto& desc : program_vb.info.storage_buffers_descriptors) { | ||||
|                 total_storage_buffers += desc.count; | ||||
|             } | ||||
| @@ -449,7 +453,7 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline( | ||||
|     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | ||||
|  | ||||
|     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | ||||
|     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||||
|  | ||||
|     u32 num_storage_buffers{}; | ||||
|     for (const auto& desc : program.info.storage_buffers_descriptors) { | ||||
|   | ||||
| @@ -14,6 +14,7 @@ | ||||
| #include "common/common_types.h" | ||||
| #include "common/thread_worker.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
| #include "video_core/engines/shader_type.h" | ||||
| #include "video_core/renderer_opengl/gl_compute_pipeline.h" | ||||
| @@ -82,6 +83,8 @@ private: | ||||
|     std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache; | ||||
|  | ||||
|     Shader::Profile profile; | ||||
|     Shader::HostTranslateInfo host_info; | ||||
|  | ||||
|     std::filesystem::path shader_cache_filename; | ||||
|     std::unique_ptr<ShaderWorker> workers; | ||||
| }; | ||||
|   | ||||
| @@ -307,6 +307,10 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw | ||||
|         .has_broken_signed_operations = false, | ||||
|         .ignore_nan_fp_comparisons = false, | ||||
|     }; | ||||
|     host_info = Shader::HostTranslateInfo{ | ||||
|         .support_float16 = device.IsFloat16Supported(), | ||||
|         .support_int64 = true, | ||||
|     }; | ||||
| } | ||||
|  | ||||
| PipelineCache::~PipelineCache() = default; | ||||
| @@ -484,11 +488,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | ||||
|         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); | ||||
|         if (!uses_vertex_a || index != 1) { | ||||
|             // Normal path | ||||
|             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||||
|             programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg, host_info); | ||||
|         } else { | ||||
|             // VertexB path when VertexA is present. | ||||
|             Shader::IR::Program& program_va{programs[0]}; | ||||
|             Shader::IR::Program program_vb{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|             auto& program_va{programs[0]}; | ||||
|             auto program_vb{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||||
|             programs[index] = MergeDualVertexPrograms(program_va, program_vb, env); | ||||
|         } | ||||
|     } | ||||
| @@ -575,7 +579,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( | ||||
|     LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); | ||||
|  | ||||
|     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | ||||
|     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|     auto program{TranslateProgram(pools.inst, pools.block, env, cfg, host_info)}; | ||||
|     const std::vector<u32> code{EmitSPIRV(profile, program)}; | ||||
|     device.SaveShader(code); | ||||
|     vk::ShaderModule spv_module{BuildShader(device, code)}; | ||||
|   | ||||
| @@ -19,6 +19,7 @@ | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
| #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
| #include "shader_recompiler/profile.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| @@ -157,6 +158,8 @@ private: | ||||
|     ShaderPools main_pools; | ||||
|  | ||||
|     Shader::Profile profile; | ||||
|     Shader::HostTranslateInfo host_info; | ||||
|  | ||||
|     std::filesystem::path pipeline_cache_filename; | ||||
|  | ||||
|     Common::ThreadWorker workers; | ||||
|   | ||||
| @@ -497,8 +497,8 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | ||||
|     } | ||||
|     if (is_float16_supported && driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { | ||||
|         // Intel's compiler crashes when using fp16 on Astral Chain, disable it for the time being. | ||||
|         // LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); | ||||
|         // is_float16_supported = false; | ||||
|         LOG_WARNING(Render_Vulkan, "Blacklisting Intel proprietary from float16 math"); | ||||
|         is_float16_supported = false; | ||||
|     } | ||||
|  | ||||
|     graphics_queue = logical.GetQueue(graphics_family); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user