From 96f0746ab9b73f1d0cfb28057d094802e8e80dca Mon Sep 17 00:00:00 2001 From: GPUCode Date: Mon, 26 Dec 2022 15:50:11 +0200 Subject: [PATCH] HACK: Skip normquat lerp to drop geometry shader requirement --- .../rasterizer_cache/surface_params.cpp | 2 + .../renderer_vulkan/vk_pipeline_cache.cpp | 13 +-- .../renderer_vulkan/vk_rasterizer.cpp | 28 ++++++- .../renderer_vulkan/vk_shader_gen.cpp | 81 ++++++++++++++++++- .../renderer_vulkan/vk_shader_gen.h | 22 ++++- .../renderer_vulkan/vk_stream_buffer.cpp | 3 +- 6 files changed, 132 insertions(+), 17 deletions(-) diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 50c73125c..f91f85806 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -70,6 +70,7 @@ Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac if (is_tiled) { const int x0 = (begin_pixel_index % (stride * 8)) / 8; const int y0 = (begin_pixel_index / (stride * 8)) * 8; + // Top to bottom return Common::Rectangle(x0, height - y0, x0 + sub_surface.width, height - (y0 + sub_surface.height)); @@ -77,6 +78,7 @@ Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac const int x0 = begin_pixel_index % stride; const int y0 = begin_pixel_index / stride; + // Bottom to top return Common::Rectangle(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0); } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2d66e87e7..5fe3db7bc 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -205,7 +205,7 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) { bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs, Pica::Shader::ShaderSetup& setup, const VertexLayout& layout) { - PicaVSConfig config{regs.vs, setup}; + PicaVSConfig 
config{regs.rasterizer, regs.vs, setup}; u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES; for (u32 i = 0; i < layout.attribute_count; i++) { @@ -243,14 +243,15 @@ void PipelineCache::UseTrivialVertexShader() { } void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) { + return UseTrivialGeometryShader(); const PicaFixedGSConfig gs_config{regs}; + const vk::ShaderModule handle = + fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, + instance.GetDevice(), ShaderOptimization::Debug); - scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) { - vk::ShaderModule handle = - fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry, - instance.GetDevice(), ShaderOptimization::High); + scheduler.Record([this, handle, hash = gs_config.Hash()](vk::CommandBuffer, vk::CommandBuffer) { current_shaders[ProgramType::GS] = handle; - shader_hashes[ProgramType::GS] = gs_config.Hash(); + shader_hashes[ProgramType::GS] = hash; }); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 9cc570ea4..3aebab662 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -451,7 +451,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { const Common::Rectangle viewport_rect_unscaled = regs.rasterizer.GetViewportRect(); - auto [color_surface, depth_surface, surfaces_rect] = + const auto [color_surface, depth_surface, surfaces_rect] = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); if (!color_surface && shadow_rendering) { @@ -680,13 +680,28 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(), draw_rect.GetHeight()); + // Sometimes the dimensions of the color and depth framebuffers might not be the same + // In that case select the minimum one to abide
by the spec + u32 width = 0; + u32 height = 0; + if (color_surface && depth_surface) { + width = std::min(color_surface->GetScaledWidth(), depth_surface->GetScaledWidth()); + height = std::min(color_surface->GetScaledHeight(), depth_surface->GetScaledHeight()); + } else if (color_surface) { + width = color_surface->GetScaledWidth(); + height = color_surface->GetScaledHeight(); + } else if (depth_surface) { + width = depth_surface->GetScaledWidth(); + height = depth_surface->GetScaledHeight(); + } + const FramebufferInfo framebuffer_info = { .color = color_surface ? color_surface->GetFramebufferView() : VK_NULL_HANDLE, .depth = depth_surface ? depth_surface->GetFramebufferView() : VK_NULL_HANDLE, .renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment, pipeline_info.depth_attachment, false), - .width = surfaces_rect.GetWidth(), - .height = surfaces_rect.GetHeight()}; + .width = width, + .height = height}; auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{}); if (new_framebuffer) { @@ -757,6 +772,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) { depth_surface); } + static int submit_threshold = 80; + submit_threshold--; + if (!submit_threshold) { + submit_threshold = 80; + scheduler.Flush(); + } + return succeeded; } diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp index a867b3ccd..ba7a620c6 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp @@ -247,7 +247,8 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) { state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0); } -void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { +void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer, + const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { 
program_hash = setup.GetProgramCodeHash(); swizzle_hash = setup.GetSwizzleDataHash(); main_offset = regs.main_offset; @@ -259,6 +260,27 @@ void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::Sh for (int reg : Common::BitSet(regs.output_mask)) { output_map[reg] = num_outputs++; } + + vs_output_attributes = Common::BitSet(regs.output_mask).Count(); + gs_output_attributes = vs_output_attributes; + + semantic_maps.fill({16, 0}); + for (u32 attrib = 0; attrib < rasterizer.vs_output_total; ++attrib) { + const std::array semantics{ + rasterizer.vs_output_attributes[attrib].map_x.Value(), + rasterizer.vs_output_attributes[attrib].map_y.Value(), + rasterizer.vs_output_attributes[attrib].map_z.Value(), + rasterizer.vs_output_attributes[attrib].map_w.Value(), + }; + for (u32 comp = 0; comp < 4; ++comp) { + const auto semantic = semantics[comp]; + if (static_cast(semantic) < 24) { + semantic_maps[static_cast(semantic)] = {attrib, comp}; + } else if (semantic != VSOutputAttributes::INVALID) { + LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic); + } + } + } } void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) { @@ -1660,6 +1682,8 @@ layout (set = 0, binding = 0, std140) uniform vs_config { }; )"; + out += GetVertexInterfaceDeclaration(true); + // input attributes declaration for (std::size_t i = 0; i < used_regs.size(); ++i) { if (used_regs[i]) { @@ -1732,14 +1756,65 @@ layout (set = 0, binding = 0, std140) uniform vs_config { // output attributes declaration for (u32 i = 0; i < config.state.num_outputs; ++i) { - out += fmt::format("layout(location = {}) out vec4 vs_out_attr{};\n", i, i); + out += fmt::format("vec4 vs_out_attr{};\n", i, i); } + const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string { + const u32 slot = static_cast(slot_semantic); + const u32 attrib = config.semantic_maps[slot].attribute_index; + const u32 comp = config.semantic_maps[slot].component_index; + if 
(attrib < config.gs_output_attributes) { + return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]); + } + return "0.0"; + }; + + out += "vec4 GetVertexQuaternion() {\n"; + out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " + + semantic(VSOutputAttributes::QUATERNION_Y) + ", " + + semantic(VSOutputAttributes::QUATERNION_Z) + ", " + + semantic(VSOutputAttributes::QUATERNION_W) + ");\n"; + out += "}\n\n"; + + out += "void EmitVtx() {\n"; + out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " + + semantic(VSOutputAttributes::POSITION_Y) + ", " + + semantic(VSOutputAttributes::POSITION_Z) + ", " + + semantic(VSOutputAttributes::POSITION_W) + ");\n"; + out += " gl_Position = vtx_pos;\n"; + out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n"; + //out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n"; + //out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0 + //out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n"; + //out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n"; + + // This is inaccurate! 
+ out += " normquat = GetVertexQuaternion();\n"; + + out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " + + semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) + + ", " + semantic(VSOutputAttributes::COLOR_A) + ");\n"; + out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n"; + + out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n"; + out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n"; + + out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n"; + out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " + + semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) + + ");\n\n"; + + out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " + + semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n"; + out += "}\n"; + out += "\nvoid main() {\n"; for (u32 i = 0; i < config.state.num_outputs; ++i) { out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i); } - out += "\n exec_shader();\n}\n\n"; + out += "\n exec_shader();\nEmitVtx();\n}\n\n"; out += program_source; diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.h b/src/video_core/renderer_vulkan/vk_shader_gen.h index 189fb34bc..1b00bf9e0 100644 --- a/src/video_core/renderer_vulkan/vk_shader_gen.h +++ b/src/video_core/renderer_vulkan/vk_shader_gen.h @@ -150,7 +150,8 @@ struct PicaFSConfig : Common::HashableStruct { * PICA vertex/geometry shader. 
*/ struct PicaShaderConfigCommon { - void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); + void Init(const Pica::RasterizerRegs& rasterizer, + const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup); u64 program_hash; u64 swizzle_hash; @@ -163,6 +164,20 @@ struct PicaShaderConfigCommon { // output_map[output register index] -> output attribute index std::array output_map; + + + + + u32 vs_output_attributes; + u32 gs_output_attributes; + + struct SemanticMap { + u32 attribute_index; + u32 component_index; + }; + + // semantic_maps[semantic name] -> GS output attribute index + component index + std::array semantic_maps; }; /** @@ -170,8 +185,9 @@ struct PicaShaderConfigCommon { * shader. */ struct PicaVSConfig : Common::HashableStruct { - explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { - state.Init(regs, setup); + explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer, + const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) { + state.Init(rasterizer, regs, setup); } explicit PicaVSConfig(const PicaShaderConfigCommon& conf) { state = conf; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index b60e2d12b..9c65a0b9b 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -134,7 +134,6 @@ std::tuple StreamBuffer::Map(u32 size) { Bucket& bucket = buckets[bucket_index]; - // If we reach bucket boundaries move over to the next one if (bucket.cursor + size > bucket_size) { bucket.gpu_tick = scheduler.CurrentTick(); MoveNextBucket(); @@ -165,8 +164,8 @@ void StreamBuffer::Flush() { ASSERT(flush_size <= bucket_size); ASSERT(flush_start + flush_size <= total_size); + // Ensure all staging writes are visible to the host memory domain if (flush_size > 0) [[likely]] { - // Ensure all staging writes are visible to the host memory domain VmaAllocator allocator 
= instance.GetAllocator(); vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size); if (gpu_buffer) {