HACK: Skip normquat lerp to drop geometry shader requirement

This commit is contained in:
GPUCode
2022-12-26 15:50:11 +02:00
parent 09dcd48257
commit 96f0746ab9
6 changed files with 132 additions and 17 deletions

View File

@@ -70,6 +70,7 @@ Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac
if (is_tiled) {
const int x0 = (begin_pixel_index % (stride * 8)) / 8;
const int y0 = (begin_pixel_index / (stride * 8)) * 8;
// Top to bottom
return Common::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
height - (y0 + sub_surface.height));
@@ -77,6 +78,7 @@ Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac
const int x0 = begin_pixel_index % stride;
const int y0 = begin_pixel_index / stride;
// Bottom to top
return Common::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
}

View File

@@ -205,7 +205,7 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
PicaVSConfig config{regs.vs, setup};
PicaVSConfig config{regs.rasterizer, regs.vs, setup};
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) {
@@ -243,14 +243,15 @@ void PipelineCache::UseTrivialVertexShader() {
}
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
return UseTrivialGeometryShader();
const PicaFixedGSConfig gs_config{regs};
const vk::ShaderModule handle =
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
vk::ShaderModule handle =
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::High);
scheduler.Record([this, handle, hash = gs_config.Hash()](vk::CommandBuffer, vk::CommandBuffer) {
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
shader_hashes[ProgramType::GS] = hash;
});
}

View File

@@ -451,7 +451,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const Common::Rectangle viewport_rect_unscaled = regs.rasterizer.GetViewportRect();
auto [color_surface, depth_surface, surfaces_rect] =
const auto [color_surface, depth_surface, surfaces_rect] =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
if (!color_surface && shadow_rendering) {
@@ -680,13 +680,28 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(),
draw_rect.GetHeight());
// Sometimes the dimensions of the color and depth framebuffers might not be the same
// In that case select the minimum one to abide by the spec
u32 width = 0;
u32 height = 0;
if (color_surface && depth_surface) {
width = std::min(color_surface->GetScaledWidth(), depth_surface->GetScaledWidth());
height = std::min(color_surface->GetScaledHeight(), depth_surface->GetScaledHeight());
} else if (color_surface) {
width = color_surface->GetScaledWidth();
height = color_surface->GetScaledHeight();
} else if (depth_surface) {
width = depth_surface->GetScaledWidth();
height = depth_surface->GetScaledHeight();
}
const FramebufferInfo framebuffer_info = {
.color = color_surface ? color_surface->GetFramebufferView() : VK_NULL_HANDLE,
.depth = depth_surface ? depth_surface->GetFramebufferView() : VK_NULL_HANDLE,
.renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment,
pipeline_info.depth_attachment, false),
.width = surfaces_rect.GetWidth(),
.height = surfaces_rect.GetHeight()};
.width = width,
.height = height};
auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{});
if (new_framebuffer) {
@@ -757,6 +772,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface);
}
static int submit_threshold = 80;
submit_threshold--;
if (!submit_threshold) {
submit_threshold = 80;
scheduler.Flush();
}
return succeeded;
}

View File

@@ -247,7 +247,8 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
}
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer,
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
@@ -259,6 +260,27 @@ void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::Sh
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
vs_output_attributes = Common::BitSet<u32>(regs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < rasterizer.vs_output_total; ++attrib) {
const std::array semantics{
rasterizer.vs_output_attributes[attrib].map_x.Value(),
rasterizer.vs_output_attributes[attrib].map_y.Value(),
rasterizer.vs_output_attributes[attrib].map_z.Value(),
rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const auto semantic = semantics[comp];
if (static_cast<std::size_t>(semantic) < 24) {
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
} else if (semantic != VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
@@ -1660,6 +1682,8 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
};
)";
out += GetVertexInterfaceDeclaration(true);
// input attributes declaration
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (used_regs[i]) {
@@ -1732,14 +1756,65 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
// output attributes declaration
for (u32 i = 0; i < config.state.num_outputs; ++i) {
out += fmt::format("layout(location = {}) out vec4 vs_out_attr{};\n", i, i);
out += fmt::format("vec4 vs_out_attr{};\n", i, i);
}
const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string {
const u32 slot = static_cast<u32>(slot_semantic);
const u32 attrib = config.semantic_maps[slot].attribute_index;
const u32 comp = config.semantic_maps[slot].component_index;
if (attrib < config.gs_output_attributes) {
return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]);
}
return "0.0";
};
out += "vec4 GetVertexQuaternion() {\n";
out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " +
semantic(VSOutputAttributes::QUATERNION_Y) + ", " +
semantic(VSOutputAttributes::QUATERNION_Z) + ", " +
semantic(VSOutputAttributes::QUATERNION_W) + ");\n";
out += "}\n\n";
out += "void EmitVtx() {\n";
out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " +
semantic(VSOutputAttributes::POSITION_Y) + ", " +
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
//out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
//out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
//out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
//out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n";
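// This is inaccurate! The quaternion is forwarded directly from the vertex shader outputs,
// skipping the normquat lerp so that no geometry shader is required.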
out += " normquat = GetVertexQuaternion();\n";
out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " +
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) +
", " + semantic(VSOutputAttributes::COLOR_A) + ");\n";
out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n";
out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n";
out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n";
out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n";
out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " +
semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) +
");\n\n";
out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " +
semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n";
out += "}\n";
out += "\nvoid main() {\n";
for (u32 i = 0; i < config.state.num_outputs; ++i) {
out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i);
}
out += "\n exec_shader();\n}\n\n";
out += "\n exec_shader();\nEmitVtx();\n}\n\n";
out += program_source;

View File

@@ -150,7 +150,8 @@ struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
* PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
void Init(const Pica::RasterizerRegs& rasterizer,
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
@@ -163,6 +164,20 @@ struct PicaShaderConfigCommon {
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
@@ -170,8 +185,9 @@ struct PicaShaderConfigCommon {
* shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer,
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(rasterizer, regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;

View File

@@ -134,7 +134,6 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
Bucket& bucket = buckets[bucket_index];
// If we reach bucket boundaries move over to the next one
if (bucket.cursor + size > bucket_size) {
bucket.gpu_tick = scheduler.CurrentTick();
MoveNextBucket();
@@ -165,8 +164,8 @@ void StreamBuffer::Flush() {
ASSERT(flush_size <= bucket_size);
ASSERT(flush_start + flush_size <= total_size);
// Ensure all staging writes are visible to the host memory domain
if (flush_size > 0) [[likely]] {
// Ensure all staging writes are visible to the host memory domain
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
if (gpu_buffer) {