HACK: Skip normquat lerp to drop geometry shader requirement
This commit is contained in:
@@ -70,6 +70,7 @@ Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac
|
||||
if (is_tiled) {
|
||||
const int x0 = (begin_pixel_index % (stride * 8)) / 8;
|
||||
const int y0 = (begin_pixel_index / (stride * 8)) * 8;
|
||||
|
||||
// Top to bottom
|
||||
return Common::Rectangle<u32>(x0, height - y0, x0 + sub_surface.width,
|
||||
height - (y0 + sub_surface.height));
|
||||
@@ -77,6 +78,7 @@ Common::Rectangle<u32> SurfaceParams::GetSubRect(const SurfaceParams& sub_surfac
|
||||
|
||||
const int x0 = begin_pixel_index % stride;
|
||||
const int y0 = begin_pixel_index / stride;
|
||||
|
||||
// Bottom to top
|
||||
return Common::Rectangle<u32>(x0, y0 + sub_surface.height, x0 + sub_surface.width, y0);
|
||||
}
|
||||
|
@@ -205,7 +205,7 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
|
||||
bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
|
||||
Pica::Shader::ShaderSetup& setup,
|
||||
const VertexLayout& layout) {
|
||||
PicaVSConfig config{regs.vs, setup};
|
||||
PicaVSConfig config{regs.rasterizer, regs.vs, setup};
|
||||
|
||||
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
|
||||
for (u32 i = 0; i < layout.attribute_count; i++) {
|
||||
@@ -243,14 +243,15 @@ void PipelineCache::UseTrivialVertexShader() {
|
||||
}
|
||||
|
||||
void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
|
||||
return UseTrivialGeometryShader();
|
||||
const PicaFixedGSConfig gs_config{regs};
|
||||
const vk::ShaderModule handle =
|
||||
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::Debug);
|
||||
|
||||
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
vk::ShaderModule handle =
|
||||
fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
|
||||
instance.GetDevice(), ShaderOptimization::High);
|
||||
scheduler.Record([this, handle, hash = gs_config.Hash()](vk::CommandBuffer, vk::CommandBuffer) {
|
||||
current_shaders[ProgramType::GS] = handle;
|
||||
shader_hashes[ProgramType::GS] = gs_config.Hash();
|
||||
shader_hashes[ProgramType::GS] = hash;
|
||||
});
|
||||
}
|
||||
|
||||
|
@@ -451,7 +451,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
|
||||
const Common::Rectangle viewport_rect_unscaled = regs.rasterizer.GetViewportRect();
|
||||
|
||||
auto [color_surface, depth_surface, surfaces_rect] =
|
||||
const auto [color_surface, depth_surface, surfaces_rect] =
|
||||
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
|
||||
|
||||
if (!color_surface && shadow_rendering) {
|
||||
@@ -680,13 +680,28 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
pipeline_cache.SetScissor(draw_rect.left, draw_rect.bottom, draw_rect.GetWidth(),
|
||||
draw_rect.GetHeight());
|
||||
|
||||
// Sometimes the dimensions of the color and depth framebuffers might not be the same
|
||||
// In that case select the minimum one to abide by the spec
|
||||
u32 width = 0;
|
||||
u32 height = 0;
|
||||
if (color_surface && depth_surface) {
|
||||
width = std::min(color_surface->GetScaledWidth(), depth_surface->GetScaledWidth());
|
||||
height = std::min(color_surface->GetScaledHeight(), depth_surface->GetScaledHeight());
|
||||
} else if (color_surface) {
|
||||
width = color_surface->GetScaledWidth();
|
||||
height = color_surface->GetScaledHeight();
|
||||
} else if (depth_surface) {
|
||||
width = depth_surface->GetScaledWidth();
|
||||
height = depth_surface->GetScaledHeight();
|
||||
}
|
||||
|
||||
const FramebufferInfo framebuffer_info = {
|
||||
.color = color_surface ? color_surface->GetFramebufferView() : VK_NULL_HANDLE,
|
||||
.depth = depth_surface ? depth_surface->GetFramebufferView() : VK_NULL_HANDLE,
|
||||
.renderpass = renderpass_cache.GetRenderpass(pipeline_info.color_attachment,
|
||||
pipeline_info.depth_attachment, false),
|
||||
.width = surfaces_rect.GetWidth(),
|
||||
.height = surfaces_rect.GetHeight()};
|
||||
.width = width,
|
||||
.height = height};
|
||||
|
||||
auto [it, new_framebuffer] = framebuffers.try_emplace(framebuffer_info, vk::Framebuffer{});
|
||||
if (new_framebuffer) {
|
||||
@@ -757,6 +772,13 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
|
||||
depth_surface);
|
||||
}
|
||||
|
||||
static int submit_threshold = 80;
|
||||
submit_threshold--;
|
||||
if (!submit_threshold) {
|
||||
submit_threshold = 80;
|
||||
scheduler.Flush();
|
||||
}
|
||||
|
||||
return succeeded;
|
||||
}
|
||||
|
||||
|
@@ -247,7 +247,8 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
|
||||
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
|
||||
}
|
||||
|
||||
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
void PicaShaderConfigCommon::Init(const Pica::RasterizerRegs& rasterizer,
|
||||
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
program_hash = setup.GetProgramCodeHash();
|
||||
swizzle_hash = setup.GetSwizzleDataHash();
|
||||
main_offset = regs.main_offset;
|
||||
@@ -259,6 +260,27 @@ void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::Sh
|
||||
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
|
||||
output_map[reg] = num_outputs++;
|
||||
}
|
||||
|
||||
vs_output_attributes = Common::BitSet<u32>(regs.output_mask).Count();
|
||||
gs_output_attributes = vs_output_attributes;
|
||||
|
||||
semantic_maps.fill({16, 0});
|
||||
for (u32 attrib = 0; attrib < rasterizer.vs_output_total; ++attrib) {
|
||||
const std::array semantics{
|
||||
rasterizer.vs_output_attributes[attrib].map_x.Value(),
|
||||
rasterizer.vs_output_attributes[attrib].map_y.Value(),
|
||||
rasterizer.vs_output_attributes[attrib].map_z.Value(),
|
||||
rasterizer.vs_output_attributes[attrib].map_w.Value(),
|
||||
};
|
||||
for (u32 comp = 0; comp < 4; ++comp) {
|
||||
const auto semantic = semantics[comp];
|
||||
if (static_cast<std::size_t>(semantic) < 24) {
|
||||
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
|
||||
} else if (semantic != VSOutputAttributes::INVALID) {
|
||||
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
|
||||
@@ -1660,6 +1682,8 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
};
|
||||
|
||||
)";
|
||||
out += GetVertexInterfaceDeclaration(true);
|
||||
|
||||
// input attributes declaration
|
||||
for (std::size_t i = 0; i < used_regs.size(); ++i) {
|
||||
if (used_regs[i]) {
|
||||
@@ -1732,14 +1756,65 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
|
||||
|
||||
// output attributes declaration
|
||||
for (u32 i = 0; i < config.state.num_outputs; ++i) {
|
||||
out += fmt::format("layout(location = {}) out vec4 vs_out_attr{};\n", i, i);
|
||||
out += fmt::format("vec4 vs_out_attr{};\n", i, i);
|
||||
}
|
||||
|
||||
const auto semantic = [&config = config.state](VSOutputAttributes::Semantic slot_semantic) -> std::string {
|
||||
const u32 slot = static_cast<u32>(slot_semantic);
|
||||
const u32 attrib = config.semantic_maps[slot].attribute_index;
|
||||
const u32 comp = config.semantic_maps[slot].component_index;
|
||||
if (attrib < config.gs_output_attributes) {
|
||||
return fmt::format("vs_out_attr{}.{}", attrib, "xyzw"[comp]);
|
||||
}
|
||||
return "0.0";
|
||||
};
|
||||
|
||||
out += "vec4 GetVertexQuaternion() {\n";
|
||||
out += " return vec4(" + semantic(VSOutputAttributes::QUATERNION_X) + ", " +
|
||||
semantic(VSOutputAttributes::QUATERNION_Y) + ", " +
|
||||
semantic(VSOutputAttributes::QUATERNION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::QUATERNION_W) + ");\n";
|
||||
out += "}\n\n";
|
||||
|
||||
out += "void EmitVtx() {\n";
|
||||
out += " vec4 vtx_pos = vec4(" + semantic(VSOutputAttributes::POSITION_X) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_Y) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_Z) + ", " +
|
||||
semantic(VSOutputAttributes::POSITION_W) + ");\n";
|
||||
out += " gl_Position = vtx_pos;\n";
|
||||
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
|
||||
//out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
|
||||
//out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
|
||||
//out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";
|
||||
//out += "#endif // !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n\n";
|
||||
|
||||
// This is inaccurate!
|
||||
out += " normquat = GetVertexQuaternion();\n";
|
||||
|
||||
out += " vec4 vtx_color = vec4(" + semantic(VSOutputAttributes::COLOR_R) + ", " +
|
||||
semantic(VSOutputAttributes::COLOR_G) + ", " + semantic(VSOutputAttributes::COLOR_B) +
|
||||
", " + semantic(VSOutputAttributes::COLOR_A) + ");\n";
|
||||
out += " primary_color = min(abs(vtx_color), vec4(1.0));\n\n";
|
||||
|
||||
out += " texcoord0 = vec2(" + semantic(VSOutputAttributes::TEXCOORD0_U) + ", " +
|
||||
semantic(VSOutputAttributes::TEXCOORD0_V) + ");\n";
|
||||
out += " texcoord1 = vec2(" + semantic(VSOutputAttributes::TEXCOORD1_U) + ", " +
|
||||
semantic(VSOutputAttributes::TEXCOORD1_V) + ");\n\n";
|
||||
|
||||
out += " texcoord0_w = " + semantic(VSOutputAttributes::TEXCOORD0_W) + ";\n";
|
||||
out += " view = vec3(" + semantic(VSOutputAttributes::VIEW_X) + ", " +
|
||||
semantic(VSOutputAttributes::VIEW_Y) + ", " + semantic(VSOutputAttributes::VIEW_Z) +
|
||||
");\n\n";
|
||||
|
||||
out += " texcoord2 = vec2(" + semantic(VSOutputAttributes::TEXCOORD2_U) + ", " +
|
||||
semantic(VSOutputAttributes::TEXCOORD2_V) + ");\n\n";
|
||||
out += "}\n";
|
||||
|
||||
out += "\nvoid main() {\n";
|
||||
for (u32 i = 0; i < config.state.num_outputs; ++i) {
|
||||
out += fmt::format(" vs_out_attr{} = vec4(0.0, 0.0, 0.0, 1.0);\n", i);
|
||||
}
|
||||
out += "\n exec_shader();\n}\n\n";
|
||||
out += "\n exec_shader();\nEmitVtx();\n}\n\n";
|
||||
|
||||
out += program_source;
|
||||
|
||||
|
@@ -150,7 +150,8 @@ struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
|
||||
* PICA vertex/geometry shader.
|
||||
*/
|
||||
struct PicaShaderConfigCommon {
|
||||
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
void Init(const Pica::RasterizerRegs& rasterizer,
|
||||
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
|
||||
|
||||
u64 program_hash;
|
||||
u64 swizzle_hash;
|
||||
@@ -163,6 +164,20 @@ struct PicaShaderConfigCommon {
|
||||
|
||||
// output_map[output register index] -> output attribute index
|
||||
std::array<u32, 16> output_map;
|
||||
|
||||
|
||||
|
||||
|
||||
u32 vs_output_attributes;
|
||||
u32 gs_output_attributes;
|
||||
|
||||
struct SemanticMap {
|
||||
u32 attribute_index;
|
||||
u32 component_index;
|
||||
};
|
||||
|
||||
// semantic_maps[semantic name] -> GS output attribute index + component index
|
||||
std::array<SemanticMap, 24> semantic_maps;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -170,8 +185,9 @@ struct PicaShaderConfigCommon {
|
||||
* shader.
|
||||
*/
|
||||
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
|
||||
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
state.Init(regs, setup);
|
||||
explicit PicaVSConfig(const Pica::RasterizerRegs& rasterizer,
|
||||
const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
|
||||
state.Init(rasterizer, regs, setup);
|
||||
}
|
||||
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
|
||||
state = conf;
|
||||
|
@@ -134,7 +134,6 @@ std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size) {
|
||||
|
||||
Bucket& bucket = buckets[bucket_index];
|
||||
|
||||
// If we reach bucket boundaries move over to the next one
|
||||
if (bucket.cursor + size > bucket_size) {
|
||||
bucket.gpu_tick = scheduler.CurrentTick();
|
||||
MoveNextBucket();
|
||||
@@ -165,8 +164,8 @@ void StreamBuffer::Flush() {
|
||||
ASSERT(flush_size <= bucket_size);
|
||||
ASSERT(flush_start + flush_size <= total_size);
|
||||
|
||||
// Ensure all staging writes are visible to the host memory domain
|
||||
if (flush_size > 0) [[likely]] {
|
||||
// Ensure all staging writes are visible to the host memory domain
|
||||
VmaAllocator allocator = instance.GetAllocator();
|
||||
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
|
||||
if (gpu_buffer) {
|
||||
|
Reference in New Issue
Block a user