From 5cdc08f6ea2c4b54e74118ddb9c05fa388a2e5e4 Mon Sep 17 00:00:00 2001
From: GPUCode
Date: Thu, 5 Jan 2023 17:00:43 +0200
Subject: [PATCH] Vertex spirv

---
 .../renderer_opengl/gl_shader_decompiler.cpp  |    9 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   14 +-
 .../renderer_vulkan/vk_pipeline_cache.h       |    9 +-
 .../renderer_vulkan/vk_shader_decompiler.cpp  | 1089 ++++++++++++++++-
 .../renderer_vulkan/vk_shader_decompiler.h    |  352 +++++-
 .../renderer_vulkan/vk_shader_gen.cpp         |    8 +-
 .../renderer_vulkan/vk_shader_gen_spv.cpp     |    3 -
 .../renderer_vulkan/vk_shader_gen_spv.h       |   18 +-
 src/video_core/shader/shader_cache.h          |   24 +-
 9 files changed, 1489 insertions(+), 37 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 19634babe..39771dca1 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -221,7 +221,7 @@ public:
 
     int scope = 0;
 
-private:
+public:
     void AddExpression(std::string_view text) {
         if (!text.empty()) {
             shader_source.append(static_cast<std::size_t>(scope) * 4, ' ');
@@ -816,6 +816,7 @@ private:
     }
 
     void Generate() {
+        bool dump = false;
         if (sanitize_mul) {
 #ifdef ANDROID
             // Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here
@@ -884,6 +885,8 @@ private:
             u32 compile_end = CompileRange(label, next_label);
             if (compile_end > next_label && compile_end != PROGRAM_END) {
                 // This happens only when there is a label inside a IF/LOOP block
+                dump = true;
+                LOG_INFO(Render_OpenGL, "compile_end: {}", compile_end);
                 shader.AddLine("{{ jmp_to = {}u; break; }}", compile_end);
                 labels.emplace(compile_end);
             }
@@ -906,6 +909,10 @@ private:
 
         DEBUG_ASSERT(shader.scope == 0);
     }
+
+        if (dump) {
+            LOG_INFO(Render_OpenGL, "{}", shader.shader_source);
+        }
     }
 
 private:
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 2df8f30a9..2ebaf8078 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -239,9 +239,17 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
         config.state.emulated_attrib_locations[location] =
             is_supported ? 0 : emulated_attrib_loc++;
     }
 
-    auto [handle, result] =
-        programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
-                                        instance.GetDevice(), ShaderOptimization::High);
+    vk::ShaderModule handle{};
+    if (Settings::values.spirv_shader_gen.GetValue()) {
+        std::optional<std::vector<u32>> code;
+        std::tie(handle, code) =
+            programmable_vertex_shaders_spv.Get(config, setup, instance.GetDevice());
+    } else {
+        std::optional<std::string> code;
+        std::tie(handle, code) =
+            programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
+                                            instance.GetDevice(), ShaderOptimization::High);
+    }
+
     if (!handle) {
         LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
         return false;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 655bde8b3..31bf29fd5 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -9,6 +9,7 @@
 #include "common/hash.h"
 #include "video_core/rasterizer_cache/pixel_format.h"
 #include "video_core/regs.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
 #include "video_core/renderer_vulkan/vk_shader_util.h"
 #include "video_core/shader/shader_cache.h"
@@ -111,7 +112,12 @@ struct PipelineInfo {
  * Vulkan specialized PICA shader caches
  */
-using ProgrammableVertexShaders =
-    Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, &Compile,
-                                    &GenerateVertexShader>;
+using ProgrammableVertexShaders =
+    Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, std::string, &Compile,
+                                    &GenerateVertexShader>;
+
+using ProgrammableVertexShadersSPV =
+    Pica::Shader::ShaderDoubleCache<PicaVSConfig, vk::ShaderModule, std::vector<u32>, &CompileSPV,
+                                    &GenerateVertexShaderSPV>;
 
 using FixedGeometryShaders =
     Pica::Shader::ShaderCache<FixedGSConfig, vk::ShaderModule, &Compile,
                               &GenerateFixedGeometryShader>;
@@ -219,6 +225,7 @@ private:
    std::array<vk::ShaderModule, MAX_SHADER_STAGES> current_shaders;
    std::array<u64, MAX_SHADER_STAGES> shader_hashes;
    ProgrammableVertexShaders programmable_vertex_shaders;
+   ProgrammableVertexShadersSPV programmable_vertex_shaders_spv;
    FixedGeometryShaders fixed_geometry_shaders;
    FragmentShadersGLSL fragment_shaders_glsl;
    FragmentShadersSPV fragment_shaders_spv;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index d9f488861..b5f12aff6 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1,6 +1,1089 @@
-#include "vk_shader_decompiler.h"
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
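+//
+// The decompiler first runs a control-flow analysis pass over the PICA program
+// to split it into subroutines, then emits one SPIR-V function per subroutine,
+// dispatching JMP targets through a loop-wrapped OpSwitch (see Generate()).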
 
-vk_shader_decompiler::vk_shader_decompiler()
-{
+#include <array>
+#include <cstddef>
+#include <map>
+#include <optional>
+#include <set>
+#include <string>
+#include <boost/container/small_vector.hpp>
+#include <fmt/format.h>
+#include <nihstro/shader_bytecode.h>
+#include "common/assert.h"
+#include "common/file_util.h"
+#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+
+namespace Vulkan {
+
+int i = 0;
+
+using nihstro::Instruction;
+using nihstro::OpCode;
+using nihstro::RegisterType;
+using nihstro::SourceRegister;
+using nihstro::SwizzlePattern;
+
+VertexModule::VertexModule(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config)
+    : Sirit::Module{0x00010300}, config{config}, program_code{setup.program_code},
+      swizzle_data{setup.swizzle_data}, main_offset{config.state.main_offset},
+      sanitize_mul{config.state.sanitize_mul},
+      subroutines{ControlFlowAnalyzer(program_code, main_offset).MoveSubroutines()} {
+    DefineArithmeticTypes();
+    DefineUniformStructs();
+    DefineInterface();
 }
+
+VertexModule::~VertexModule() = default;
+
+ControlFlowAnalyzer::ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code,
+                                         u32 main_offset)
+    : program_code(program_code) {
+    // Recursively finds all subroutines.
+    const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
+    if (program_main.exit_method != ExitMethod::AlwaysEnd) {
+        throw DecompileFail("Program does not always end");
+    }
+}
+
+const Subroutine& ControlFlowAnalyzer::AddSubroutine(u32 begin, u32 end) {
+    auto iter = subroutines.find(Subroutine{begin, end});
+    if (iter != subroutines.end())
+        return *iter;
+
+    Subroutine subroutine{begin, end};
+    subroutine.exit_method = Scan(begin, end, subroutine.labels);
+    if (subroutine.exit_method == ExitMethod::Undetermined)
+        throw DecompileFail("Recursive function detected");
+    return *subroutines.insert(std::move(subroutine)).first;
+}
+
+ExitMethod ControlFlowAnalyzer::Scan(u32 begin, u32 end, std::set<u32>& labels) {
+    auto [iter, inserted] =
+        exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
+    ExitMethod& exit_method = iter->second;
+    if (!inserted)
+        return exit_method;
+
+    using nihstro::Instruction;
+    using nihstro::OpCode;
+
+    for (u32 offset = begin; offset != end && offset != PROGRAM_END; ++offset) {
+        const Instruction instr = {program_code[offset]};
+        switch (instr.opcode.Value()) {
+        case OpCode::Id::END: {
+            return exit_method = ExitMethod::AlwaysEnd;
+        }
+        case OpCode::Id::JMPC:
+        case OpCode::Id::JMPU: {
+            labels.insert(instr.flow_control.dest_offset);
+            ExitMethod no_jmp = Scan(offset + 1, end, labels);
+            ExitMethod jmp = Scan(instr.flow_control.dest_offset, end, labels);
+            return exit_method = ParallelExit(no_jmp, jmp);
+        }
+        case OpCode::Id::CALL: {
+            auto& call = AddSubroutine(instr.flow_control.dest_offset,
+                                       instr.flow_control.dest_offset +
+                                           instr.flow_control.num_instructions);
+            if (call.exit_method == ExitMethod::AlwaysEnd)
+                return exit_method = ExitMethod::AlwaysEnd;
+            ExitMethod after_call = Scan(offset + 1, end, labels);
+            return exit_method = SeriesExit(call.exit_method, after_call);
+        }
+        case OpCode::Id::LOOP: {
+            auto& loop = AddSubroutine(offset + 1, instr.flow_control.dest_offset + 1);
+            if (loop.exit_method == ExitMethod::AlwaysEnd)
+                return exit_method = ExitMethod::AlwaysEnd;
+            ExitMethod after_loop = Scan(instr.flow_control.dest_offset + 1, end, labels);
+            return exit_method = SeriesExit(loop.exit_method, after_loop);
+        }
+        case OpCode::Id::CALLC:
+        case OpCode::Id::CALLU: {
+            auto& call = AddSubroutine(instr.flow_control.dest_offset,
+                                       instr.flow_control.dest_offset +
+                                           instr.flow_control.num_instructions);
+
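+            // A conditional call may fall through, so the callee's exit method
+            // only applies on the taken path; merge it with the fall-through.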
+            ExitMethod after_call = Scan(offset + 1, end, labels);
+            return exit_method = SeriesExit(
+                       ParallelExit(call.exit_method, ExitMethod::AlwaysReturn), after_call);
+        }
+        case OpCode::Id::IFU:
+        case OpCode::Id::IFC: {
+            auto& if_sub = AddSubroutine(offset + 1, instr.flow_control.dest_offset);
+            ExitMethod else_method;
+            if (instr.flow_control.num_instructions != 0) {
+                auto& else_sub = AddSubroutine(instr.flow_control.dest_offset,
+                                               instr.flow_control.dest_offset +
+                                                   instr.flow_control.num_instructions);
+                else_method = else_sub.exit_method;
+            } else {
+                else_method = ExitMethod::AlwaysReturn;
+            }
+
+            ExitMethod both = ParallelExit(if_sub.exit_method, else_method);
+            if (both == ExitMethod::AlwaysEnd)
+                return exit_method = ExitMethod::AlwaysEnd;
+            ExitMethod after_call =
+                Scan(instr.flow_control.dest_offset + instr.flow_control.num_instructions, end,
+                     labels);
+            return exit_method = SeriesExit(both, after_call);
+        }
+        default:
+            break;
+        }
+    }
+    return exit_method = ExitMethod::AlwaysReturn;
+}
+
+ExitMethod ControlFlowAnalyzer::SeriesExit(ExitMethod a, ExitMethod b) {
+    // This should be handled before evaluating b.
+    DEBUG_ASSERT(a != ExitMethod::AlwaysEnd);
+
+    if (a == ExitMethod::Undetermined) {
+        return ExitMethod::Undetermined;
+    }
+
+    if (a == ExitMethod::AlwaysReturn) {
+        return b;
+    }
+
+    if (b == ExitMethod::Undetermined || b == ExitMethod::AlwaysEnd) {
+        return ExitMethod::AlwaysEnd;
+    }
+
+    return ExitMethod::Conditional;
+}
+
+ExitMethod ControlFlowAnalyzer::ParallelExit(ExitMethod a, ExitMethod b) {
+    if (a == ExitMethod::Undetermined) {
+        return b;
+    }
+    if (b == ExitMethod::Undetermined) {
+        return a;
+    }
+    if (a == b) {
+        return a;
+    }
+    return ExitMethod::Conditional;
+}
+
+/// An adaptor for applying a swizzle selector from the nihstro interfaces to a vector.
+template <auto getter>
+Id GetSelectorSrc(VertexModule& m, const Id vector, const SwizzlePattern& pattern) {
+    bool identity = true;
+    std::array<u32, 4> components;
+    for (u32 i = 0; i < 4; ++i) {
+        const SwizzlePattern::Selector selector = (pattern.*getter)(i);
+        const u32 index = static_cast<u32>(selector);
+        identity &= (i == index);
+        components[i] = index;
+    }
+
+    if (identity) {
+        return vector;
+    }
+    return m.OpVectorShuffle(m.vec_ids.Get(4), vector, vector, components);
+}
+
+constexpr auto GetSelectorSrc1 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc1>;
+constexpr auto GetSelectorSrc2 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc2>;
+constexpr auto GetSelectorSrc3 = GetSelectorSrc<&SwizzlePattern::GetSelectorSrc3>;
+
+const Subroutine& VertexModule::GetSubroutine(u32 begin, u32 end) const {
+    auto iter = subroutines.find(Subroutine{begin, end});
+    ASSERT(iter != subroutines.end());
+    return *iter;
+}
+
+Id VertexModule::EvaluateCondition(Instruction::FlowControlType flow_control) {
+    using Op = Instruction::FlowControlType::Op;
+
+    const Id cond_code{OpLoad(bvec_ids.Get(2), conditional_code)};
+    const Id cond_x{OpCompositeExtract(bool_id, cond_code, 0)};
+    const Id cond_y{OpCompositeExtract(bool_id, cond_code, 1)};
+
+    const Id result_x = flow_control.refx.Value() ? cond_x : OpLogicalNot(bool_id, cond_x);
+    const Id result_y = flow_control.refy.Value() ? cond_y : OpLogicalNot(bool_id, cond_y);
+
+    const auto Condition = [&]() -> Id {
+        if (flow_control.refx.Value() && flow_control.refy.Value()) {
+            return cond_code;
+        } else if (!flow_control.refx.Value() && !flow_control.refy.Value()) {
+            return OpLogicalNot(bvec_ids.Get(2), cond_code);
+        } else {
+            return OpCompositeConstruct(bvec_ids.Get(2), result_x, result_y);
+        }
+    };
+
+    switch (flow_control.op) {
+    case Op::JustX:
+        return result_x;
+    case Op::JustY:
+        return result_y;
+    case Op::Or:
+        return OpAny(bool_id, Condition());
+    case Op::And:
+        return OpAll(bool_id, Condition());
+    default:
+        UNREACHABLE();
+        return Id{};
+    }
+}
+
+Id VertexModule::GetSourceRegister(const SourceRegister& source_reg, u32 address_register_index) {
+    const u32 index = static_cast<u32>(source_reg.GetIndex());
+
+    switch (source_reg.GetRegisterType()) {
+    case RegisterType::Input: {
+        if (!used_regs[index]) {
+            const Id type{AttribType(index)};
+            const Id vs_in_typed_reg = DefineInput(type, index);
+            const Id typed_reg{OpLoad(type, vs_in_typed_reg)};
+            input_typed_regs[index] = vs_in_typed_reg;
+            input_regs[index] = AttribCast(index, typed_reg);
+            used_regs[index] = true;
+        }
+        return input_regs[index];
+    }
+    case RegisterType::Temporary: {
+        return OpLoad(vec_ids.Get(4), tmp_regs[index]);
+    }
+    case RegisterType::FloatUniform: {
+        Id uniform_index{ConstU32(index)};
+        if (address_register_index != 0) {
+            const Id private_ptr{TypePointer(spv::StorageClass::Private, i32_id)};
+            const Id component{ConstU32(address_register_index - 1)};
+            const Id offset{
+                OpLoad(i32_id, OpAccessChain(private_ptr, address_registers, component))};
+            uniform_index = OpIAdd(i32_id, uniform_index, offset);
+        }
+        return GetVsUniformMember(vec_ids.Get(4), ConstS32(2), uniform_index);
+    }
+    default:
+        UNREACHABLE();
+    }
+    return Id{};
+}
+
+Id VertexModule::GetDestRegister(const DestRegister& dest_reg) {
+    const u32 index = static_cast<u32>(dest_reg.GetIndex());
+
+    switch (dest_reg.GetRegisterType()) {
+    case RegisterType::Temporary:
+        return tmp_regs[index];
+    case RegisterType::Output:
+        if (config.state.output_map[index] < config.state.num_outputs) {
+            return output_regs[index];
+        }
+        break;
+    default:
+        UNREACHABLE();
+    }
+    return Id{};
+}
+
+Id VertexModule::GetDestPointer(const DestRegister& dest_reg) {
+    switch (dest_reg.GetRegisterType()) {
+    case RegisterType::Temporary:
+        return TypePointer(spv::StorageClass::Private, f32_id);
+    case RegisterType::Output:
+        return TypePointer(spv::StorageClass::Output, f32_id);
+    default:
+        UNREACHABLE();
+    }
+    return Id{};
+}
+
+void VertexModule::CallSubroutine(const Subroutine& subroutine) {
+    if (subroutine.exit_method == ExitMethod::AlwaysEnd) {
+        OpFunctionCall(bool_id, subroutine.function);
+        OpReturnValue(ConstBool(true));
+        OpFunctionEnd();
+    } else if (subroutine.exit_method == ExitMethod::Conditional) {
+        ASSERT_MSG(false, "Conditional exit method not implemented");
+        //shader.AddLine("if ({}()) {{ return true; }}", subroutine.GetName());
+    } else {
+        OpFunctionCall(bool_id, subroutine.function);
+    }
+}
+
+void VertexModule::SetDest(const nihstro::SwizzlePattern& swizzle, Id dest, Id value,
+                           Id reg_pointer, u32 dest_num_components, u32 value_num_components) {
+    u32 dest_mask_num_components = 0;
+    std::array<u32, 4> dest_mask_swizzle;
+
+    for (u32 i = 0; i < dest_num_components; ++i) {
+        if (swizzle.DestComponentEnabled(static_cast<int>(i))) {
+            dest_mask_swizzle[dest_mask_num_components++] = i;
+        }
+    }
+
+    if (!Sirit::ValidId(dest) || dest_mask_num_components == 0) {
+        return;
+    }
+
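+    // A scalar value is broadcast to every enabled destination component;
+    // vector values must cover at least the components enabled in the mask.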
+    DEBUG_ASSERT(value_num_components >= dest_num_components || value_num_components == 1);
+
+    Id src{value};
+    if (value_num_components == 1) {
+        if (dest_mask_num_components == 4) {
+            src = OpCompositeConstruct(vec_ids.Get(4), src, src, src, src);
+            OpStore(dest, src);
+        } else {
+            for (u32 i = 0; i < dest_mask_num_components; i++) {
+                const u32 comp = dest_mask_swizzle[i];
+                const Id pointer{OpAccessChain(reg_pointer, dest, ConstU32(comp))};
+                OpStore(pointer, src);
+            }
+        }
+    } else {
+        if (dest_mask_num_components == 4) {
+            OpStore(dest, src);
+        } else {
+            for (u32 i = 0; i < dest_mask_num_components; i++) {
+                const u32 comp = dest_mask_swizzle[i];
+                const Id pointer{OpAccessChain(reg_pointer, dest, ConstU32(comp))};
+                const Id result_type{dest.value == address_registers.value ? i32_id : f32_id};
+                const Id val{OpCompositeExtract(result_type, src, comp)};
+                OpStore(pointer, val);
+            }
+        }
+    }
+}
+
+Id VertexModule::SanitizeMul(Id lhs, Id rhs) {
+    const Id product{OpFMul(vec_ids.Get(4), lhs, rhs)};
+    const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
+    const Id product_nan{OpIsNan(bvec_ids.Get(4), product)};
+
+#ifdef ANDROID
+    // Use a cheaper sanitize_mul on Android, as mobile GPUs struggle here
+    // This seems to be sufficient at least for Ocarina of Time and Attack on Titan accurate
+    // multiplication bugs
+    return OpSelect(vec_ids.Get(4), product_nan, zero_vec, product);
+#else
+    const Id rhs_nan{OpIsNan(bvec_ids.Get(4), rhs)};
+    const Id lhs_nan{OpIsNan(bvec_ids.Get(4), lhs)};
+    return OpSelect(vec_ids.Get(4), product_nan,
+                    OpSelect(vec_ids.Get(4), lhs_nan, product,
+                             OpSelect(vec_ids.Get(4), rhs_nan, product, zero_vec)),
+                    product);
+#endif
+}
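+
+// Shared state for the jump dispatch emitted by Generate(); file scope so that
+// CompileInstr can reach it when lowering JMPC/JMPU into switch re-entries.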
+SpirvParams params;
+
+u32 VertexModule::CompileInstr(u32 offset) {
+    const Instruction instr = {program_code[offset]};
+
+    std::size_t swizzle_offset =
+        instr.opcode.Value().GetInfo().type == OpCode::Type::MultiplyAdd
+            ? instr.mad.operand_desc_id
+            : instr.common.operand_desc_id;
+    const SwizzlePattern swizzle = {swizzle_data[swizzle_offset]};
+
+    //shader.AddLine("// {}: {}", offset, instr.opcode.Value().GetInfo().name);
+
+    switch (instr.opcode.Value().GetInfo().type) {
+    case OpCode::Type::Arithmetic: {
+        const bool is_inverted =
+            (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+
+        Id src1{GetSourceRegister(instr.common.GetSrc1(is_inverted),
+                                  !is_inverted * instr.common.address_register_index)};
+        if (swizzle.negate_src1) {
+            src1 = OpFNegate(vec_ids.Get(4), src1);
+        }
+        src1 = GetSelectorSrc1(*this, src1, swizzle);
+
+        Id src2{GetSourceRegister(instr.common.GetSrc2(is_inverted),
+                                  is_inverted * instr.common.address_register_index)};
+        if (swizzle.negate_src2) {
+            src2 = OpFNegate(vec_ids.Get(4), src2);
+        }
+        src2 = GetSelectorSrc2(*this, src2, swizzle);
+
+        const Id dest_reg{GetDestRegister(instr.common.dest.Value())};
+        const Id reg_pointer{GetDestPointer(instr.common.dest.Value())};
+
+        switch (instr.opcode.Value().EffectiveOpCode()) {
+        case OpCode::Id::ADD: {
+            SetDest(swizzle, dest_reg, OpFAdd(vec_ids.Get(4), src1, src2), reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::MUL: {
+            Id product{};
+            if (sanitize_mul) {
+                product = SanitizeMul(src1, src2);
+            } else {
+                product = OpFMul(vec_ids.Get(4), src1, src2);
+            }
+
+            SetDest(swizzle, dest_reg, product, reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::FLR: {
+            SetDest(swizzle, dest_reg, OpFloor(vec_ids.Get(4), src1), reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::MAX: {
+            SetDest(swizzle, dest_reg, OpFMax(vec_ids.Get(4), src1, src2), reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::MIN: {
+            SetDest(swizzle, dest_reg, OpFMin(vec_ids.Get(4), src1, src2), reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::DP3:
+        case OpCode::Id::DP4:
+        case OpCode::Id::DPH:
+        case OpCode::Id::DPHI: {
+            OpCode::Id opcode = instr.opcode.Value().EffectiveOpCode();
+            Id dot{};
+            if (opcode == OpCode::Id::DP3) {
+                if (sanitize_mul) {
+                    const Id product{SanitizeMul(src1, src2)};
+                    const Id product_xyz{
+                        OpVectorShuffle(vec_ids.Get(3), product, product, 0, 1, 2)};
+                    dot = OpDot(f32_id, product_xyz, ConstF32(1.f, 1.f, 1.f));
+                } else {
+                    const Id src1_xyz{OpVectorShuffle(vec_ids.Get(3), src1, src1, 0, 1, 2)};
+                    const Id src2_xyz{OpVectorShuffle(vec_ids.Get(3), src2, src2, 0, 1, 2)};
+                    dot = OpDot(f32_id, src1_xyz, src2_xyz);
+                }
+            } else {
+                if (sanitize_mul) {
+                    const Id src1_ =
+                        (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
+                            ? OpCompositeInsert(vec_ids.Get(4), ConstF32(1.f), src1, 3)
+                            : src1;
+
+                    dot = OpDot(f32_id, SanitizeMul(src1_, src2), ConstF32(1.f, 1.f, 1.f, 1.f));
+                } else {
+                    dot = OpDot(f32_id, src1, src2);
+                }
+            }
+
+            SetDest(swizzle, dest_reg, dot, reg_pointer, 4, 1);
+            break;
+        }
+
+        case OpCode::Id::RCP: {
+            //if (!sanitize_mul) {
+            // When accurate multiplication is OFF, NaN are not really handled. This is a
+            // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
+            //shader.AddLine("if ({}.x != 0.0)", src1);
+            //}
+            const Id src1_x{OpCompositeExtract(f32_id, src1, 0)};
+            const Id rcp{OpFDiv(f32_id, ConstF32(1.f), src1_x)};
+            SetDest(swizzle, dest_reg, rcp, reg_pointer, 4, 1);
+            break;
+        }
+
+        case OpCode::Id::RSQ: {
+            //if (!sanitize_mul) {
+            // When accurate multiplication is OFF, NaN are not really handled. This is a
+            // workaround to cheaply avoid NaN. Fixes graphical issues in Ocarina of Time.
+            //shader.AddLine("if ({}.x > 0.0)", src1);
+            //}
+            const Id src1_x{OpCompositeExtract(f32_id, src1, 0)};
+            const Id rsq{OpInverseSqrt(f32_id, src1_x)};
+            SetDest(swizzle, dest_reg, rsq, reg_pointer, 4, 1);
+            break;
+        }
+
+        case OpCode::Id::MOVA: {
+            const Id src1i{OpConvertFToS(ivec_ids.Get(4), src1)};
+            const Id src1i_xy{OpVectorShuffle(ivec_ids.Get(2), src1i, src1i, 0, 1)};
+            SetDest(swizzle, address_registers, src1i_xy,
+                    TypePointer(spv::StorageClass::Private, i32_id), 2, 2);
+            break;
+        }
+
+        case OpCode::Id::MOV: {
+            SetDest(swizzle, dest_reg, src1, reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::SGE:
+        case OpCode::Id::SGEI: {
+            const Id one_vec{ConstF32(1.f, 1.f, 1.f, 1.f)};
+            const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
+            const Id geq{OpFOrdGreaterThanEqual(bvec_ids.Get(4), src1, src2)};
+            const Id geqf{OpSelect(vec_ids.Get(4), geq, one_vec, zero_vec)};
+            SetDest(swizzle, dest_reg, geqf, reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::SLT:
+        case OpCode::Id::SLTI: {
+            const Id one_vec{ConstF32(1.f, 1.f, 1.f, 1.f)};
+            const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
+            const Id le{OpFOrdLessThan(bvec_ids.Get(4), src1, src2)};
+            const Id lef{OpSelect(vec_ids.Get(4), le, one_vec, zero_vec)};
+            SetDest(swizzle, dest_reg, lef, reg_pointer, 4, 4);
+            break;
+        }
+
+        case OpCode::Id::CMP: {
+            using CompareOp = Instruction::Common::CompareOpType::Op;
+            const auto Compare = [&](CompareOp op, Id type, Id lhs, Id rhs) -> Id {
+                switch (op) {
+                case CompareOp::Equal:
+                    return OpFOrdEqual(type, lhs, rhs);
+                case CompareOp::NotEqual:
+                    return OpFOrdNotEqual(type, lhs, rhs);
+                case CompareOp::LessThan:
+                    return OpFOrdLessThan(type, lhs, rhs);
+                case CompareOp::LessEqual:
+                    return OpFOrdLessThanEqual(type, lhs, rhs);
+                case CompareOp::GreaterThan:
+                    return OpFOrdGreaterThan(type, lhs, rhs);
+                case CompareOp::GreaterEqual:
+                    return OpFOrdGreaterThanEqual(type, lhs, rhs);
+                default:
+                    LOG_ERROR(HW_GPU, "Unknown compare mode {:x}", op);
+                }
+                return Id{};
+            };
+
+            const CompareOp op_x = instr.common.compare_op.x.Value();
+            const CompareOp op_y = instr.common.compare_op.y.Value();
+
+            if (op_x != op_y) {
+                const Id src1_x{OpCompositeExtract(f32_id, src1, 0)};
+                const Id src2_x{OpCompositeExtract(f32_id, src2, 0)};
+                const Id cond_code_x{Compare(op_x, bool_id, src1_x, src2_x)};
+
+                const Id src1_y{OpCompositeExtract(f32_id, src1, 1)};
+                const Id src2_y{OpCompositeExtract(f32_id, src2, 1)};
+                const Id cond_code_y{Compare(op_y, bool_id, src1_y, src2_y)};
+
+                const Id cond_code{
+                    OpCompositeConstruct(bvec_ids.Get(2), cond_code_x, cond_code_y)};
+                OpStore(conditional_code, cond_code);
+            } else {
+                const Id src1_xy{OpVectorShuffle(vec_ids.Get(2), src1, src1, 0, 1)};
+                const Id src2_xy{OpVectorShuffle(vec_ids.Get(2), src2, src2, 0, 1)};
+                const Id cond_code{Compare(op_x, bvec_ids.Get(2), src1_xy, src2_xy)};
+                OpStore(conditional_code, cond_code);
+            }
+            break;
+        }
+
+        case OpCode::Id::EX2: {
+            const Id src1_x{OpCompositeExtract(f32_id, src1, 0)};
+            const Id exp2{OpExp2(f32_id, src1_x)};
+            SetDest(swizzle, dest_reg, exp2, reg_pointer, 4, 1);
+            break;
+        }
+
+        case OpCode::Id::LG2: {
+            const Id src1_x{OpCompositeExtract(f32_id, src1, 0)};
+            const Id log2{OpLog2(f32_id, src1_x)};
+            SetDest(swizzle, dest_reg, log2, reg_pointer, 4, 1);
+            break;
+        }
+
+        default: {
+            LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x{:02x} ({}): 0x{:08x}",
+                      (int)instr.opcode.Value().EffectiveOpCode(),
+                      instr.opcode.Value().GetInfo().name, instr.hex);
+            throw DecompileFail("Unhandled instruction");
+            break;
+        }
+        }
+
+        break;
+    }
+
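+    // MAD/MADI fuse a multiply and an add; when sanitize_mul is enabled the
+    // product goes through SanitizeMul first instead of using a single OpFma.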
+    case OpCode::Type::MultiplyAdd: {
+        if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
+            (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
+            bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
+
+            Id src1{GetSourceRegister(instr.mad.GetSrc1(is_inverted), 0)};
+            if (swizzle.negate_src1) {
+                src1 = OpFNegate(vec_ids.Get(4), src1);
+            }
+            src1 = GetSelectorSrc1(*this, src1, swizzle);
+
+            Id src2{GetSourceRegister(instr.mad.GetSrc2(is_inverted),
+                                      !is_inverted * instr.mad.address_register_index)};
+            if (swizzle.negate_src2) {
+                src2 = OpFNegate(vec_ids.Get(4), src2);
+            }
+            src2 = GetSelectorSrc2(*this, src2, swizzle);
+
+            Id src3{GetSourceRegister(instr.mad.GetSrc3(is_inverted),
+                                      is_inverted * instr.mad.address_register_index)};
+            if (swizzle.negate_src3) {
+                src3 = OpFNegate(vec_ids.Get(4), src3);
+            }
+            src3 = GetSelectorSrc3(*this, src3, swizzle);
+
+            Id dest_reg =
+                (instr.mad.dest.Value() < 0x10)
+                    ? output_regs[instr.mad.dest.Value().GetIndex()]
+                    : (instr.mad.dest.Value() < 0x20)
+                          ? tmp_regs[instr.mad.dest.Value().GetIndex()]
+                          : Id{};
+            Id reg_pointer =
+                (instr.mad.dest.Value() < 0x10)
+                    ? TypePointer(spv::StorageClass::Output, f32_id)
+                    : (instr.mad.dest.Value() < 0x20)
+                          ? TypePointer(spv::StorageClass::Private, f32_id)
+                          : Id{};
+
+            if (sanitize_mul) {
+                const Id src12{SanitizeMul(src1, src2)};
+                const Id result{OpFAdd(vec_ids.Get(4), src12, src3)};
+                SetDest(swizzle, dest_reg, result, reg_pointer, 4, 4);
+            } else {
+                const Id result{OpFma(vec_ids.Get(4), src1, src2, src3)};
+                SetDest(swizzle, dest_reg, result, reg_pointer, 4, 4);
+            }
+        } else {
+            LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x{:02x} ({}): 0x{:08x}",
+                      (int)instr.opcode.Value().EffectiveOpCode(),
+                      instr.opcode.Value().GetInfo().name, instr.hex);
+            throw DecompileFail("Unhandled instruction");
+        }
+        break;
+    }
+
+    default: {
+        switch (instr.opcode.Value()) {
+        case OpCode::Id::END: {
+            OpReturnValue(ConstBool(true));
+            offset = PROGRAM_END - 1;
+            break;
+        }
+
+        case OpCode::Id::JMPC:
+        case OpCode::Id::JMPU: {
+            Id condition{};
+            if (instr.opcode.Value() == OpCode::Id::JMPC) {
+                condition = EvaluateCondition(instr.flow_control);
+            } else {
+                const bool invert_test = instr.flow_control.num_instructions & 1;
+                condition = GetUniformBool(instr.flow_control.bool_uniform_id);
+                if (invert_test) {
+                    condition = OpLogicalNot(bool_id, condition);
+                }
+            }
+
+            const Id merge_block{OpLabel()};
+            const Id true_label{OpLabel()};
+            OpSelectionMerge(merge_block, spv::SelectionControlMask::MaskNone);
+            OpBranchConditional(condition, true_label, merge_block);
+
+            AddLabel(true_label);
+            OpStore(params.jmp_to, ConstU32(instr.flow_control.dest_offset.Value()));
+            OpBranch(params.switch_merge_block);
+
+            AddLabel(merge_block);
+            break;
+        }
+
+        case OpCode::Id::CALL:
+        case OpCode::Id::CALLC:
+        case OpCode::Id::CALLU: {
+            Id condition{};
+            if (instr.opcode.Value() == OpCode::Id::CALLC) {
+                condition = EvaluateCondition(instr.flow_control);
+            } else if (instr.opcode.Value() == OpCode::Id::CALLU) {
+                condition = GetUniformBool(instr.flow_control.bool_uniform_id);
+            }
+
+            auto& call_sub = GetSubroutine(instr.flow_control.dest_offset,
+                                           instr.flow_control.dest_offset +
+                                               instr.flow_control.num_instructions);
+
+            if (!Sirit::ValidId(condition)) {
+                CallSubroutine(call_sub);
+            } else {
+                const Id true_label{OpLabel()};
+                const Id false_label{OpLabel()};
+
+                OpSelectionMerge(false_label, spv::SelectionControlMask::MaskNone);
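+                // Branch over the subroutine call when the CALLC/CALLU
+                // predicate evaluates to false.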
+                OpBranchConditional(condition, true_label, false_label);
+
+                AddLabel(true_label);
+                CallSubroutine(call_sub);
+                AddLabel(false_label);
+            }
+
+            if (instr.opcode.Value() == OpCode::Id::CALL &&
+                call_sub.exit_method == ExitMethod::AlwaysEnd) {
+                offset = PROGRAM_END - 1;
+            }
+
+            break;
+        }
+
+        case OpCode::Id::NOP: {
+            break;
+        }
+
+        case OpCode::Id::IFC:
+        case OpCode::Id::IFU: {
+            Id condition{};
+            if (instr.opcode.Value() == OpCode::Id::IFC) {
+                condition = EvaluateCondition(instr.flow_control);
+            } else {
+                condition = GetUniformBool(instr.flow_control.bool_uniform_id);
+            }
+
+            const u32 if_offset = offset + 1;
+            const u32 else_offset = instr.flow_control.dest_offset;
+            const u32 endif_offset =
+                instr.flow_control.dest_offset + instr.flow_control.num_instructions;
+
+            const Id merge_block{OpLabel()};
+            const Id true_label{OpLabel()};
+            const Id false_label{OpLabel()};
+
+            OpSelectionMerge(merge_block, spv::SelectionControlMask::MaskNone);
+            OpBranchConditional(condition, true_label, false_label);
+
+            AddLabel(true_label);
+
+            auto& if_sub = GetSubroutine(if_offset, else_offset);
+            CallSubroutine(if_sub);
+            offset = else_offset - 1;
+
+            OpBranch(merge_block);
+            AddLabel(false_label);
+            if (instr.flow_control.num_instructions != 0) {
+                auto& else_sub = GetSubroutine(else_offset, endif_offset);
+                CallSubroutine(else_sub);
+                offset = endif_offset - 1;
+
+                if (if_sub.exit_method == ExitMethod::AlwaysEnd &&
+                    else_sub.exit_method == ExitMethod::AlwaysEnd) {
+                    offset = PROGRAM_END - 1;
+                }
+            }
+
+            OpBranch(merge_block);
+            AddLabel(merge_block);
+            break;
+        }
+
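+        // LOOP is lowered to a structured SPIR-V loop: aL (address_registers.z)
+        // is seeded from the integer uniform's y component and stepped by its z
+        // component, while the x component bounds the iteration count (inclusive).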
+        case OpCode::Id::LOOP: {
+            const Id int_uniform{GetVsUniformMember(
+                uvec_ids.Get(4), ConstS32(1),
+                ConstS32(static_cast<s32>(instr.flow_control.int_uniform_id.Value())))};
+            const Id int_x{OpCompositeExtract(u32_id, int_uniform, 0)};
+            const Id int_y{OpBitcast(i32_id, OpCompositeExtract(u32_id, int_uniform, 1))};
+            const Id int_z{OpBitcast(i32_id, OpCompositeExtract(u32_id, int_uniform, 2))};
+            const Id loop_id{params.vars[params.used_vars++]};
+
+            const Id addr_regs_pointer{TypePointer(spv::StorageClass::Private, i32_id)};
+            const Id addr_regs_z_id{
+                OpAccessChain(addr_regs_pointer, address_registers, ConstU32(2u))};
+            OpStore(addr_regs_z_id, int_y);
+            OpStore(loop_id, ConstU32(0u));
+
+            const Id for_loop_label{OpLabel()};
+            const Id merge_block{OpLabel()};
+            const Id continue_target{OpLabel()};
+            const Id label{OpLabel()};
+
+            OpBranch(for_loop_label);
+            AddLabel(for_loop_label);
+            OpLoopMerge(merge_block, continue_target, spv::LoopControlMask::MaskNone);
+            OpBranch(label);
+            AddLabel(label);
+
+            const Id loop{OpLoad(u32_id, loop_id)};
+            const Id condition{OpULessThanEqual(bool_id, loop, int_x)};
+            const Id true_label{OpLabel()};
+            OpBranchConditional(condition, true_label, merge_block);
+
+            AddLabel(true_label);
+
+            auto& loop_sub = GetSubroutine(offset + 1, instr.flow_control.dest_offset + 1);
+            CallSubroutine(loop_sub);
+            OpBranch(continue_target);
+
+            AddLabel(continue_target);
+            const Id addr_regs_z{OpLoad(i32_id, addr_regs_z_id)};
+            const Id addr_regs_z_inc{OpIAdd(i32_id, addr_regs_z, int_z)};
+            OpStore(addr_regs_z_id, addr_regs_z_inc);
+            OpBranch(for_loop_label);
+
+            AddLabel(merge_block);
+
+            offset = instr.flow_control.dest_offset;
+            if (loop_sub.exit_method == ExitMethod::AlwaysEnd) {
+                offset = PROGRAM_END - 1;
+            }
+
+            break;
+        }
+
+        case OpCode::Id::EMIT:
+        case OpCode::Id::SETEMIT:
+            LOG_ERROR(HW_GPU, "Geometry shader operation detected in vertex shader");
+            break;
+
+        default: {
+            LOG_ERROR(HW_GPU, "Unhandled instruction: 0x{:02x} ({}): 0x{:08x}",
+                      (int)instr.opcode.Value().EffectiveOpCode(),
+                      instr.opcode.Value().GetInfo().name, instr.hex);
+            throw DecompileFail("Unhandled instruction");
+            break;
+        }
+        }
+
+        break;
+    }
+    }
+    return offset + 1;
+}
+
+u32 VertexModule::CompileRange(u32 begin, u32 end) {
+    u32 program_counter;
+    for (program_counter = begin; program_counter < (begin > end ? PROGRAM_END : end);) {
+        program_counter = CompileInstr(program_counter);
+    }
+    return program_counter;
+}
+
+void VertexModule::Generate() {
+    // Add declarations for all subroutines
+    for (const Subroutine& subroutine : subroutines) {
+        subroutine.function = OpFunction();
+    }
+
+    // Add definitions for all subroutines
+    const Id func_type{TypeFunction(bool_id)};
+    for (const Subroutine& subroutine : subroutines) {
+        const Id function{subroutine.function};
+        AddFunction(bool_id, function, spv::FunctionControlMask::MaskNone, func_type);
+        AddLabel(OpLabel());
+
+        // Define a list of variables that can be used for LOOP
+        for (Id& var : params.vars) {
+            var = DefineVar<false>(u32_id, spv::StorageClass::Function);
+        }
+
+        std::set<u32> labels = subroutine.labels;
+        if (labels.empty()) {
+            if (CompileRange(subroutine.begin, subroutine.end) != PROGRAM_END) {
+                OpReturnValue(ConstantFalse(bool_id));
+                OpFunctionEnd();
+            }
+        } else {
+            labels.insert(subroutine.begin);
+
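+            // Jump targets are emulated with a state machine: jmp_to_id holds
+            // the next code offset and a while(true) { switch (jmp_to) } block
+            // dispatches it, since SPIR-V structured control flow has no goto.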
+            const Id jmp_to_id{DefineVar<false>(u32_id, spv::StorageClass::Function)};
+            OpStore(jmp_to_id, ConstU32(subroutine.begin));
+
+            const Id while_label{OpLabel()};
+            const Id while_merge_block{OpLabel()};
+            const Id while_continue_block{OpLabel()};
+            const Id switch_label{OpLabel()};
+            const Id switch_merge_block{OpLabel()};
+
+            const Id jmp_to{OpLoad(u32_id, jmp_to_id)};
+            const Id default_label{OpLabel()};
+
+            // Define the while loop header
+            OpBranch(while_label);
+            AddLabel(while_label);
+            OpLoopMerge(while_merge_block, while_continue_block, spv::LoopControlMask::MaskNone);
+            OpBranch(switch_label);
+
+            // Define the switch statement header
+            AddLabel(switch_label);
+            OpSelectionMerge(switch_merge_block, spv::SelectionControlMask::MaskNone);
+
+            // Generate spirv labels for all switch targets
+            boost::container::small_vector<u32, 16> spv_literals;
+            boost::container::small_vector<Id, 16> spv_labels;
+            for (u32 label : labels) {
+                spv_labels.push_back(OpLabel());
+                spv_literals.push_back(label);
+            }
+
+            OpSwitch(jmp_to, default_label, spv_literals, spv_labels);
+
+            params = SpirvParams{
+                .jmp_to = jmp_to_id,
+                .while_label = while_label,
+                .switch_label = switch_label,
+                .switch_merge_block = switch_merge_block,
+            };
+
+            for (auto it = labels.begin(); it != labels.end(); it++) {
+                u32 label = *it;
+                u32 index = std::distance(labels.begin(), it);
+                AddLabel(spv_labels[index]);
+
+                auto next_it = labels.lower_bound(label + 1);
+                u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+
+                u32 compile_end = CompileRange(label, next_label);
+                if (compile_end > next_label && compile_end != PROGRAM_END) {
+                    ASSERT_MSG(false, "Unimplemented jump label stuff");
+                    // This happens only when there is a label inside an IF/LOOP block
+                    //OpStore(jmp_to_id, ConstU32(compile_end));
+                    //OpBranch(switch_merge_block);
+                    //labels.emplace(compile_end);
+                }
+
+                Id next_spv_label{};
+                if (next_label == subroutine.end) {
+                    next_spv_label = default_label;
+                } else {
+                    u32 next_index = std::distance(labels.begin(), next_it);
+                    next_spv_label = spv_labels[next_index];
+                }
+
+                if (compile_end != PROGRAM_END) {
+                    OpBranch(next_spv_label);
+                }
+            }
+
+            AddLabel(switch_merge_block);
+            OpBranch(while_continue_block);
+
+            AddLabel(default_label);
+            OpBranch(while_merge_block);
+
+            AddLabel(while_continue_block);
+            OpBranch(while_label);
+
+            AddLabel(while_merge_block);
+            OpReturnValue(ConstBool(false));
+            OpFunctionEnd();
+        }
+    }
+
+    // Define the shader execution entry subroutine
+    const Id exec_shader{OpFunction(bool_id, spv::FunctionControlMask::MaskNone, func_type)};
+    AddLabel();
+
+    // Call main subroutine
+    CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
+
+    // Add the main entry point
+    DefineEntryPoint();
+
+    // Initialize registers
+    OpStore(conditional_code, ConstBool(false, false));
+    OpStore(address_registers, ConstS32(0, 0, 0));
+    for (int i = 0; i < 16; ++i) {
+        OpStore(tmp_regs[i], ConstF32(0.f, 0.f, 0.f, 1.f));
+    }
+
+    // Call exec_shader
+    OpFunctionCall(bool_id, exec_shader);
+
+    OpReturn();
+    OpFunctionEnd();
+}
+
+void VertexModule::DefineArithmeticTypes() {
+    void_id = Name(TypeVoid(), "void_id");
+    bool_id = Name(TypeBool(), "bool_id");
+    f32_id = Name(TypeFloat(32), "f32_id");
+    i32_id = Name(TypeSInt(32), "i32_id");
+    u32_id = Name(TypeUInt(32), "u32_id");
+
+    for (u32 size = 2; size <= 4; size++) {
+        const u32 i = size - 2;
+        vec_ids.ids[i] = Name(TypeVector(f32_id, size), fmt::format("vec{}_id", size));
+        ivec_ids.ids[i] = Name(TypeVector(i32_id, size), fmt::format("ivec{}_id", size));
+        uvec_ids.ids[i] = Name(TypeVector(u32_id, size), fmt::format("uvec{}_id", size));
+        bvec_ids.ids[i] = Name(TypeVector(bool_id, size), fmt::format("bvec{}_id", size));
+    }
+}
+
+void VertexModule::DefineEntryPoint() {
+    AddCapability(spv::Capability::Shader);
+    SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
+
+    const Id main_type{TypeFunction(TypeVoid())};
+    const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
+    AddLabel();
+
+    boost::container::small_vector<Id, 32> interfaces;
+    /*interfaces.push_back(conditional_code);
+    interfaces.push_back(address_registers);
+    interfaces.push_back(vs_uniforms);
+    for (const Id& tmp_reg : tmp_regs) {
+        interfaces.push_back(tmp_reg);
+    }*/
+    for (size_t i = 0; i < input_typed_regs.size(); i++) {
+        if (used_regs[i]) {
+            ASSERT(Sirit::ValidId(input_typed_regs[i]));
+            interfaces.push_back(input_typed_regs[i]);
+        }
+    }
+    for (u32 i = 0; i < config.state.num_outputs; ++i) {
+        interfaces.push_back(output_regs[i]);
+    }
+
+    AddEntryPoint(spv::ExecutionModel::Vertex, main_func, "main", interfaces);
+}
+
+void VertexModule::DefineUniformStructs() {
+    // glslang uses uint for representing bools
+    const Id barray{TypeArray(u32_id, ConstU32(16u))};
+    const Id iarray{TypeArray(uvec_ids.Get(4), ConstU32(4u))};
+    const Id farray{TypeArray(vec_ids.Get(4), ConstU32(96u))};
+    const Id vs_config_id{TypeStruct(barray, iarray, farray)};
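+    // std140 layout: the 16 bool uniforms (stride 16) occupy bytes [0, 256),
+    // the 4 uvec4 integer uniforms the next 64 bytes, so floats start at 320.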
+    constexpr std::array<u32, 3> vs_config_offsets{0u, 256u, 320u};
+
+    Decorate(vs_config_id, spv::Decoration::Block);
+    Decorate(barray, spv::Decoration::ArrayStride, 16);
+    Decorate(iarray, spv::Decoration::ArrayStride, 16);
+    Decorate(farray, spv::Decoration::ArrayStride, 16);
+    for (u32 i = 0; i < static_cast<u32>(vs_config_offsets.size()); i++) {
+        MemberDecorate(vs_config_id, i, spv::Decoration::Offset, vs_config_offsets[i]);
+    }
+
+    vs_uniforms = AddGlobalVariable(TypePointer(spv::StorageClass::Uniform, vs_config_id),
+                                    spv::StorageClass::Uniform);
+    Decorate(vs_uniforms, spv::Decoration::DescriptorSet, 0);
+    Decorate(vs_uniforms, spv::Decoration::Binding, 0);
+}
+
+void VertexModule::DefineInterface() {
+    // Add declarations for registers
+    conditional_code = DefineVar(bvec_ids.Get(2), spv::StorageClass::Private);
+    address_registers = DefineVar(ivec_ids.Get(3), spv::StorageClass::Private);
+    for (std::size_t i = 0; i < tmp_regs.size(); i++) {
+        tmp_regs[i] = DefineVar(vec_ids.Get(4), spv::StorageClass::Private);
+    }
+    for (u32 i = 0; i < config.state.num_outputs; ++i) {
+        output_regs[i] = DefineOutput(vec_ids.Get(4), i);
+    }
+}
+
+std::optional<std::vector<u32>> GenerateVertexShaderSPV(const Pica::Shader::ShaderSetup& setup,
+                                                        const PicaVSConfig& config) {
+    try {
+        VertexModule module(setup, config);
+        module.Generate();
+        const std::vector<u32> code = module.Assemble();
+
+        FileUtil::IOFile file{fmt::format("vert{}.spv", i++), "wb"};
+        file.WriteBytes(code.data(), code.size() * sizeof(u32));
+        file.Flush();
+        file.Close();
+
+        return code;
+    } catch (const DecompileFail& exception) {
+        LOG_INFO(HW_GPU, "Shader decompilation failed: {}", exception.what());
+        return std::nullopt;
+    }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 3e9ffdb9c..63146e712 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -1,11 +1,351 @@
-#ifndef VK_SHADER_DECOMPILER_H
-#define VK_SHADER_DECOMPILER_H
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
 
+#pragma once
-class vk_shader_decompiler
-{
+
+#include <array>
+#include <map>
+#include <optional>
+#include <set>
+#include <stdexcept>
+#include <sirit/sirit.h>
+#include "video_core/renderer_vulkan/vk_shader_gen.h"
+
+namespace Vulkan {
+
+using Sirit::Id;
+
+constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;
+
+class DecompileFail : public std::runtime_error {
 public:
-    vk_shader_decompiler();
+    using std::runtime_error::runtime_error;
 };
 
-#endif // VK_SHADER_DECOMPILER_H
+/// Describes the behaviour of the code path between a given entry point and a return point.
+enum class ExitMethod {
+    Undetermined, ///< Internal value. Only occurs when analyzing a JMP loop.
+    AlwaysReturn, ///< All code paths reach the return point.
+    Conditional,  ///< Code path reaches the return point or an END instruction conditionally.
+    AlwaysEnd,    ///< All code paths reach an END instruction.
+};
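+
+// For example, SeriesExit(AlwaysReturn, AlwaysEnd) yields AlwaysEnd, while
+// ParallelExit(AlwaysReturn, AlwaysEnd) yields Conditional.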
+
+/// A label is an offset into the code assigned to a SPIR-V label
+struct Label {
+    u32 label;
+    mutable Id spv_label;
+
+    Label operator+(u32 other) const {
+        return Label{.label = label + other, .spv_label = spv_label};
+    }
+
+    bool operator<(const Label& other) const {
+        return label < other.label;
+    }
+};
+
+struct SpirvParams {
+    Id jmp_to;               ///< Temporary holding the current jump target
+    Id while_label;          ///< Label to the beginning of the while loop
+    Id switch_label;         ///< Label to the beginning of the switch statement
+    Id switch_merge_block;   ///< Label to the merge block of the switch statement
+    std::array<Id, 4> vars;  ///< Available function variables used for LOOP
+    u32 used_vars = 0;
+};
+
+/// A subroutine is a range of code referenced by a CALL, IF or LOOP instruction.
+struct Subroutine {
+    u32 begin;              ///< Entry point of the subroutine.
+    u32 end;                ///< Return point of the subroutine.
+    ExitMethod exit_method; ///< Exit method of the subroutine.
+    std::set<u32> labels;   ///< Addresses referenced by JMP instructions.
+    mutable Id function;    ///< Function label of the subroutine
+
+    bool operator<(const Subroutine& rhs) const {
+        return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
+    }
+};
+
+/// Analyzes shader code and produces a set of subroutines.
+class ControlFlowAnalyzer {
+public:
+    ControlFlowAnalyzer(const Pica::Shader::ProgramCode& program_code, u32 main_offset);
+
+    [[nodiscard]] std::set<Subroutine> MoveSubroutines() {
+        return std::move(subroutines);
+    }
+
+private:
+    /// Adds and analyzes a new subroutine if it is not added yet.
+    const Subroutine& AddSubroutine(u32 begin, u32 end);
+
+    /// Merges the exit methods of two parallel branches.
+    ExitMethod ParallelExit(ExitMethod a, ExitMethod b);
+
+    /// Cascades the exit methods of two blocks of code.
+    ExitMethod SeriesExit(ExitMethod a, ExitMethod b);
+
+    /// Scans a range of code for labels and determines the exit method.
+    ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels);
+
+private:
+    const Pica::Shader::ProgramCode& program_code;
+    std::set<Subroutine> subroutines;
+    std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
+};
+
+class VertexModule : public Sirit::Module {
+    struct VectorIds {
+        /// Returns the type id of the vector with the provided size
+        [[nodiscard]] constexpr Id Get(u32 size) const {
+            return ids[size - 2];
+        }
+
+        std::array<Id, 3> ids;
+    };
+
+public:
+    VertexModule(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config);
+    ~VertexModule();
+
+    void Generate();
+
+private:
+    /// Gets the Subroutine object corresponding to the specified address.
+    const Subroutine& GetSubroutine(u32 begin, u32 end) const;
+
+    /// Generates code to evaluate a shader control flow instruction
+    Id EvaluateCondition(nihstro::Instruction::FlowControlType flow_control);
+
+    /// Generates code representing a source register.
+    Id GetSourceRegister(const SourceRegister& source_reg, u32 address_register_index);
+
+    /// Generates code representing a destination register.
+    Id GetDestRegister(const DestRegister& dest_reg);
+
+    /// Returns the pointer type of the destination register.
+    Id GetDestPointer(const DestRegister& dest_reg);
+
+    /// Attempts to sanitize the multiplication result to match PICA expected behaviour.
+    Id SanitizeMul(Id lhs, Id rhs);
+
+    /**
+     * Adds code that calls a subroutine.
+     * @param subroutine the subroutine to call.
+     */
+    void CallSubroutine(const Subroutine& subroutine);
+
+    /**
+     * Writes code that does an assignment operation.
+     * @param swizzle the swizzle data of the current instruction.
+     * @param dest the destination register id.
+     * @param value the code representing the value to assign.
+     * @param reg_pointer the pointer type of the destination register.
+     * @param dest_num_components number of components of the destination register.
+     * @param value_num_components number of components of the value to assign.
+     */
+    void SetDest(const nihstro::SwizzlePattern& swizzle, Id dest, Id value, Id reg_pointer,
+                 u32 dest_num_components, u32 value_num_components);
+
+    /**
+     * Compiles a single instruction from PICA to SPIR-V.
+     * @param offset the offset of the PICA shader instruction.
+     * @return the offset of the next instruction to execute. Usually it is the current
+     * offset + 1. If the current instruction is IF or LOOP, the next instruction is after the
+     * IF or LOOP block. If the current instruction always terminates the program, returns
+     * PROGRAM_END.
+     */
+    u32 CompileInstr(u32 offset);
+
+    /**
+     * Compiles a range of instructions from PICA to SPIR-V.
+     * @param begin the offset of the starting instruction.
+     * @param end the offset where the compilation should stop (exclusive).
+     * @return the offset of the next instruction to compile. PROGRAM_END if the program
+     * terminates.
+     */
+    u32 CompileRange(u32 begin, u32 end);
+
+private:
+    /// Returns an id of the attribute type
+    Id AttribType(u32 index) const {
+        switch (config.state.attrib_types[index]) {
+        case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
+            return vec_ids.Get(4);
+        case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
+        case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
+            return ivec_ids.Get(4);
+        case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
+            return uvec_ids.Get(4);
+        default:
+            UNREACHABLE();
+        }
+        return Id{};
+    }
+
+    /// Returns the attribute casted to float
+    Id AttribCast(u32 index, Id typed_reg) {
+        switch (config.state.attrib_types[index]) {
+        case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
+            break;
+        case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
+        case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
+            return OpConvertSToF(vec_ids.Get(4), typed_reg);
+        case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
+            return OpConvertUToF(vec_ids.Get(4), typed_reg);
+        default:
+            UNREACHABLE();
+        }
+        return typed_reg;
+    }
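+
+    // Example: GetVsUniformMember(vec_ids.Get(4), ConstS32(2), idx) emits an
+    // access chain into struct member 2 (the float uniform array), i.e. it
+    // loads uniforms.f[idx].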
+
+    /// Loads the member specified from the vs_uniforms uniform struct
+    template <typename... Ids>
+    [[nodiscard]] Id GetVsUniformMember(Id type, Ids... ids) {
+        const Id uniform_ptr{TypePointer(spv::StorageClass::Uniform, type)};
+        return OpLoad(type, OpAccessChain(uniform_ptr, vs_uniforms, ids...));
+    }
+
+    /// Generates code representing a bool uniform
+    Id GetUniformBool(u32 index) {
+        const Id value{GetVsUniformMember(u32_id, ConstU32(0u), ConstU32(index))};
+        return OpINotEqual(bool_id, value, ConstU32(0u));
+    }
+
+    /// Defines an input variable
+    [[nodiscard]] Id DefineInput(Id type, u32 location) {
+        const Id input_id{DefineVar(type, spv::StorageClass::Input)};
+        Decorate(input_id, spv::Decoration::Location, location);
+        return input_id;
+    }
+
+    /// Defines an output variable
+    [[nodiscard]] Id DefineOutput(Id type, u32 location) {
+        const Id output_id{DefineVar(type, spv::StorageClass::Output)};
+        Decorate(output_id, spv::Decoration::Location, location);
+        return output_id;
+    }
+
+    /// Defines a uniform constant variable
+    [[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding, bool readonly = false) {
+        const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
+        Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
+        Decorate(uniform_id, spv::Decoration::Binding, binding);
+        if (readonly) {
+            Decorate(uniform_id, spv::Decoration::NonWritable);
+        }
+        return uniform_id;
+    }
+
+    template <bool global = true>
+    [[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
+        const Id pointer_type_id{TypePointer(storage_class, type)};
+        return global ? AddGlobalVariable(pointer_type_id, storage_class)
+                      : AddLocalVariable(pointer_type_id, storage_class);
+    }
+
+    /// Returns the id of a boolean constant of value
+    [[nodiscard]] Id ConstBool(bool value) {
+        return value ? ConstantTrue(bool_id) : ConstantFalse(bool_id);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstBool(Args&&... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{ConstBool(values)...};
+        const Id type = size <= 4 ? bvec_ids.Get(size) : TypeArray(bool_id, ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    /// Returns the id of an unsigned integer constant of value
+    [[nodiscard]] Id ConstU32(u32 value) {
+        return Constant(u32_id, value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstU32(Args&&... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(u32_id, values)...};
+        const Id type = size <= 4 ? uvec_ids.Get(size) : TypeArray(u32_id, ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    /// Returns the id of a signed integer constant of value
+    [[nodiscard]] Id ConstS32(s32 value) {
+        return Constant(i32_id, value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstS32(Args&&... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(i32_id, values)...};
+        const Id type = size <= 4 ? ivec_ids.Get(size) : TypeArray(i32_id, ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
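+
+    // Example: ConstF32(0.f, 0.f, 0.f, 1.f) yields a vec4 constant, while more
+    // than four values would produce an array constant instead.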
+
+    /// Returns the id of a float constant of value
+    [[nodiscard]] Id ConstF32(f32 value) {
+        return Constant(f32_id, value);
+    }
+
+    template <typename... Args>
+    [[nodiscard]] Id ConstF32(Args... values) {
+        constexpr u32 size = static_cast<u32>(sizeof...(values));
+        static_assert(size >= 2);
+        const std::array constituents{Constant(f32_id, values)...};
+        const Id type = size <= 4 ? vec_ids.Get(size) : TypeArray(f32_id, ConstU32(size));
+        return ConstantComposite(type, constituents);
+    }
+
+    void DefineArithmeticTypes();
+    void DefineEntryPoint();
+    void DefineUniformStructs();
+    void DefineInterface();
+
+public:
+    Id void_id{};
+    Id bool_id{};
+    Id f32_id{};
+    Id i32_id{};
+    Id u32_id{};
+
+    VectorIds vec_ids{};
+    VectorIds ivec_ids{};
+    VectorIds uvec_ids{};
+    VectorIds bvec_ids{};
+
+private:
+    const PicaVSConfig& config;
+    const Pica::Shader::ProgramCode& program_code;
+    const Pica::Shader::SwizzleData& swizzle_data;
+    u32 main_offset;
+    bool sanitize_mul;
+    std::set<Subroutine> subroutines;
+
+    /**
+     * PICA input registers are float, but Vulkan cannot implicitly cast integer
+     * attributes to float, so they are cast manually when needed.
+     **/
+    std::array<Id, 16> input_typed_regs{};
+    std::array<Id, 16> input_regs{};
+    std::array<bool, 16> used_regs{};
+    std::array<Id, 16> output_regs{};
+    std::array<Id, 16> tmp_regs{};
+
+    Id vs_uniforms{};
+    Id conditional_code{};
+    Id address_registers{};
+};
+
+/**
+ * Generates the SPIR-V vertex shader program for the given VS program
+ * @returns the assembled SPIR-V code on success, std::nullopt on failure
+ */
+std::optional<std::vector<u32>> GenerateVertexShaderSPV(const Pica::Shader::ShaderSetup& setup,
+                                                        const PicaVSConfig& config);
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen.cpp b/src/video_core/renderer_vulkan/vk_shader_gen.cpp
index 86da7715b..ca83c972e 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_gen.cpp
@@ -1676,10 +1676,11 @@ std::optional<std::string> GenerateVertexShader(const Pica::Shader::ShaderSetup&
     std::string& program_source = program_source_opt->code;
 
     out += R"(
-#define uniforms vs_uniforms
 layout (set = 0, binding = 0, std140) uniform vs_config {
-    pica_uniforms uniforms;
-};
+    bool b[16];
+    uvec4 i[4];
+    vec4 f[96];
+} uniforms;
 )";
 
     if (!config.state.use_geometry_shader) {
@@ -1824,7 +1825,6 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
 
     out += program_source;
 
-    LOG_INFO(Render_Vulkan, "{}", out);
     return out;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
index 715dcf91f..ed966f553 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.cpp
@@ -2,11 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include "common/microprofile.h"
 #include "core/core.h"
-#include "video_core/regs.h"
 #include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
-#include "video_core/shader/shader_uniforms.h"
 
 using Pica::FramebufferRegs;
 using Pica::LightingRegs;
diff --git a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
index e66d87f85..ed182c072 100644
--- a/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
+++ b/src/video_core/renderer_vulkan/vk_shader_gen_spv.h
@@ -12,20 +12,20 @@ namespace Vulkan {
 
 using Sirit::Id;
 
-struct VectorIds {
-    /// Returns the type id of the vector with the provided size
-    [[nodiscard]] constexpr Id Get(u32 size) const {
-        return ids[size - 2];
-    }
-
-    std::array<Id, 3> ids;
-};
-
 class FragmentModule : public Sirit::Module {
     static constexpr u32 NUM_TEV_STAGES = 6;
     static constexpr u32 NUM_LIGHTS = 8;
     static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
 
+    struct VectorIds {
+        /// Returns the type id of the vector with the provided size
+        [[nodiscard]] constexpr Id Get(u32 size) const {
+            return ids[size - 2];
+        }
+
+        std::array<Id, 3> ids;
+    };
+
 public:
     FragmentModule(const PicaFSConfig& config);
     ~FragmentModule();
diff --git a/src/video_core/shader/shader_cache.h b/src/video_core/shader/shader_cache.h
index 16d6bcf82..068b6a57c 100644
--- a/src/video_core/shader/shader_cache.h
+++ b/src/video_core/shader/shader_cache.h
@@ -11,8 +11,8 @@
 
 namespace Pica::Shader {
 
-template <typename ShaderType>
-using ShaderCacheResult = std::pair<ShaderType, std::optional<std::string>>;
+template <typename ShaderType, typename ShaderBinary>
+using ShaderCacheResult = std::pair<ShaderType, std::optional<ShaderBinary>>;
 
 template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
 class ShaderCache {
@@ -50,7 +50,8 @@ public:
  * program buffer from the previous shader, which is hashed into the config, resulting several
  * different config values from the same shader program.
  */
-template <typename KeyType, typename ShaderType, auto ModuleCompiler, auto CodeGenerator>
+template <typename KeyType, typename ShaderType, typename ShaderBinary, auto ModuleCompiler,
+          auto CodeGenerator>
 class ShaderDoubleCache {
 public:
     ShaderDoubleCache() = default;
@@ -58,7 +59,7 @@ public:
 
     template <typename... Args>
     auto Get(const KeyType& key, const Pica::Shader::ShaderSetup& setup, Args&&... args)
-        -> ShaderCacheResult<ShaderType> {
+        -> ShaderCacheResult<ShaderType, ShaderBinary> {
         if (auto map_iter = shader_map.find(key); map_iter == shader_map.end()) {
             auto code = CodeGenerator(setup, key);
             if (!code) {
@@ -66,7 +67,7 @@ public:
                 return std::make_pair(ShaderType{}, std::nullopt);
             }
 
-            std::string& program = code.value();
+            const ShaderBinary& program = code.value();
             auto [iter, new_shader] = shader_cache.emplace(program, ShaderType{});
             auto& shader = iter->second;
 
@@ -81,7 +82,7 @@ public:
         }
     }
 
-    void Inject(const KeyType& key, std::string decomp, ShaderType&& program) {
+    void Inject(const KeyType& key, ShaderBinary&& decomp, ShaderType&& program) {
         const auto iter = shader_cache.emplace(std::move(decomp), std::move(program)).first;
         auto& cached_shader = iter->second;
 
@@ -90,7 +91,16 @@ public:
 
 public:
     std::unordered_map<KeyType, ShaderType*> shader_map;
-    std::unordered_map<std::string, ShaderType> shader_cache;
+    std::unordered_map<ShaderBinary, ShaderType> shader_cache;
 };
 
 } // namespace Pica::Shader
+
+namespace std {
+template <>
+struct hash<std::vector<u32>> {
+    std::size_t operator()(const std::vector<u32>& code) const noexcept {
+        return Common::ComputeHash64(code.data(), code.size() * sizeof(u32));
+    }
+};
+} // namespace std