Compare commits

123 Commits

Author SHA1 Message Date
cfd7bfbb20 renderer_vulkan: Scheduler and presentation rewrite
* This commit ports yuzu's async scheduler, replacing our older and crummier version
  Commands are recorded by the scheduler and processed by a separate worker thread

* Shader compilation and queue submission are also moved to that thread to reduce stutters
2022-10-23 18:15:03 +03:00
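
Below is a minimal, hypothetical sketch of the pattern this commit describes (the real scheduler is considerably more involved): the emulation thread records commands as closures, and a dedicated worker thread drains them in order, so shader compilation and queue submission never block emulation.

```cpp
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>

// Sketch with hypothetical names: commands are recorded as closures and
// executed in order on a separate worker thread.
class CommandScheduler {
public:
    CommandScheduler() : worker{[this] { WorkerLoop(); }} {}

    ~CommandScheduler() {
        {
            std::scoped_lock lock{mutex};
            stop = true;
        }
        cv.notify_one();
        worker.join();
    }

    // Called from the emulation thread; never waits on the GPU.
    void Record(std::function<void()> command) {
        {
            std::scoped_lock lock{mutex};
            commands.push(std::move(command));
        }
        cv.notify_one();
    }

private:
    void WorkerLoop() {
        for (;;) {
            std::function<void()> command;
            {
                std::unique_lock lock{mutex};
                cv.wait(lock, [this] { return stop || !commands.empty(); });
                if (stop && commands.empty()) {
                    return;
                }
                command = std::move(commands.front());
                commands.pop();
            }
            // Submission/shader compilation runs here, off the emulation
            // thread, which is what reduces the stutters.
            command();
        }
    }

    std::queue<std::function<void()>> commands;
    std::mutex mutex;
    std::condition_variable cv;
    bool stop = false;
    std::thread worker; // declared last so all members exist before it starts
};
```
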
fbe471693d externals: Update vulkan-headers 2022-10-21 18:09:47 +03:00
4670114077 common: Remove concepts usage 2022-10-21 18:07:38 +03:00
b5d4473bbb citra_qt: Include unordered_map to fix macOS compilation 2022-10-19 20:42:33 +03:00
97455ae5f8 renderer_vulkan: Fix shader hash type 2022-10-19 20:41:18 +03:00
69cfff9022 code: Remove usages of std::ranges
* macOS is still ruining my C++20 fun
2022-10-19 20:39:33 +03:00
7dd5049ae1 renderer_vulkan: Prefer immediate over mailbox present mode 2022-10-19 20:37:42 +03:00
1396d39cf7 renderer_vulkan: Bump vertex buffer size
* So software shaders don't crash
2022-10-19 20:05:13 +03:00
55220d70df renderer_vulkan: Add more microprofile targets 2022-10-18 22:27:12 +03:00
aebb7f2d06 renderer_vulkan: Improve StreamBuffer API and use it in TextureRuntime
* Also use separate upload and download buffers optimized for write and readback respectively. This gives a huge 20+ FPS boost in most games which were bottlenecked by slow reads
2022-10-18 22:17:30 +03:00
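
As an illustration of the upload/download split, a vulkan-hpp sketch with an assumed helper name: readback buffers want HOST_CACHED memory so that CPU reads are not uncached, which is the slow-read bottleneck this commit removes.

```cpp
#include <vulkan/vulkan.hpp>

// Sketch of the direction-dependent memory choice (helper name assumed):
// uploads go through coherent write-combined memory, while downloads prefer
// HOST_CACHED memory for fast CPU readback.
vk::MemoryPropertyFlags StagingMemoryFlags(bool readback) {
    if (readback) {
        return vk::MemoryPropertyFlagBits::eHostVisible |
               vk::MemoryPropertyFlagBits::eHostCached;
    }
    return vk::MemoryPropertyFlagBits::eHostVisible |
           vk::MemoryPropertyFlagBits::eHostCoherent;
}
```
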
c3956ee207 renderer_vulkan: Fix allocation caching bug 2022-10-17 23:15:06 +03:00
9b582f2ba5 renderer_opengl: Port scaled upload/download code from vulkan 2022-10-17 21:30:32 +03:00
1f16ecad1a renderer_vulkan: Include algorithm in vk_common
* Appears to be a bug in vulkan-hpp
2022-10-17 20:06:36 +03:00
2d5e588d89 renderer_vulkan: Use linear filtering when possible
* Fixes blocky artifacts in Samus Returns
2022-10-17 19:44:27 +03:00
d551e2adc3 renderer_vulkan: Abstract descriptor management
* The pipeline cache was starting to get cluttered
2022-10-17 19:36:03 +03:00
561398bbcd renderer_vulkan: Bump descriptor set allocation limit 2022-10-17 17:41:43 +03:00
23cff45251 renderer_vulkan: Fix storage descriptor binding and respect color mask
* RGBA8 surfaces now expose an additional R32Uint view used for storage descriptors. The format is guaranteed by the spec to support atomic loads/stores. This requires the mutable flag, which incurs a performance cost, but might be better than breaking the current renderpass on each draw when rendering shadows, especially on mobile

* Color mask is also implemented which fixes Street Fighter and Monster Hunter Stories
2022-10-17 14:51:47 +03:00
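
A hedged vulkan-hpp sketch of the aliased view setup described above (helper names illustrative; memory allocation and binding omitted):

```cpp
#include <vulkan/vulkan.hpp>

// The RGBA8 image is created with the mutable-format flag so a second,
// differently formatted view can alias its texels.
vk::UniqueImage MakeRgba8Image(vk::Device device, vk::Extent2D extent) {
    const vk::ImageCreateInfo info{
        vk::ImageCreateFlagBits::eMutableFormat,
        vk::ImageType::e2D,
        vk::Format::eR8G8B8A8Unorm,
        vk::Extent3D{extent.width, extent.height, 1},
        1, 1, vk::SampleCountFlagBits::e1,
        vk::ImageTiling::eOptimal,
        vk::ImageUsageFlagBits::eColorAttachment | vk::ImageUsageFlagBits::eSampled |
            vk::ImageUsageFlagBits::eStorage};
    return device.createImageUnique(info);
}

// R32Uint view over the same memory, bound only to storage descriptors.
vk::UniqueImageView MakeStorageView(vk::Device device, vk::Image image) {
    const vk::ImageViewCreateInfo info{
        {}, image, vk::ImageViewType::e2D, vk::Format::eR32Uint, {},
        vk::ImageSubresourceRange{vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}};
    return device.createImageViewUnique(info);
}
```
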
0223fa756c renderer_vulkan: Implement depth uploads with blit 2022-10-16 18:06:12 +03:00
b7fa091db0 renderer_vulkan: Use intermediate copy when framebuffer is used both as attachment and shader input 2022-10-16 17:41:22 +03:00
46ae192c05 renderer_vulkan: Respect disk shader option 2022-10-16 15:04:54 +03:00
e059fc5f4f renderer_vulkan: Fix staging buffer size 2022-10-16 14:47:03 +03:00
0fe4225b22 renderer_vulkan: Catch and log more runtime errors
* Also add the ability to enable command buffer dumping which is very useful
2022-10-16 13:09:51 +03:00
3a60a6687d renderer_vulkan: Batch allocate descriptor sets
* Fewer driver calls should lead to better performance
2022-10-16 11:03:48 +03:00
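
For illustration, batching boils down to one vkAllocateDescriptorSets call handing back N sets instead of N separate calls; a vulkan-hpp sketch with an assumed helper name:

```cpp
#include <cstdint>
#include <vector>
#include <vulkan/vulkan.hpp>

// One allocation call returns `count` sets for the same layout.
std::vector<vk::DescriptorSet> AllocateBatch(vk::Device device, vk::DescriptorPool pool,
                                             vk::DescriptorSetLayout layout,
                                             std::uint32_t count) {
    const std::vector<vk::DescriptorSetLayout> layouts(count, layout);
    const vk::DescriptorSetAllocateInfo info{pool, count, layouts.data()};
    return device.allocateDescriptorSets(info);
}
```
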
74b8081114 renderer_vulkan: Emulate border color if possible 2022-10-16 10:14:08 +03:00
69db7d9d0d renderer_vulkan: Implement scaled uploads and downloads
* This commit includes large changes to how textures are handled. Instead of ImageAlloc, Surface is now used, which provides multiple benefits: automatic recycling on destruction and the ability to use the TextureRuntime interface to simplify operations

* Layout tracking is also implemented which allows transitioning of individual mip levels without errors

* This fixes graphical errors in multiple games which relied on framebuffer uploads
2022-10-16 09:47:29 +03:00
caf596e5eb renderer_vulkan: Fix renderpass issues
* The cache didn't take into account the framebuffer and render area used, so if these changed the renderpass wouldn't restart. This caused graphical bugs in Pokemon X/Y
2022-10-15 01:34:26 +03:00
9950e2aab5 renderer_vulkan: Update stencil compare mask 2022-10-15 00:11:05 +03:00
13fe59ae55 renderer_opengl: Fix spotlight in Luigi's Mansion 2022-10-14 23:29:39 +03:00
d700e2c4cc citra_qt: Fix graphics api indicator alignment 2022-10-14 23:25:54 +03:00
59aeeca8ca renderer_opengl: Fix OpenGLES issues
* Always request a 4.4 context until I figure out how to get Qt to cooperate

* Use RGBA for BGR since the conversion table will do that conversion
2022-10-14 21:03:11 +03:00
54414d5a8f renderer_vulkan: Report perf stats 2022-10-14 21:03:11 +03:00
6086bfabca renderer_vulkan: Better error handling 2022-10-14 21:03:11 +03:00
069df7741d renderer_vulkan: Allow direct allocation of images 2022-10-14 21:03:11 +03:00
8ec86d07d7 renderer_vulkan: Fix incorrect depth format detection
* Intel iGPUs don't support blit on all depth/stencil formats, which caused issues because the runtime checks for this while the renderpass cache does not
2022-10-14 21:03:11 +03:00
c625a5a0b4 renderer_vulkan: Actually minimize state changes
* Keep track of the current state and only update it when needed. Previously games would set the same state over and over, cluttering RenderDoc logs
2022-10-14 21:03:11 +03:00
04a188c96d renderer_vulkan: Fix broken sync without timeline semaphores 2022-10-14 21:03:11 +03:00
8152881f06 renderer_vulkan: Allocate descriptor sets during reinterpretation 2022-10-14 21:03:11 +03:00
0f4dc90acc renderer_vulkan: Enable logic ops and fix swapchain resizing 2022-10-14 21:03:11 +03:00
d77574100d renderer_vulkan: Clear stencil with renderpass
* Fixes outline retention in Pokemon games
2022-10-14 21:03:11 +03:00
c10cf4414f renderer_vulkan: Fix pipeline cache crashes 2022-10-14 21:03:11 +03:00
4d7a00f324 renderer_vulkan: Optimize tiled format conversion + fix vertex buffer alignment
* Integrate format conversion into the morton copy function, removing the need for an intermediate copy and conversion pass. This should be beneficial for performance, especially since most games use tiled textures

* Also bump vertex buffer size to avoid crashes with hardware shaders and provide a correct offset on normal draws, which fixes glitches in Pokemon Y

* Reduce the local group size to 8 in the D24S8 compute shader, which fixes graphical issues in the aforementioned Pokemon games at native resolution

* Set LOD to 0 instead of 0.25 to fix another glitch in Pokemon Y
2022-10-14 21:03:11 +03:00
e56d069ed5 renderer_opengl: Fix broken texture copy
* Resolves graphical bugs in Professor Layton vs Ace Attorney when using OpenGL
2022-10-14 21:03:11 +03:00
e33adc1b11 renderer_vulkan: Pipeline cache fixes
* Delete cache file if found invalid

* Name it after the vendor/device ids so each physical device gets a separate cache
2022-10-14 21:03:11 +03:00
11de7700aa video_core: Fix renderpass cache bug and introduce RGBA -> BGR converter 2022-10-14 21:03:11 +03:00
b0fc94f155 renderer_opengl: Specify precision in compute shader and add RGB5A1 converter
* Fixes OpenGLES crash
2022-10-14 21:03:11 +03:00
0d4e530805 renderer_vulkan: Complete hardware shader support
* With these changes all commercial games I tested work fine and get a massive performance boost
2022-10-14 21:03:11 +03:00
9a1cf869f9 renderer_vulkan: Begin hardware shader support
* Still experimental and works only with homebrew
2022-10-14 21:03:11 +03:00
523120e03d citra: Fix build issues with MinGW and MSVC 2022-10-14 21:03:11 +03:00
7f26562dce renderer_vulkan: Fix warnings and cleanup 2022-10-14 21:03:11 +03:00
f750da1508 code: Run clang-format 2022-10-14 21:03:11 +03:00
711b699689 code: Address build issues 2022-10-14 21:03:11 +03:00
7f7408b81e video_core: Re-implement format reinterpretation
* Same as before but D24S8 to RGBA8 is switched to a compute shader which should provide better throughput and is much simpler to implement in Vulkan
2022-10-14 21:03:11 +03:00
2da4c9ca90 citra_qt: Add physical device selection dialog 2022-10-14 21:03:11 +03:00
20ccb995b1 code: Resolve unused variable warnings 2022-10-14 21:03:11 +03:00
601aac2a26 renderer_opengl: Unbind unused framebuffer targets
* Fixes graphical glitches in many games for some reason
2022-10-14 21:03:11 +03:00
924257b2cc renderer_opengl: Emulate texture copy with blit for now 2022-10-14 21:03:11 +03:00
9e34ff40ed renderer_opengl: Address buffer overflow 2022-10-14 21:03:11 +03:00
ba3c84168a video_core: Small code improvements 2022-10-14 21:03:11 +03:00
77d6be6bde renderer_vulkan: Don't sample from mipmaps when using texture cubes
* Mipmaps for texture cubes are unimplemented in the rasterizer cache, so sampling from mipmaps will return nothing
2022-10-14 21:03:11 +03:00
a6e2bcd986 citra_qt: Switch all strings to multiarg 2022-10-14 21:03:11 +03:00
2c30889fdc code: Address more compiler warnings 2022-10-14 21:03:11 +03:00
67a76bec5c citra_qt: Fix more warnings/deprecated functions 2022-10-14 21:03:11 +03:00
98b7c33f62 input_common: Small fix 2022-10-14 21:03:11 +03:00
dbfa06c6b1 citra_qt: Improve graphics API integration
* Add renderer debug option which toggles debug output in OpenGL/validation layers in Vulkan

* Fix many warnings and replace deprecated Qt functionality with newer alternatives
2022-10-14 21:03:11 +03:00
88d0b7de13 rasterizer_cache: Code cleanup
* Merge utils and types to a single header
2022-10-14 21:03:11 +03:00
de149e3ee9 texture_decode: Prefer std::memcpy where possible 2022-10-14 21:03:11 +03:00
99877f9465 renderer_vulkan: Rework format handling
* This is a pretty large commit that aims to solve some issues with the current format system
* The instance now builds, at application initialization, an array of format traits for each pixel format
  that includes information such as blit/attachment/storage support and fallback formats
* The runtime doesn't ask the instance for formats but receives these traits and can decide on its own what to build.
  For now we do the same as before and require both blit and attachment support

* Morton swizzling also sees many bug fixes. The previous code was very hacky and didn't work for partial
  texture updates. It was also inconsistent, as it would take a tiled_buffer and write into the middle of the linear buffer
* Now the functions have been greatly simplified and adjusted to work better with std::span. This fixes out-of-bounds
  errors and texture glitches (like the display in Mario Kart 7)
2022-10-14 21:03:11 +03:00
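
An illustrative shape for the trait table described above; the actual field and constant names in the commit may differ:

```cpp
#include <vulkan/vulkan.hpp>

// Per-pixel-format traits, built once at instance creation by querying
// vkGetPhysicalDeviceFormatProperties for each candidate format.
struct FormatTraits {
    bool transfer_support;   // can be blitted
    bool attachment_support; // can be a framebuffer attachment
    bool storage_support;    // can back a storage descriptor
    vk::Format native;       // closest natively supported Vulkan format
    vk::Format fallback;     // format to emulate with otherwise
};

// e.g. std::array<FormatTraits, PIXEL_FORMAT_COUNT> traits;
// (PIXEL_FORMAT_COUNT is assumed here)
```
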
075090569f renderer_vulkan: Handle scheduler switches properly 2022-10-14 21:03:10 +03:00
f6af97fc16 vk_platform: Fix wayland build 2022-10-14 21:03:10 +03:00
5fa4a32cf6 renderer_vulkan: Rewrite stream buffer + other fixes
* Emulate blend color and clip planes correctly

* Don't hack the depth in the vertex shader, use VK_EXT_depth_clip_control for now to set the range to -1, 1

* Rewrite the stream buffer to remove flickering problems. The new implementation doesn't try to be smart about holding memory. It divides the allocation in SCHEDULER_COMMAND_COUNT buckets and automatically switches between them based on the current slot index
2022-10-14 21:03:10 +03:00
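
A minimal sketch of the bucketed stream buffer under assumed names and sizes:

```cpp
#include <cstddef>

// The backing allocation is split into SCHEDULER_COMMAND_COUNT equal buckets;
// the write cursor jumps to a fresh bucket whenever the scheduler's slot
// advances, so the CPU never overwrites data the GPU may still be reading.
constexpr std::size_t SCHEDULER_COMMAND_COUNT = 4;

class StreamBuffer {
public:
    explicit StreamBuffer(std::size_t total_size)
        : bucket_size{total_size / SCHEDULER_COMMAND_COUNT} {}

    // Returns the byte offset the caller should write `size` bytes at.
    std::size_t Map(std::size_t slot_index, std::size_t size) {
        if (slot_index != current_slot) {
            current_slot = slot_index;
            cursor = bucket_size * (slot_index % SCHEDULER_COMMAND_COUNT);
        }
        const std::size_t offset = cursor;
        cursor += size; // assumes one slot's draws fit in a bucket
        return offset;
    }

private:
    std::size_t bucket_size;
    std::size_t current_slot = 0;
    std::size_t cursor = 0;
};
```
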
fd77483a5f vk_rasterizer: Bump vertex buffer size
* Helps with the stuttering, indicating the issue is with the vertex buffer somehow
2022-10-14 21:03:10 +03:00
1bf1217a18 pica_to_vk: Set cull mode correctly 2022-10-14 21:03:10 +03:00
40db7b90fa renderer_vulkan: Minimize state changes
* Store current renderpass/pipelines and only rebind when they change

* Enable extended dynamic state support and only apply them when they change
2022-10-14 21:03:10 +03:00
8e1a23d971 rasterizer_cache: Explicitly pass end_offset to swizzle functions
* This addresses overflow issues
2022-10-14 21:03:10 +03:00
9e8c403793 renderer_vulkan: Implement partial color/depth clears 2022-10-14 21:03:10 +03:00
0e047a7a6e renderer_vulkan: Add second screen and remove renderpass breakage 2022-10-14 21:03:10 +03:00
66158841cb renderer_vulkan: Improve task scheduler synchronization
* Use multiple semaphores for swapchain sync and improve the Submit API
2022-10-14 21:03:10 +03:00
634e6427a8 renderer_vulkan: Use timeline semaphores if available 2022-10-14 21:03:10 +03:00
c1f46ed710 renderer_vulkan: Pipeline cache fixes 2022-10-14 21:03:10 +03:00
4776e21dd9 renderer_vulkan: Isolate surface creation to vk_platform.cpp
* Also clean up the init code somewhat
2022-10-14 21:03:10 +03:00
9e7b3bfa16 renderer_vulkan: Add ABGR -> RGBA byteswap
* Vulkan doesn't support VK_FORMAT_R8G8B8A8_UNORM_PACK32 unfortunately. Fixes graphical issues on the gpusprites demo
2022-10-14 21:03:10 +03:00
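
The byteswap itself is a per-texel byte reversal; a small sketch (function name illustrative):

```cpp
#include <cstddef>
#include <cstdint>
#include <span>
#include <utility>

// Reversing the four bytes of each 32-bit texel turns packed ABGR into RGBA.
void SwapABGRToRGBA(std::span<std::uint8_t> pixels) {
    for (std::size_t i = 0; i + 3 < pixels.size(); i += 4) {
        std::swap(pixels[i], pixels[i + 3]);     // A <-> R
        std::swap(pixels[i + 1], pixels[i + 2]); // B <-> G
    }
}
```
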
62d561c004 externals: Trim down glslang build
* When the install option is turned on, glslang will override the install dir which causes SDL2 to fail
2022-10-14 21:03:10 +03:00
34a0571dc3 common: math_util: Include <compare> 2022-10-14 21:03:10 +03:00
e46970a84a cmake: Lower cmake requirement to 3.14 2022-10-14 21:03:10 +03:00
ccb1872604 renderer_vulkan: Address more validation errors and stop memory leakage
* The transition settings are temporary until I write a proper layout tracking system
2022-10-14 21:03:10 +03:00
1cd0b04399 renderer_vulkan: Fix some validation errors
* Temporarily add glm until I figure out how to fix the alignment
2022-10-14 21:03:10 +03:00
ab3a228e5e renderer_vulkan: Implement renderer and rasterizer classes
* Also WIP. Vulkan crashes when allocating command buffers, need to investigate...
2022-10-14 21:03:10 +03:00
7ae0d0ef27 renderer_vulkan: Add experimental Vulkan renderer
* Still extremely WIP and missing the rasterizer/renderer classes
2022-10-14 21:03:10 +03:00
19c82a76a3 externals: Add vulkan headers and vma 2022-10-14 21:03:10 +03:00
366cdc854f rasterizer_cache: Refactor texture cube interface
* Reuse our Surface class instead of having a separate one, to avoid reimplementing stuff in the backend
2022-10-14 21:03:10 +03:00
d2fd8030dd gl_texture_runtime: Clean up texture upload/download code
* Improve readability and code clarity
2022-10-14 21:03:10 +03:00
a932a9f662 rasterizer_cache: Use Common::Rectangle everywhere
* Make a nice alias for it and use it instead of having Rect2D/Region2D. Makes the new design less intrusive to the current cache
2022-10-14 21:03:10 +03:00
ca81c5a5f3 rasterizer_cache: Make into template
* This is the final step: RasterizerCache is now completely decoupled from OpenGL (technically not quite yet, but the rest is details). For now texture filtering and some GLES paths have been disabled and will be reimplemented in the following commits
2022-10-14 21:03:10 +03:00
5d62b033df rasterizer_cache: Use PBO staging buffer cache for texture uploads/downloads 2022-10-14 21:03:10 +03:00
36d584cf3c rasterizer_cache: Reorder methods 2022-10-14 21:03:10 +03:00
7eb590153b rasterizer_cache: Remove remnants of cached_pages 2022-10-14 21:03:10 +03:00
e99ef32c6b rasterizer_cache: Fix texture cube blitting
* The target was GL_TEXTURE_2D instead of GL_TEXTURE_CUBE_MAP_*
2022-10-14 21:03:10 +03:00
066bdcfc40 morton_swizzle: Implement texture formats in UNSWIZZLE_TABLE
* I can now remove that loop that has been messing with my OCD
2022-10-14 21:03:10 +03:00
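
For context, 3DS textures are stored in 8x8 tiles whose texels follow a Z-order (Morton) curve; a table like UNSWIZZLE_TABLE precomputes that index for all 64 positions. A sketch of the underlying bit interleave:

```cpp
#include <cstdint>

// Z-order index of texel (x, y) inside an 8x8 tile: the low three bits of
// x and y are interleaved, x in the even positions, y in the odd ones.
constexpr std::uint32_t MortonIndexInTile(std::uint32_t x, std::uint32_t y) {
    std::uint32_t index = 0;
    for (std::uint32_t bit = 0; bit < 3; ++bit) {
        index |= ((x >> bit) & 1) << (2 * bit);
        index |= ((y >> bit) & 1) << (2 * bit + 1);
    }
    return index;
}

static_assert(MortonIndexInTile(7, 7) == 63);
```
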
dd1c06a55b morton_swizzle: Use tiled_buffer instead of reading data from g_memory
* It's much safer and removes hardcoded global state usage
2022-10-14 21:03:10 +03:00
fa7edc4a9c rasterizer_accelerated: Zero initialize cached_pages
* Resolves random crashes because count takes random values
2022-10-14 21:03:10 +03:00
994b27ab5b texture_runtime: Add staging buffer lock mechanism 2022-10-14 21:03:10 +03:00
77a99506cb cached_surface: Remove custom texture logic
* Makes things more complicated and is in the way. It's probably already
broken by recent changes, so I'll need to reimplement it anyway
2022-10-14 21:03:10 +03:00
5f8a884c2c renderer_opengl: Add driver class to report info/bugs 2022-10-14 21:03:10 +03:00
a7cfe99ca1 rasterizer_cache: Add staging buffer cache for uploads/downloads
* In addition, bump context version to 4.4 to enforce ARB_buffer_storage and use EXT_buffer_storage for GLES, which is supported on many mobile devices
2022-10-14 21:03:10 +03:00
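
A sketch of the persistently mapped staging allocation that ARB_buffer_storage/EXT_buffer_storage enables (loader header and helper name assumed):

```cpp
#include <glad/glad.h> // any loader exposing GL 4.4 / EXT_buffer_storage works

// glBufferStorage makes an immutable allocation that can stay mapped for the
// lifetime of the buffer, so uploads become a plain memcpy into the pointer.
void* CreatePersistentStaging(GLuint& buffer, GLsizeiptr size) {
    constexpr GLbitfield flags =
        GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
    glGenBuffers(1, &buffer);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, buffer);
    glBufferStorage(GL_PIXEL_UNPACK_BUFFER, size, nullptr, flags);
    return glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, flags);
}
```
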
424ed2df04 rasterizer_cache: Improve TextureRuntime API
* This makes every operation more explicit and more closely mimics the Vulkan API
2022-10-14 21:03:09 +03:00
e22e641736 frame_dumper: Switch to std::jthread 2022-10-14 21:02:45 +03:00
c080ed35c2 renderer_opengl: Encapsulate sync objects in OGLSync 2022-10-14 21:02:45 +03:00
6dacd66f40 code: Use std::numbers::pi 2022-10-14 21:02:45 +03:00
25a6da50ef code: dodge PAGE_SIZE #define
Some header files, specifically on OSX and Musl libc, define PAGE_SIZE to be a number.
This is great, except Citra uses PAGE_SIZE as a variable.

Specific example:
`static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;`

PAGE_SIZE, PAGE_BITS and PAGE_MASK are all similarly named variables.
Simply deleted the underscores, then added a CITRA_ prefix.
2022-10-14 21:02:45 +03:00
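
To make the clash concrete, a small sketch of the renamed constants (the bit width matches the example above; treat the exact values as illustrative):

```cpp
#include <cstdint>

using u64 = std::uint64_t;

// With a system header doing `#define PAGE_SIZE 4096`, the old declaration
//   static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
// expands to `static constexpr u64 4096 = ...` and fails to compile.
// Prefixed names cannot collide with libc macros:
constexpr u64 CITRA_PAGE_BITS = 12; // 4 KiB pages, as in the example above
constexpr u64 CITRA_PAGE_SIZE = u64(1) << CITRA_PAGE_BITS;
constexpr u64 CITRA_PAGE_MASK = CITRA_PAGE_SIZE - 1;
```
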
38a5cc634f core: memory: Refactor block functions and general cleanup
* Also drop usage of std::vector in CopyBlock in favour of a plain std::array. Now all block functions use the common WalkBlockImpl, whose implementation is very similar to yuzu's
2022-10-14 21:02:45 +03:00
f26d00fbb4 morton_swizzle: Optimize and use std::span 2022-10-14 21:02:45 +03:00
b03c3b0d7d morton_swizzle: Avoid buffer underflow
* Check the y coordinate before decrementing linear_buffer
2022-10-14 21:02:45 +03:00
1f450d6d1d morton_swizzle: Move out of bounds texture check out of the decode loop
* Running relatively expensive checks like this on a hot path causes a small but measurable performance loss. Tested SMD with this and it doesn't crash
2022-10-14 21:02:45 +03:00
e464507b7a rasterizer_cache: Use SurfaceType instead of Aspect
* It was doing pointless enum conversions when both enums described the same thing
2022-10-14 21:02:45 +03:00
7158952ae7 rasterizer_cache: Separate texture swizzling to utils 2022-10-14 21:02:45 +03:00
841dee8ed8 rasterizer_cache: Remove OpenGL references from morton_swizzle 2022-10-14 21:02:45 +03:00
01e53fe9d2 rasterizer_cache: microprofile: Rename OpenGL to RasterizerCache 2022-10-14 21:02:45 +03:00
9762e24696 citra_qt: Forbid renderer change during runtime
* It's an endless source of problems and isn't useful
2022-10-14 21:02:45 +03:00
937c7e67a4 rasterizer_cache: Touch up MatchFlags comments 2022-10-14 21:02:45 +03:00
1d4f8db60d rasterizer_cache: Drop OpenGL postfix 2022-10-14 21:02:45 +03:00
bc0c9f6eb7 rasterizer_cache: Shorten filenames and general cleanup
* AllocateSurfaceTexture now takes the PixelFormat directly as FormatTuple is an OpenGL struct and will be moved there
2022-10-14 21:02:42 +03:00
667d978480 video_core: Move UpdatePagesCachedCount to RasterizerAccelerated 2022-10-14 21:01:19 +03:00
389d1862bb citra_qt: Prepare GUI for Vulkan support 2022-10-14 21:01:19 +03:00
106 changed files with 2452 additions and 5109 deletions

@@ -95,13 +95,6 @@ jobs:
env:
MACOSX_DEPLOYMENT_TARGET: "10.13"
ENABLE_COMPATIBILITY_REPORTING: "ON"
- name: Pack
run: ./.ci/macos/upload.sh
- name: Upload
uses: actions/upload-artifact@v3
with:
name: macos
path: artifacts/
windows:
runs-on: windows-latest
steps:

.gitmodules vendored

@@ -67,6 +67,3 @@
[submodule "glm"]
path = externals/glm
url = https://github.com/g-truc/glm
[submodule "sirit"]
path = externals/sirit
url = https://github.com/GPUCode/sirit

@@ -138,13 +138,13 @@ if (NOT ENABLE_GENERIC)
if (MSVC)
detect_architecture("_M_AMD64" x86_64)
detect_architecture("_M_IX86" x86)
detect_architecture("_M_ARM" arm)
detect_architecture("_M_ARM64" arm64)
detect_architecture("_M_ARM" ARM)
detect_architecture("_M_ARM64" ARM64)
else()
detect_architecture("__x86_64__" x86_64)
detect_architecture("__i386__" x86)
detect_architecture("__arm__" arm)
detect_architecture("__aarch64__" arm64)
detect_architecture("__arm__" ARM)
detect_architecture("__aarch64__" ARM64)
endif()
endif()
if (NOT DEFINED ARCHITECTURE)

@@ -31,27 +31,24 @@ add_subdirectory(catch2)
# Crypto++
add_subdirectory(cryptopp)
# fmt and Xbyak need to be added before dynarmic
# libfmt
add_subdirectory(fmt)
# Xbyak
if (ARCHITECTURE_x86_64)
add_library(xbyak INTERFACE)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/xbyak/xbyak DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_include_directories(xbyak SYSTEM INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/xbyak/include)
target_include_directories(xbyak SYSTEM INTERFACE ./xbyak/xbyak)
target_compile_definitions(xbyak INTERFACE XBYAK_NO_OP_NAMES)
endif()
# Dynarmic
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
set(DYNARMIC_TESTS OFF)
set(DYNARMIC_NO_BUNDLED_FMT ON)
set(DYNARMIC_FRONTENDS "A32")
add_subdirectory(dynarmic)
endif()
# libfmt
add_subdirectory(fmt)
# getopt
if (MSVC)
add_subdirectory(getopt)
@@ -67,9 +64,6 @@ set(ENABLE_SPVREMAPPER OFF)
set(ENABLE_CTEST OFF)
add_subdirectory(glslang)
# Sirit
add_subdirectory(sirit)
# glm
add_subdirectory(glm)

externals/sirit vendored

Submodule externals/sirit deleted from 297d820eeb

@@ -16,7 +16,6 @@
android:required="false" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.CAMERA" />
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.FOREGROUND_SERVICE" />

@@ -7,7 +7,7 @@ buildscript {
jcenter()
}
dependencies {
classpath 'com.android.tools.build:gradle:7.3.1'
classpath 'com.android.tools.build:gradle:7.2.0'
// NOTE: Do not place your application dependencies here; they belong
// in the individual module build.gradle files

@@ -204,15 +204,14 @@ void Config::ReadValues() {
Settings::values.use_virtual_sd =
sdl2_config->GetBoolean("Data Storage", "use_virtual_sd", true);
Settings::values.use_custom_storage =
sdl2_config->GetBoolean("Data Storage", "use_custom_storage", false);
if (Settings::values.use_custom_storage) {
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir,
sdl2_config->GetString("Data Storage", "nand_directory", ""));
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir,
sdl2_config->GetString("Data Storage", "sdmc_directory", ""));
}
const std::string default_nand_dir = FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir);
FileUtil::UpdateUserPath(
FileUtil::UserPath::NANDDir,
sdl2_config->GetString("Data Storage", "nand_directory", default_nand_dir));
const std::string default_sdmc_dir = FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir);
FileUtil::UpdateUserPath(
FileUtil::UserPath::SDMCDir,
sdl2_config->GetString("Data Storage", "sdmc_directory", default_sdmc_dir));
// System
Settings::values.is_new_3ds = sdl2_config->GetBoolean("System", "is_new_3ds", true);

@@ -250,10 +250,6 @@ volume =
# 1 (default): Yes, 0: No
use_virtual_sd =
# Whether to use custom storage locations
# 1: Yes, 0 (default): No
use_custom_storage =
# The path of the virtual SD card directory.
# empty (default) will use the user_path
sdmc_directory =

@@ -320,8 +320,6 @@ static Frontend::WindowSystemType GetWindowSystemType() {
return Frontend::WindowSystemType::X11;
else if (platform_name == QStringLiteral("wayland"))
return Frontend::WindowSystemType::Wayland;
else if (platform_name == QStringLiteral("cocoa"))
return Frontend::WindowSystemType::MacOS;
LOG_CRITICAL(Frontend, "Unknown Qt platform!");
return Frontend::WindowSystemType::Windows;

@@ -304,17 +304,21 @@ void Config::ReadDataStorageValues() {
Settings::values.use_virtual_sd = ReadSetting(QStringLiteral("use_virtual_sd"), true).toBool();
Settings::values.use_custom_storage =
ReadSetting(QStringLiteral("use_custom_storage"), false).toBool();
const std::string nand_dir =
ReadSetting(QStringLiteral("nand_directory"), QStringLiteral("")).toString().toStdString();
ReadSetting(
QStringLiteral("nand_directory"),
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir)))
.toString()
.toStdString();
const std::string sdmc_dir =
ReadSetting(QStringLiteral("sdmc_directory"), QStringLiteral("")).toString().toStdString();
ReadSetting(
QStringLiteral("sdmc_directory"),
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir)))
.toString()
.toStdString();
if (Settings::values.use_custom_storage) {
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir, nand_dir);
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir, sdmc_dir);
}
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir, nand_dir);
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir, sdmc_dir);
qt_config->endGroup();
}
@@ -485,8 +489,6 @@ void Config::ReadRendererValues() {
ReadSetting(QStringLiteral("graphics_api"), static_cast<u32>(Settings::GraphicsAPI::OpenGL))
.toUInt());
Settings::values.physical_device = ReadSetting(QStringLiteral("physical_device"), 0).toUInt();
Settings::values.async_command_recording = ReadSetting(QStringLiteral("async_command_recording"), true).toBool();
Settings::values.spirv_shader_gen = ReadSetting(QStringLiteral("spirv_shader_gen"), false).toBool();
Settings::values.use_hw_renderer =
ReadSetting(QStringLiteral("use_hw_renderer"), true).toBool();
Settings::values.use_hw_shader = ReadSetting(QStringLiteral("use_hw_shader"), true).toBool();
@@ -874,13 +876,12 @@ void Config::SaveDataStorageValues() {
qt_config->beginGroup(QStringLiteral("Data Storage"));
WriteSetting(QStringLiteral("use_virtual_sd"), Settings::values.use_virtual_sd, true);
WriteSetting(QStringLiteral("use_custom_storage"), Settings::values.use_custom_storage, false);
WriteSetting(QStringLiteral("nand_directory"),
QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir)),
QStringLiteral(""));
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::NANDDir)));
WriteSetting(QStringLiteral("sdmc_directory"),
QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir)),
QStringLiteral(""));
QString::fromStdString(FileUtil::GetDefaultUserPath(FileUtil::UserPath::SDMCDir)));
qt_config->endGroup();
}
@@ -1006,8 +1007,6 @@ void Config::SaveRendererValues() {
WriteSetting(QStringLiteral("graphics_api"), static_cast<u32>(Settings::values.graphics_api),
static_cast<u32>(Settings::GraphicsAPI::OpenGL));
WriteSetting(QStringLiteral("physical_device"), Settings::values.physical_device, 0);
WriteSetting(QStringLiteral("async_command_recording"), Settings::values.async_command_recording, true);
WriteSetting(QStringLiteral("spirv_shader_gen"), Settings::values.spirv_shader_gen, false);
WriteSetting(QStringLiteral("use_hw_renderer"), Settings::values.use_hw_renderer, true);
WriteSetting(QStringLiteral("use_hw_shader"), Settings::values.use_hw_shader, true);
#ifdef __APPLE__

@@ -4,7 +4,6 @@
#include <QDesktopServices>
#include <QUrl>
#include <QMessageBox>
#include "citra_qt/configuration/configure_debug.h"
#include "citra_qt/debugger/console.h"
#include "citra_qt/uisettings.h"
@@ -12,9 +11,7 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
#include "qcheckbox.h"
#include "ui_configure_debug.h"
#include "video_core/renderer_vulkan/vk_instance.h"
ConfigureDebug::ConfigureDebug(QWidget* parent)
: QWidget(parent), ui(std::make_unique<Ui::ConfigureDebug>()) {
@@ -26,36 +23,6 @@ ConfigureDebug::ConfigureDebug(QWidget* parent)
QDesktopServices::openUrl(QUrl::fromLocalFile(path));
});
connect(ui->toggle_renderer_debug, &QCheckBox::clicked, this, [this](bool checked) {
if (checked && Settings::values.graphics_api == Settings::GraphicsAPI::Vulkan) {
try {
Vulkan::Instance debug_inst{true};
} catch (vk::LayerNotPresentError& err) {
ui->toggle_renderer_debug->toggle();
QMessageBox::warning(
this, tr("Validation layer not available"),
tr("Unable to enable debug renderer because the layer "
"<strong>VK_LAYER_KHRONOS_validation</strong> is missing. "
"Please install the Vulkan SDK or the appropriate package of your distribution"));
}
}
});
connect(ui->toggle_dump_command_buffers, &QCheckBox::clicked, this, [this](bool checked) {
if (checked && Settings::values.graphics_api == Settings::GraphicsAPI::Vulkan) {
try {
Vulkan::Instance debug_inst{false, true};
} catch (vk::LayerNotPresentError& err) {
ui->toggle_dump_command_buffers->toggle();
QMessageBox::warning(
this, tr("Command buffer dumping not available"),
tr("Unable to enable command buffer dumping because the layer "
"<strong>VK_LAYER_LUNARG_api_dump</strong> is missing. "
"Please install the Vulkan SDK or the appropriate package of your distribution"));
}
}
});
const bool is_powered_on = Core::System::GetInstance().IsPoweredOn();
ui->toggle_cpu_jit->setEnabled(!is_powered_on);
ui->toggle_renderer_debug->setEnabled(!is_powered_on);

@@ -22,6 +22,5 @@ public:
void RetranslateUI();
void SetConfiguration();
private:
std::unique_ptr<Ui::ConfigureDebug> ui;
};

@@ -26,7 +26,6 @@ ConfigureGraphics::ConfigureGraphics(QWidget* parent)
ui->graphics_api_combo->setEnabled(not_running);
ui->toggle_shader_jit->setEnabled(not_running);
ui->toggle_disk_shader_cache->setEnabled(hw_renderer_enabled && not_running);
ui->toggle_async_recording->setEnabled(hw_renderer_enabled && not_running);
ui->physical_device_combo->setEnabled(not_running);
SetPhysicalDeviceComboVisibility(ui->graphics_api_combo->currentIndex());
@@ -84,8 +83,6 @@ void ConfigureGraphics::SetConfiguration() {
ui->toggle_vsync_new->setChecked(Settings::values.use_vsync_new);
ui->graphics_api_combo->setCurrentIndex(static_cast<int>(Settings::values.graphics_api));
ui->physical_device_combo->setCurrentIndex(static_cast<int>(Settings::values.physical_device));
ui->toggle_async_recording->setChecked(Settings::values.async_command_recording);
ui->spirv_shader_gen->setChecked(Settings::values.spirv_shader_gen);
}
void ConfigureGraphics::ApplyConfiguration() {
@@ -99,8 +96,6 @@ void ConfigureGraphics::ApplyConfiguration() {
Settings::values.graphics_api =
static_cast<Settings::GraphicsAPI>(ui->graphics_api_combo->currentIndex());
Settings::values.physical_device = static_cast<u16>(ui->physical_device_combo->currentIndex());
Settings::values.async_command_recording = ui->toggle_async_recording->isChecked();
Settings::values.spirv_shader_gen = ui->spirv_shader_gen->isChecked();
}
void ConfigureGraphics::RetranslateUI() {
@@ -123,5 +118,4 @@ void ConfigureGraphics::SetPhysicalDeviceComboVisibility(int index) {
const bool is_visible = graphics_api == Settings::GraphicsAPI::Vulkan;
ui->physical_device_label->setVisible(is_visible);
ui->physical_device_combo->setVisible(is_visible);
ui->spirv_shader_gen->setVisible(is_visible);
}

@@ -7,7 +7,7 @@
<x>0</x>
<y>0</y>
<width>400</width>
<height>513</height>
<height>430</height>
</rect>
</property>
<property name="minimumSize">
@@ -70,13 +70,6 @@
</item>
</layout>
</item>
<item>
<widget class="QCheckBox" name="spirv_shader_gen">
<property name="text">
<string>SPIR-V Shader Generation</string>
</property>
</widget>
</item>
</layout>
</widget>
</item>
@@ -178,16 +171,6 @@
<string>Advanced</string>
</property>
<layout class="QVBoxLayout" name="verticalLayout_2">
<item>
<widget class="QCheckBox" name="toggle_async_recording">
<property name="toolTip">
<string>&lt;html&gt;&lt;head/&gt;&lt;body&gt;&lt;p&gt;Offloads command buffer recording and fragment shader generation to a worker thread. Can improve performance especially on weaker systems. Disable if you notice better performance. If unsure leave it enabled,&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;</string>
</property>
<property name="text">
<string>Async Command Recording</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="toggle_disk_shader_cache">
<property name="toolTip">

@@ -51,42 +51,28 @@ ConfigureStorage::ConfigureStorage(QWidget* parent)
ApplyConfiguration();
SetConfiguration();
});
connect(ui->toggle_custom_storage, &QCheckBox::clicked, this, [this]() {
ApplyConfiguration();
SetConfiguration();
});
}
ConfigureStorage::~ConfigureStorage() = default;
void ConfigureStorage::SetConfiguration() {
ui->nand_group->setVisible(Settings::values.use_custom_storage);
ui->nand_group->setVisible(Settings::values.use_virtual_sd);
QString nand_path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
ui->nand_dir_path->setText(nand_path);
ui->open_nand_dir->setEnabled(!nand_path.isEmpty());
ui->sdmc_group->setVisible(Settings::values.use_virtual_sd &&
Settings::values.use_custom_storage);
ui->sdmc_group->setVisible(Settings::values.use_virtual_sd);
QString sdmc_path = QString::fromStdString(FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
ui->sdmc_dir_path->setText(sdmc_path);
ui->open_sdmc_dir->setEnabled(!sdmc_path.isEmpty());
ui->toggle_virtual_sd->setChecked(Settings::values.use_virtual_sd);
ui->toggle_custom_storage->setChecked(Settings::values.use_custom_storage);
ui->storage_group->setEnabled(!Core::System::GetInstance().IsPoweredOn());
}
void ConfigureStorage::ApplyConfiguration() {
Settings::values.use_virtual_sd = ui->toggle_virtual_sd->isChecked();
Settings::values.use_custom_storage = ui->toggle_custom_storage->isChecked();
if (!Settings::values.use_custom_storage) {
FileUtil::UpdateUserPath(FileUtil::UserPath::NANDDir,
GetDefaultUserPath(FileUtil::UserPath::NANDDir));
FileUtil::UpdateUserPath(FileUtil::UserPath::SDMCDir,
GetDefaultUserPath(FileUtil::UserPath::SDMCDir));
}
}
void ConfigureStorage::RetranslateUI() {

@@ -34,147 +34,131 @@
</layout>
</item>
<item>
<widget class="QGroupBox" name="custom_storage_group">
<widget class="QGroupBox" name="nand_group">
<property name="title">
<string>Custom Storage</string>
<string/>
</property>
<layout class="QVBoxLayout" name="verticalLayout_6">
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<widget class="QCheckBox" name="toggle_custom_storage">
<property name="text">
<string>Use Custom Storage</string>
</property>
</widget>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>NAND Directory</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="nand_dir_path">
<property name="enabled">
<bool>false</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="open_nand_dir">
<property name="text">
<string>Open</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QGroupBox" name="nand_group">
<property name="title">
<string/>
</property>
<layout class="QVBoxLayout" name="verticalLayout">
<item>
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QLabel" name="label">
<property name="text">
<string>NAND Directory</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="nand_dir_path">
<property name="enabled">
<bool>false</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="open_nand_dir">
<property name="text">
<string>Open</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLabel" name="label_4">
<property name="text">
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_3">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="change_nand_dir">
<property name="text">
<string>Change</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<layout class="QHBoxLayout" name="horizontalLayout_2">
<item>
<widget class="QLabel" name="label_4">
<property name="text">
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_3">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="change_nand_dir">
<property name="text">
<string>Change</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="sdmc_group">
<property name="title">
<string/>
</property>
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
<widget class="QLabel" name="label_2">
<property name="text">
<string>SDMC Directory</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="sdmc_dir_path">
<property name="enabled">
<bool>false</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="open_sdmc_dir">
<property name="text">
<string>Open</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<widget class="QGroupBox" name="sdmc_group">
<property name="title">
<string/>
</property>
<layout class="QVBoxLayout" name="verticalLayout_4">
<item>
<layout class="QHBoxLayout" name="horizontalLayout_3">
<item>
<widget class="QLabel" name="label_2">
<property name="text">
<string>SDMC Directory</string>
</property>
</widget>
</item>
<item>
<widget class="QLineEdit" name="sdmc_dir_path">
<property name="enabled">
<bool>false</bool>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="open_sdmc_dir">
<property name="text">
<string>Open</string>
</property>
</widget>
</item>
</layout>
</item>
<item>
<layout class="QHBoxLayout" name="horizontalLayout_4">
<item>
<widget class="QLabel" name="label_3">
<property name="text">
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_4">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="change_sdmc_dir">
<property name="text">
<string>Change</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
<layout class="QHBoxLayout" name="horizontalLayout_4">
<item>
<widget class="QLabel" name="label_3">
<property name="text">
<string>NOTE: This does not move the contents of the previous directory to the new one.</string>
</property>
</widget>
</item>
<item>
<spacer name="horizontalSpacer_4">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
<property name="sizeHint" stdset="0">
<size>
<width>40</width>
<height>20</height>
</size>
</property>
</spacer>
</item>
<item>
<widget class="QPushButton" name="change_sdmc_dir">
<property name="text">
<string>Change</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>

@@ -3,7 +3,6 @@
// Refer to the license.txt file included.
#include <QApplication>
#include <QDir>
#include <QFileInfo>
#include <QFileSystemWatcher>
#include <QHBoxLayout>
@@ -32,8 +31,6 @@
#include "core/file_sys/archive_extsavedata.h"
#include "core/file_sys/archive_source_sd_savedata.h"
#include "core/hle/service/fs/archive.h"
#include "core/settings.h"
#include "qcursor.h"
GameListSearchField::KeyReleaseEater::KeyReleaseEater(GameList* gamelist, QObject* parent)
: QObject(parent), gamelist{gamelist} {}
@@ -465,7 +462,6 @@ void GameList::PopupContextMenu(const QPoint& menu_location) {
default:
break;
}
context_menu.exec(tree_view->viewport()->mapToGlobal(menu_location));
}
@@ -479,27 +475,19 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
QAction* open_texture_load_location =
context_menu.addAction(tr("Open Custom Texture Location"));
QAction* open_mods_location = context_menu.addAction(tr("Open Mods Location"));
QMenu* shader_menu = context_menu.addMenu(tr("Disk Shader Cache"));
QAction* dump_romfs = context_menu.addAction(tr("Dump RomFS"));
QAction* navigate_to_gamedb_entry = context_menu.addAction(tr("Navigate to GameDB entry"));
QAction* open_shader_cache_location = shader_menu->addAction(tr("Open Shader Cache Location"));
shader_menu->addSeparator();
QAction* delete_opengl_disk_shader_cache =
shader_menu->addAction(tr("Delete OpenGL Shader Cache"));
QAction* delete_vulkan_disk_shader_cache =
shader_menu->addAction(tr("Delete Vulkan Shader Cache"));
const bool is_application =
0x0004000000000000 <= program_id && program_id <= 0x00040000FFFFFFFF;
std::string sdmc_dir = FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir);
open_save_location->setEnabled(
open_save_location->setVisible(
is_application && FileUtil::Exists(FileSys::ArchiveSource_SDSaveData::GetSaveDataPathFor(
sdmc_dir, program_id)));
if (extdata_id) {
open_extdata_location->setEnabled(
open_extdata_location->setVisible(
is_application &&
FileUtil::Exists(FileSys::GetExtDataPathFromId(sdmc_dir, extdata_id)));
} else {
@@ -507,9 +495,9 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
}
auto media_type = Service::AM::GetTitleMediaType(program_id);
open_application_location->setEnabled(path.toStdString() ==
open_application_location->setVisible(path.toStdString() ==
Service::AM::GetTitleContentPath(media_type, program_id));
open_update_location->setEnabled(
open_update_location->setVisible(
is_application && FileUtil::Exists(Service::AM::GetTitlePath(Service::FS::MediaType::SDMC,
program_id + 0xe00000000) +
"content/"));
@@ -548,13 +536,6 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
emit OpenFolderRequested(program_id, GameListOpenTarget::TEXTURE_LOAD);
}
});
connect(open_texture_load_location, &QAction::triggered, this, [this, program_id] {
if (FileUtil::CreateFullPath(fmt::format("{}textures/{:016X}/",
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
program_id))) {
emit OpenFolderRequested(program_id, GameListOpenTarget::TEXTURE_LOAD);
}
});
connect(open_mods_location, &QAction::triggered, this, [this, program_id] {
if (FileUtil::CreateFullPath(fmt::format("{}mods/{:016X}/",
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
@@ -567,26 +548,6 @@ void GameList::AddGamePopup(QMenu& context_menu, const QString& path, u64 progra
connect(navigate_to_gamedb_entry, &QAction::triggered, this, [this, program_id]() {
emit NavigateToGamedbEntryRequested(program_id, compatibility_list);
});
connect(open_shader_cache_location, &QAction::triggered, this, [this, program_id] {
if (FileUtil::CreateFullPath(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir))) {
emit OpenFolderRequested(program_id, GameListOpenTarget::SHADER_CACHE);
}
});
connect(delete_opengl_disk_shader_cache, &QAction::triggered, this, [program_id] {
const std::string_view cache_type =
Settings::values.separable_shader ? "separable" : "conventional";
const std::string path = fmt::format("{}opengl/precompiled/{}/{:016X}.bin",
FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir),
cache_type, program_id);
QFile file{QString::fromStdString(path)};
file.remove();
});
connect(delete_vulkan_disk_shader_cache, &QAction::triggered, this, [] {
const std::string path =
fmt::format("{}vulkan", FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir));
QDir dir{QString::fromStdString(path)};
dir.removeRecursively();
});
};
void GameList::AddCustomDirPopup(QMenu& context_menu, QModelIndex selected) {

@@ -37,7 +37,6 @@ enum class GameListOpenTarget {
TEXTURE_DUMP = 4,
TEXTURE_LOAD = 5,
MODS = 6,
SHADER_CACHE = 7
};
class GameList : public QWidget {

@@ -1340,35 +1340,26 @@ void GMainWindow::OnGameListOpenFolder(u64 data_id, GameListOpenTarget target) {
path = Service::AM::GetTitlePath(media_type, data_id) + "content/";
break;
}
case GameListOpenTarget::UPDATE_DATA: {
case GameListOpenTarget::UPDATE_DATA:
open_target = "Update Data";
path = Service::AM::GetTitlePath(Service::FS::MediaType::SDMC, data_id + 0xe00000000) +
"content/";
break;
}
case GameListOpenTarget::TEXTURE_DUMP: {
case GameListOpenTarget::TEXTURE_DUMP:
open_target = "Dumped Textures";
path = fmt::format("{}textures/{:016X}/",
FileUtil::GetUserPath(FileUtil::UserPath::DumpDir), data_id);
break;
}
case GameListOpenTarget::TEXTURE_LOAD: {
case GameListOpenTarget::TEXTURE_LOAD:
open_target = "Custom Textures";
path = fmt::format("{}textures/{:016X}/",
FileUtil::GetUserPath(FileUtil::UserPath::LoadDir), data_id);
break;
}
case GameListOpenTarget::MODS: {
case GameListOpenTarget::MODS:
open_target = "Mods";
path = fmt::format("{}mods/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::LoadDir),
data_id);
break;
}
case GameListOpenTarget::SHADER_CACHE: {
open_target = "Shader Cache";
path = FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir);
break;
}
default:
LOG_ERROR(Frontend, "Unexpected target {}", static_cast<int>(target));
return;

@@ -58,7 +58,6 @@ add_library(common STATIC
announce_multiplayer_room.h
archives.h
assert.h
atomic_ops.h
detached_tasks.cpp
detached_tasks.h
bit_field.h
@@ -129,7 +128,7 @@ if(ARCHITECTURE_x86_64)
x64/xbyak_abi.h
x64/xbyak_util.h
)
elseif(ARCHITECTURE_arm64)
elseif(ARCHITECTURE_ARM64)
target_sources(common
PRIVATE
aarch64/cpu_detect.cpp

@@ -1,166 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include "common/common_types.h"
#if _MSC_VER
#include <intrin.h>
#else
#include <cstring>
#endif
namespace Common {
#if _MSC_VER
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
const u8 result =
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
const u16 result =
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
const u32 result =
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
value, expected);
return result == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
value[0],
reinterpret_cast<__int64*>(expected.data())) != 0;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual =
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual =
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual =
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer), value,
expected);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
u128& actual) {
const bool result =
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
value[0], reinterpret_cast<__int64*>(expected.data())) != 0;
actual = expected;
return result;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
u128 result{};
_InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), result[1],
result[0], reinterpret_cast<__int64*>(result.data()));
return result;
}
#else
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
return __sync_bool_compare_and_swap(pointer, expected, value);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
return __sync_bool_compare_and_swap(pointer, expected, value);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
return __sync_bool_compare_and_swap(pointer, expected, value);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
return __sync_bool_compare_and_swap(pointer, expected, value);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
unsigned __int128 value_a;
unsigned __int128 expected_a;
std::memcpy(&value_a, value.data(), sizeof(u128));
std::memcpy(&expected_a, expected.data(), sizeof(u128));
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected,
u8& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected,
u16& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected,
u32& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected,
u64& actual) {
actual = __sync_val_compare_and_swap(pointer, expected, value);
return actual == expected;
}
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected,
u128& actual) {
unsigned __int128 value_a;
unsigned __int128 expected_a;
unsigned __int128 actual_a;
std::memcpy(&value_a, value.data(), sizeof(u128));
std::memcpy(&expected_a, expected.data(), sizeof(u128));
actual_a = __sync_val_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
std::memcpy(actual.data(), &actual_a, sizeof(u128));
return actual_a == expected_a;
}
[[nodiscard]] inline u128 AtomicLoad128(volatile u64* pointer) {
unsigned __int128 zeros_a = 0;
unsigned __int128 result_a =
__sync_val_compare_and_swap((unsigned __int128*)pointer, zeros_a, zeros_a);
u128 result;
std::memcpy(result.data(), &result_a, sizeof(u128));
return result;
}
#endif
} // namespace Common

@@ -24,7 +24,6 @@
#pragma once
#include <array>
#include <cstdint>
#ifdef _MSC_VER
@@ -51,9 +50,6 @@ typedef double f64; ///< 64-bit floating point
typedef u32 VAddr; ///< Represents a pointer in the userspace virtual address space.
typedef u32 PAddr; ///< Represents a pointer in the ARM11 physical address space.
using u128 = std::array<std::uint64_t, 2>;
static_assert(sizeof(u128) == 16, "u128 must be 128 bits wide");
// An inheritable class to disallow the copy constructor and operator= functions
class NonCopyable {
protected:

@@ -776,9 +776,6 @@ const std::string& GetDefaultUserPath(UserPath path) {
}
const void UpdateUserPath(UserPath path, const std::string& filename) {
if (filename.empty()) {
return;
}
if (!FileUtil::IsDirectory(filename)) {
LOG_ERROR(Common_Filesystem, "Path is not a directory. UserPath: {} filename: {}", path,
filename);

@@ -10,8 +10,6 @@
namespace Common {
constexpr float PI = 3.14159265f;
template <class T>
struct Rectangle {
T left{};

@@ -6,7 +6,7 @@
#include <bitset>
#include <initializer_list>
#include <xbyak/xbyak.h>
#include <xbyak.h>
#include "common/assert.h"
namespace Common::X64 {

@@ -5,7 +5,7 @@
#pragma once
#include <type_traits>
#include <xbyak/xbyak.h>
#include <xbyak.h>
#include "common/x64/xbyak_abi.h"
namespace Common::X64 {

@@ -12,8 +12,6 @@ add_library(core STATIC
arm/dyncom/arm_dyncom_thumb.h
arm/dyncom/arm_dyncom_trans.cpp
arm/dyncom/arm_dyncom_trans.h
arm/exclusive_monitor.cpp
arm/exclusive_monitor.h
arm/skyeye_common/arm_regformat.h
arm/skyeye_common/armstate.cpp
arm/skyeye_common/armstate.h
@@ -482,14 +480,12 @@ if (ENABLE_WEB_SERVICE)
endif()
endif()
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_arm64)
if (ARCHITECTURE_x86_64 OR ARCHITECTURE_ARM64)
target_sources(core PRIVATE
arm/dynarmic/arm_dynarmic.cpp
arm/dynarmic/arm_dynarmic.h
arm/dynarmic/arm_dynarmic_cp15.cpp
arm/dynarmic/arm_dynarmic_cp15.h
arm/dynarmic/arm_exclusive_monitor.cpp
arm/dynarmic/arm_exclusive_monitor.h
)
target_link_libraries(core PRIVATE dynarmic)
endif()

@@ -122,9 +122,6 @@ public:
*/
virtual void InvalidateCacheRange(u32 start_address, std::size_t length) = 0;
/// Clears the exclusive monitor's state.
virtual void ClearExclusiveState() = 0;
/// Notify CPU emulation that page tables have changed
virtual void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) = 0;

@@ -3,14 +3,12 @@
// Refer to the license.txt file included.
#include <cstring>
#include <dynarmic/interface/A32/a32.h>
#include <dynarmic/interface/A32/context.h>
#include <dynarmic/interface/optimization_flags.h>
#include <dynarmic/A32/a32.h>
#include <dynarmic/A32/context.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/gdbstub/gdbstub.h"
@@ -102,23 +100,10 @@ public:
memory.Write64(vaddr, value);
}
bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
return memory.WriteExclusive8(vaddr, value, expected);
}
bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
return memory.WriteExclusive16(vaddr, value, expected);
}
bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
return memory.WriteExclusive32(vaddr, value, expected);
}
bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
return memory.WriteExclusive64(vaddr, value, expected);
}
void InterpreterFallback(VAddr pc, std::size_t num_instructions) override {
// Should never happen.
UNREACHABLE_MSG("InterpeterFallback reached with pc = 0x{:08x}, code = 0x{:08x}, num = {}",
pc, MemoryReadCode(pc).value(), num_instructions);
pc, MemoryReadCode(pc), num_instructions);
}
void CallSVC(std::uint32_t swi) override {
@@ -129,8 +114,6 @@ public:
switch (exception) {
case Dynarmic::A32::Exception::UndefinedInstruction:
case Dynarmic::A32::Exception::UnpredictableInstruction:
case Dynarmic::A32::Exception::DecodeError:
case Dynarmic::A32::Exception::NoExecuteFault:
break;
case Dynarmic::A32::Exception::Breakpoint:
if (GDBStub::IsConnected()) {
@@ -147,11 +130,10 @@ public:
case Dynarmic::A32::Exception::Yield:
case Dynarmic::A32::Exception::PreloadData:
case Dynarmic::A32::Exception::PreloadDataWithIntentToWrite:
case Dynarmic::A32::Exception::PreloadInstruction:
return;
}
ASSERT_MSG(false, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", exception,
pc, MemoryReadCode(pc).value());
pc, MemoryReadCode(pc));
}
void AddTicks(std::uint64_t ticks) override {
@@ -167,12 +149,10 @@ public:
Memory::MemorySystem& memory;
};
ARM_Dynarmic::ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
std::shared_ptr<Core::Timing::Timer> timer_,
Core::ExclusiveMonitor& exclusive_monitor_)
: ARM_Interface(core_id_, timer_), system(*system_), memory(memory_),
cb(std::make_unique<DynarmicUserCallbacks>(*this)),
exclusive_monitor{dynamic_cast<Core::DynarmicExclusiveMonitor&>(exclusive_monitor_)} {
ARM_Dynarmic::ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
std::shared_ptr<Core::Timing::Timer> timer)
: ARM_Interface(id, timer), system(*system), memory(memory),
cb(std::make_unique<DynarmicUserCallbacks>(*this)) {
SetPageTable(memory.GetCurrentPageTable());
}
@@ -228,7 +208,8 @@ u32 ARM_Dynarmic::GetVFPSystemReg(VFPSystemRegister reg) const {
default:
UNREACHABLE_MSG("Unknown VFP system register: {}", reg);
}
return UINT_MAX;
return 0;
}
void ARM_Dynarmic::SetVFPSystemReg(VFPSystemRegister reg, u32 value) {
@@ -314,10 +295,6 @@ void ARM_Dynarmic::InvalidateCacheRange(u32 start_address, std::size_t length) {
jit->InvalidateCacheRange(start_address, length);
}
void ARM_Dynarmic::ClearExclusiveState() {
jit->ClearExclusiveState();
}
std::shared_ptr<Memory::PageTable> ARM_Dynarmic::GetPageTable() const {
return current_page_table;
}
@ -355,11 +332,6 @@ std::unique_ptr<Dynarmic::A32::Jit> ARM_Dynarmic::MakeJit() {
config.page_table = &current_page_table->GetPointerArray();
config.coprocessors[15] = std::make_shared<DynarmicCP15>(cp15_state);
config.define_unpredictable_behaviour = true;
// Multi-process state
config.processor_id = GetID();
config.global_monitor = &exclusive_monitor.monitor;
return std::make_unique<Dynarmic::A32::Jit>(config);
}

View File

@ -6,7 +6,7 @@
#include <map>
#include <memory>
#include <dynarmic/interface/A32/a32.h>
#include <dynarmic/A32/a32.h>
#include "common/common_types.h"
#include "core/arm/arm_interface.h"
#include "core/arm/dynarmic/arm_dynarmic_cp15.h"
@ -17,18 +17,15 @@ class MemorySystem;
} // namespace Memory
namespace Core {
class DynarmicExclusiveMonitor;
class ExclusiveMonitor;
class System;
} // namespace Core
}
class DynarmicUserCallbacks;
class ARM_Dynarmic final : public ARM_Interface {
public:
explicit ARM_Dynarmic(Core::System* system_, Memory::MemorySystem& memory_, u32 core_id_,
std::shared_ptr<Core::Timing::Timer> timer,
Core::ExclusiveMonitor& exclusive_monitor_);
ARM_Dynarmic(Core::System* system, Memory::MemorySystem& memory, u32 id,
std::shared_ptr<Core::Timing::Timer> timer);
~ARM_Dynarmic() override;
void Run() override;
@ -55,7 +52,6 @@ public:
void ClearInstructionCache() override;
void InvalidateCacheRange(u32 start_address, std::size_t length) override;
void ClearExclusiveState() override;
void SetPageTable(const std::shared_ptr<Memory::PageTable>& page_table) override;
void PurgeState() override;
@ -73,7 +69,6 @@ private:
u32 fpexc = 0;
CP15State cp15_state;
Core::DynarmicExclusiveMonitor& exclusive_monitor;
Dynarmic::A32::Jit* jit = nullptr;
std::shared_ptr<Memory::PageTable> current_page_table = nullptr;

View File

@ -5,7 +5,7 @@
#pragma once
#include <memory>
#include <dynarmic/interface/A32/coprocessor.h>
#include <dynarmic/A32/coprocessor.h>
#include "common/common_types.h"
struct CP15State {

View File

@ -1,59 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#include "core/memory.h"
namespace Core {
DynarmicExclusiveMonitor::DynarmicExclusiveMonitor(Memory::MemorySystem& memory_,
std::size_t core_count_)
: monitor{core_count_}, memory{memory_} {}
DynarmicExclusiveMonitor::~DynarmicExclusiveMonitor() = default;
u8 DynarmicExclusiveMonitor::ExclusiveRead8(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u8>(core_index, addr, [&]() -> u8 { return memory.Read8(addr); });
}
u16 DynarmicExclusiveMonitor::ExclusiveRead16(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u16>(core_index, addr, [&]() -> u16 { return memory.Read16(addr); });
}
u32 DynarmicExclusiveMonitor::ExclusiveRead32(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u32>(core_index, addr, [&]() -> u32 { return memory.Read32(addr); });
}
u64 DynarmicExclusiveMonitor::ExclusiveRead64(std::size_t core_index, VAddr addr) {
return monitor.ReadAndMark<u64>(core_index, addr, [&]() -> u64 { return memory.Read64(addr); });
}
void DynarmicExclusiveMonitor::ClearExclusive(std::size_t core_index) {
monitor.ClearProcessor(core_index);
}
bool DynarmicExclusiveMonitor::ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) {
return monitor.DoExclusiveOperation<u8>(core_index, vaddr, [&](u8 expected) -> bool {
return memory.WriteExclusive8(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) {
return monitor.DoExclusiveOperation<u16>(core_index, vaddr, [&](u16 expected) -> bool {
return memory.WriteExclusive16(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) {
return monitor.DoExclusiveOperation<u32>(core_index, vaddr, [&](u32 expected) -> bool {
return memory.WriteExclusive32(vaddr, value, expected);
});
}
bool DynarmicExclusiveMonitor::ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) {
return monitor.DoExclusiveOperation<u64>(core_index, vaddr, [&](u64 expected) -> bool {
return memory.WriteExclusive64(vaddr, value, expected);
});
}
} // namespace Core
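The implementation above is a thin adapter over dynarmic's global exclusive monitor. As a hedged sketch (the function name and the STREX result mapping are illustrative, not taken from the diff), an emulated LDREX/STREX pair drives this interface like so:

// Illustrative only: how a 32-bit exclusive load/store pair maps onto the
// monitor interface above.
u32 EmulateLdrexStrex(Core::ExclusiveMonitor& monitor, std::size_t core_index,
                      VAddr addr, u32 new_value) {
    // LDREX: read the value and mark [addr] as exclusively held by this core.
    [[maybe_unused]] const u32 old_value = monitor.ExclusiveRead32(core_index, addr);
    // STREX: the write is performed only if no other core has since done an
    // exclusive operation on the marked address.
    const bool stored = monitor.ExclusiveWrite32(core_index, addr, new_value);
    return stored ? 0 : 1; // STREX result register: 0 = success, 1 = retry
}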

View File

@ -1,40 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <dynarmic/interface/exclusive_monitor.h>
#include "common/common_types.h"
#include "core/arm/dynarmic/arm_dynarmic.h"
#include "core/arm/exclusive_monitor.h"
namespace Memory {
class MemorySystem;
}
namespace Core {
class DynarmicExclusiveMonitor final : public ExclusiveMonitor {
public:
explicit DynarmicExclusiveMonitor(Memory::MemorySystem& memory_, std::size_t core_count_);
~DynarmicExclusiveMonitor() override;
u8 ExclusiveRead8(std::size_t core_index, VAddr addr) override;
u16 ExclusiveRead16(std::size_t core_index, VAddr addr) override;
u32 ExclusiveRead32(std::size_t core_index, VAddr addr) override;
u64 ExclusiveRead64(std::size_t core_index, VAddr addr) override;
void ClearExclusive(std::size_t core_index) override;
bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) override;
bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) override;
bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) override;
bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) override;
private:
friend class ::ARM_Dynarmic;
Dynarmic::ExclusiveMonitor monitor;
Memory::MemorySystem& memory;
};
} // namespace Core

View File

@ -30,7 +30,6 @@ public:
void ClearInstructionCache() override;
void InvalidateCacheRange(u32 start_address, std::size_t length) override;
void ClearExclusiveState() override{};
void SetPC(u32 pc) override;
u32 GetPC() const override;

View File

@ -1,26 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
#include "core/arm/dynarmic/arm_exclusive_monitor.h"
#endif
#include "core/arm/exclusive_monitor.h"
#include "core/memory.h"
#include "core/settings.h"
namespace Core {
ExclusiveMonitor::~ExclusiveMonitor() = default;
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
std::size_t num_cores) {
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
if (Settings::values.use_cpu_jit) {
return std::make_unique<Core::DynarmicExclusiveMonitor>(memory, num_cores);
}
#endif
// TODO(merry): Passthrough exclusive monitor
return nullptr;
}
} // namespace Core
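A short usage sketch of this factory (the core count is illustrative; the nullptr branch follows from the TODO above, since no passthrough monitor exists yet for the interpreter path):

// Illustrative only: how System::Init consumed the factory before its removal.
void InitMonitor(Memory::MemorySystem& memory) {
    auto monitor = Core::MakeExclusiveMonitor(memory, /*num_cores=*/2);
    if (monitor == nullptr) {
        // CPU JIT disabled or unsupported architecture: exclusive accesses
        // are currently not tracked at all on this path.
    }
}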

View File

@ -1,35 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Memory {
class MemorySystem;
}
namespace Core {
class ExclusiveMonitor {
public:
virtual ~ExclusiveMonitor();
virtual u8 ExclusiveRead8(std::size_t core_index, VAddr addr) = 0;
virtual u16 ExclusiveRead16(std::size_t core_index, VAddr addr) = 0;
virtual u32 ExclusiveRead32(std::size_t core_index, VAddr addr) = 0;
virtual u64 ExclusiveRead64(std::size_t core_index, VAddr addr) = 0;
virtual void ClearExclusive(std::size_t core_index) = 0;
virtual bool ExclusiveWrite8(std::size_t core_index, VAddr vaddr, u8 value) = 0;
virtual bool ExclusiveWrite16(std::size_t core_index, VAddr vaddr, u16 value) = 0;
virtual bool ExclusiveWrite32(std::size_t core_index, VAddr vaddr, u32 value) = 0;
virtual bool ExclusiveWrite64(std::size_t core_index, VAddr vaddr, u64 value) = 0;
};
std::unique_ptr<Core::ExclusiveMonitor> MakeExclusiveMonitor(Memory::MemorySystem& memory,
std::size_t num_cores);
} // namespace Core

View File

@ -13,8 +13,7 @@
#include "common/logging/log.h"
#include "common/texture.h"
#include "core/arm/arm_interface.h"
#include "core/arm/exclusive_monitor.h"
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
#include "core/arm/dynarmic/arm_dynarmic.h"
#endif
#include "core/arm/dyncom/arm_dyncom.h"
@ -365,12 +364,11 @@ System::ResultStatus System::Init(Frontend::EmuWindow& emu_window, u32 system_mo
kernel = std::make_unique<Kernel::KernelSystem>(
*memory, *timing, [this] { PrepareReschedule(); }, system_mode, num_cores, n3ds_mode);
exclusive_monitor = MakeExclusiveMonitor(*memory, num_cores);
if (Settings::values.use_cpu_jit) {
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_arm64)
#if defined(ARCHITECTURE_x86_64) || defined(ARCHITECTURE_ARM64)
for (u32 i = 0; i < num_cores; ++i) {
cpu_cores.push_back(std::make_shared<ARM_Dynarmic>(
this, *memory, i, timing->GetTimer(i), *exclusive_monitor));
cpu_cores.push_back(
std::make_shared<ARM_Dynarmic>(this, *memory, i, timing->GetTimer(i)));
}
#else
for (u32 i = 0; i < num_cores; ++i) {
@ -543,7 +541,6 @@ void System::Shutdown(bool is_deserializing) {
dsp_core.reset();
kernel.reset();
cpu_cores.clear();
exclusive_monitor.reset();
timing.reset();
if (video_dumper && video_dumper->IsDumping()) {

View File

@ -61,7 +61,6 @@ class RendererBase;
namespace Core {
class ExclusiveMonitor;
class Timing;
class System {
@ -362,8 +361,6 @@ private:
std::unique_ptr<Kernel::KernelSystem> kernel;
std::unique_ptr<Timing> timing;
std::unique_ptr<Core::ExclusiveMonitor> exclusive_monitor;
private:
static System s_instance;

View File

@ -174,22 +174,6 @@ void Timing::Timer::MoveEvents() {
}
}
u32 Timing::Timer::StartAdjust() {
ASSERT((adjust_value_curr_handle & 1) == 0); // Should always be even
adjust_value_last = std::chrono::steady_clock::now();
return ++adjust_value_curr_handle;
}
void Timing::Timer::EndAdjust(u32 start_adjust_handle) {
std::chrono::time_point<std::chrono::steady_clock> new_timer = std::chrono::steady_clock::now();
ASSERT(new_timer >= adjust_value_last && start_adjust_handle == adjust_value_curr_handle);
AddTicks(nsToCycles(static_cast<float>(
std::chrono::duration_cast<std::chrono::nanoseconds>(new_timer - adjust_value_last)
.count() /
cpu_clock_scale)));
++adjust_value_curr_handle;
}
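StartAdjust and EndAdjust bracketed blocking host calls so the guest clock could absorb the host-side wait; SOC_U::PreTimerAdjust and PostTimerAdjust, removed later in this listing, were their only users. A sketch of the intended call pattern (the blocking call is illustrative):

// Illustrative only: the removed adjustment protocol. The handle returned by
// StartAdjust had to be handed back to EndAdjust, so mismatched or nested
// adjustments tripped the ASSERTs above.
void BlockingHostCall(Core::Timing::Timer& timer) {
    const u32 handle = timer.StartAdjust();
    // ... perform a blocking host call such as ::poll() or ::connect() ...
    timer.EndAdjust(handle); // credits the elapsed host time as guest ticks
}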
s64 Timing::Timer::GetMaxSliceLength() const {
const auto& next_event = event_queue.begin();
if (next_event != event_queue.end()) {

View File

@ -203,11 +203,6 @@ public:
void MoveEvents();
// Use these two functions to adjust the guest system tick on host blocking operations, so
// that the guest can tell how much time passed during the host call.
u32 StartAdjust();
void EndAdjust(u32 start_adjust_handle);
private:
friend class Timing;
// The queue is a min-heap using std::make_heap/push_heap/pop_heap.
@ -232,9 +227,6 @@ public:
s64 downcount = MAX_SLICE_LENGTH;
s64 executed_ticks = 0;
u64 idled_cycles = 0;
std::chrono::time_point<std::chrono::steady_clock> adjust_value_last;
u32 adjust_value_curr_handle = 0;
// Stores a scaling for the internal clock speed. Changing this number results in
// under/overclocking the guest CPU
double cpu_clock_scale = 1.0;

View File

@ -16,9 +16,7 @@ namespace Frontend {
/// WindowInformation
enum class WindowSystemType : u8 {
Headless,
Android,
Windows,
MacOS,
X11,
Wayland,
};

View File

@ -849,14 +849,14 @@ static void ReadMemory() {
SendReply("E01");
}
if (!Memory::IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
addr)) {
auto& memory = Core::System::GetInstance().Memory();
if (!memory.IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
addr)) {
return SendReply("E00");
}
std::vector<u8> data(len);
Core::System::GetInstance().Memory().ReadBlock(
*Core::System::GetInstance().Kernel().GetCurrentProcess(), addr, data.data(), len);
memory.ReadBlock(addr, data.data(), len);
MemToGdbHex(reply, data.data(), len);
reply[len * 2] = '\0';
@ -873,16 +873,16 @@ static void WriteMemory() {
auto len_pos = std::find(start_offset, command_buffer + command_length, ':');
u32 len = HexToInt(start_offset, static_cast<u32>(len_pos - start_offset));
if (!Memory::IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
addr)) {
auto& memory = Core::System::GetInstance().Memory();
if (!memory.IsValidVirtualAddress(*Core::System::GetInstance().Kernel().GetCurrentProcess(),
addr)) {
return SendReply("E00");
}
std::vector<u8> data(len);
GdbHexToMem(data.data(), len_pos + 1, len);
Core::System::GetInstance().Memory().WriteBlock(
*Core::System::GetInstance().Kernel().GetCurrentProcess(), addr, data.data(), len);
memory.WriteBlock(addr, data.data(), len);
Core::GetRunningCore().ClearInstructionCache();
SendReply("OK");
}
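For orientation, these two handlers service the GDB remote serial protocol's memory packets; the sample exchanges below are illustrative, not taken from the diff:

// 'm addr,length'        -> ReadMemory: reply is the hex-encoded bytes,
//                           e.g. send m00100000,4 and receive efbeadde
// 'M addr,length:XX...'  -> WriteMemory: reply is OK on success,
//                           e.g. send M00100000,4:efbeadde and receive OK
// Both handlers reply E00 when the target virtual address is not mapped in
// the current process (the IsValidVirtualAddress checks above).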

View File

@ -10,7 +10,6 @@
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scm_rev.h"
#include "common/scope_exit.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/core_timing.h"
@ -39,7 +38,6 @@
#include "core/hle/kernel/wait_object.h"
#include "core/hle/lock.h"
#include "core/hle/result.h"
#include "core/hle/service/service.h"
namespace Kernel {
@ -374,7 +372,7 @@ ResultCode SVC::UnmapMemoryBlock(Handle handle, u32 addr) {
/// Connect to an OS service given the port name, returns the handle to the port to out
ResultCode SVC::ConnectToPort(Handle* out_handle, VAddr port_name_address) {
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), port_name_address))
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), port_name_address))
return ERR_NOT_FOUND;
static constexpr std::size_t PortNameMaxLength = 11;
@ -541,7 +539,7 @@ ResultCode SVC::WaitSynchronizationN(s32* out, VAddr handles_address, s32 handle
bool wait_all, s64 nano_seconds) {
Thread* thread = kernel.GetCurrentThreadManager().GetCurrentThread();
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
return ERR_INVALID_POINTER;
// NOTE: on real hardware, there is no nullptr check for 'out' (tested with firmware 4.4). If
@ -687,7 +685,7 @@ static ResultCode ReceiveIPCRequest(Kernel::KernelSystem& kernel, Memory::Memory
/// In a single operation, sends a IPC reply and waits for a new request.
ResultCode SVC::ReplyAndReceive(s32* index, VAddr handles_address, s32 handle_count,
Handle reply_target) {
if (!Memory::IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
if (!memory.IsValidVirtualAddress(*kernel.GetCurrentProcess(), handles_address))
return ERR_INVALID_POINTER;
// Check if 'handle_count' is invalid

View File

@ -337,8 +337,7 @@ ResultVal<std::shared_ptr<Thread>> KernelSystem::CreateThread(
}
// TODO(yuriks): Other checks, returning 0xD9001BEA
if (!Memory::IsValidVirtualAddress(*owner_process, entry_point)) {
if (!memory.IsValidVirtualAddress(*owner_process, entry_point)) {
LOG_ERROR(Kernel_SVC, "(name={}): invalid entry {:08x}", name, entry_point);
// TODO: Verify error
return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel,

View File

@ -492,10 +492,7 @@ void NWM_UDS::HandleDeauthenticationFrame(const Network::WifiPacket& packet) {
auto node_it = std::find_if(node_info.begin(), node_info.end(), [&node](const NodeInfo& info) {
return info.network_node_id == node.node_id;
});
if (node_it == node_info.end()) {
LOG_ERROR(Service_NWM, "node_it is last node of node_info");
return;
}
ASSERT(node_it != node_info.end());
connection_status.node_bitmask &= ~(1 << (node.node_id - 1));
connection_status.changed_nodes |= 1 << (node.node_id - 1);
@ -1100,6 +1097,9 @@ void NWM_UDS::SendTo(Kernel::HLERequestContext& ctx) {
u32 data_size = rp.Pop<u32>();
u8 flags = rp.Pop<u8>();
// There should never be a dest_node_id of 0
ASSERT(dest_node_id != 0);
std::vector<u8> input_buffer = rp.PopStaticBuffer();
ASSERT(input_buffer.size() >= data_size);
input_buffer.resize(data_size);
@ -1114,14 +1114,6 @@ void NWM_UDS::SendTo(Kernel::HLERequestContext& ctx) {
return;
}
// There should never be a dest_node_id of 0
if (dest_node_id == 0) {
rb.Push(ResultCode(ErrorDescription::NotFound, ErrorModule::UDS,
ErrorSummary::WrongArgument, ErrorLevel::Status));
LOG_ERROR(Service_NWM, "dest_node_id is 0");
return;
}
if (dest_node_id == connection_status.network_node_id) {
LOG_ERROR(Service_NWM, "tried to send packet to itself");
rb.Push(ResultCode(ErrorDescription::NotFound, ErrorModule::UDS,

View File

@ -212,25 +212,19 @@ struct CTRPollFD {
/// Translates the resulting events of a Poll operation from 3DS-specific to
/// platform-specific
static u32 TranslateToPlatform(Events input_event, bool isOutput) {
#if _WIN32
constexpr bool isWin = true;
#else
constexpr bool isWin = false;
#endif
static u32 TranslateToPlatform(Events input_event) {
u32 ret = 0;
if (input_event.pollin)
ret |= POLLIN;
if (input_event.pollpri && !isWin)
if (input_event.pollpri)
ret |= POLLPRI;
if (input_event.pollhup && (!isWin || isOutput))
if (input_event.pollhup)
ret |= POLLHUP;
if (input_event.pollerr && (!isWin || isOutput))
if (input_event.pollerr)
ret |= POLLERR;
if (input_event.pollout)
ret |= POLLOUT;
if (input_event.pollnval && (isWin && isOutput))
if (input_event.pollnval)
ret |= POLLNVAL;
return ret;
}
@ -239,26 +233,20 @@ struct CTRPollFD {
Events revents; ///< Events received (output)
/// Converts a platform-specific pollfd to a 3ds specific structure
static CTRPollFD FromPlatform(SOC::SOC_U& socu, pollfd const& fd) {
static CTRPollFD FromPlatform(pollfd const& fd) {
CTRPollFD result;
result.events.hex = Events::TranslateTo3DS(fd.events).hex;
result.revents.hex = Events::TranslateTo3DS(fd.revents).hex;
for (auto iter = socu.open_sockets.begin(); iter != socu.open_sockets.end(); ++iter) {
if (iter->second.socket_fd == fd.fd) {
result.fd = iter->first;
break;
}
}
result.fd = static_cast<u32>(fd.fd);
return result;
}
/// Converts a 3ds specific pollfd to a platform-specific structure
static pollfd ToPlatform(SOC::SOC_U& socu, CTRPollFD const& fd) {
static pollfd ToPlatform(CTRPollFD const& fd) {
pollfd result;
result.events = Events::TranslateToPlatform(fd.events, false);
result.revents = Events::TranslateToPlatform(fd.revents, true);
auto iter = socu.open_sockets.find(fd.fd);
result.fd = (iter != socu.open_sockets.end()) ? iter->second.socket_fd : 0;
result.events = Events::TranslateToPlatform(fd.events);
result.revents = Events::TranslateToPlatform(fd.revents);
result.fd = fd.fd;
return result;
}
};
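With the per-SOC_U handle lookup gone, both converters are pure value transforms, which is what lets Poll (below) pass them straight to std::transform. A condensed sketch of that round trip, assuming the POSIX ::poll signature (Windows builds go through an equivalent poll entry point):

// Condensed from the new Poll body: translate 3DS -> platform, poll on the
// host, then translate the results back into the guest structures.
s32 HostPoll(std::vector<CTRPollFD>& ctr_fds, int timeout) {
    std::vector<pollfd> platform_pollfd(ctr_fds.size());
    std::transform(ctr_fds.begin(), ctr_fds.end(), platform_pollfd.begin(),
                   CTRPollFD::ToPlatform);
    const s32 ret = ::poll(platform_pollfd.data(),
                           static_cast<nfds_t>(platform_pollfd.size()), timeout);
    std::transform(platform_pollfd.begin(), platform_pollfd.end(),
                   ctr_fds.begin(), CTRPollFD::FromPlatform);
    return ret;
}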
@ -354,14 +342,6 @@ struct CTRAddrInfo {
static_assert(sizeof(CTRAddrInfo) == 0x130, "Size of CTRAddrInfo is not correct");
void SOC_U::PreTimerAdjust() {
timer_adjust_handle = Core::System::GetInstance().GetRunningCore().GetTimer().StartAdjust();
}
void SOC_U::PostTimerAdjust() {
Core::System::GetInstance().GetRunningCore().GetTimer().EndAdjust(timer_adjust_handle);
}
void SOC_U::CleanupSockets() {
for (auto sock : open_sockets)
closesocket(sock.second.socket_fd);
@ -396,28 +376,21 @@ void SOC_U::Socket(Kernel::HLERequestContext& ctx) {
return;
}
u64 ret = static_cast<u64>(::socket(domain, type, protocol));
u32 socketHandle = GetNextSocketID();
u32 ret = static_cast<u32>(::socket(domain, type, protocol));
if ((s64)ret != SOCKET_ERROR_VALUE)
open_sockets[socketHandle] = {static_cast<decltype(SocketHolder::socket_fd)>(ret), true};
if ((s32)ret != SOCKET_ERROR_VALUE)
open_sockets[ret] = {ret, true};
if ((s64)ret == SOCKET_ERROR_VALUE)
if ((s32)ret == SOCKET_ERROR_VALUE)
ret = TranslateError(GET_ERRNO);
rb.Push(RESULT_SUCCESS);
rb.Push(socketHandle);
rb.Push(ret);
}
void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x05, 2, 4);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 len = rp.Pop<u32>();
rp.PopPID();
auto sock_addr_buf = rp.PopStaticBuffer();
@ -427,7 +400,7 @@ void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
sockaddr sock_addr = CTRSockAddr::ToPlatform(ctr_sock_addr);
s32 ret = ::bind(fd_info->second.socket_fd, &sock_addr, std::max<u32>(sizeof(sock_addr), len));
s32 ret = ::bind(socket_handle, &sock_addr, std::max<u32>(sizeof(sock_addr), len));
if (ret != 0)
ret = TranslateError(GET_ERRNO);
@ -440,12 +413,6 @@ void SOC_U::Bind(Kernel::HLERequestContext& ctx) {
void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x13, 3, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 ctr_cmd = rp.Pop<u32>();
u32 ctr_arg = rp.Pop<u32>();
rp.PopPID();
@ -460,10 +427,11 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
if (ctr_cmd == 3) { // F_GETFL
#ifdef _WIN32
posix_ret = 0;
if (fd_info->second.blocking == false)
auto iter = open_sockets.find(socket_handle);
if (iter != open_sockets.end() && iter->second.blocking == false)
posix_ret |= 4; // O_NONBLOCK
#else
int ret = ::fcntl(fd_info->second.socket_fd, F_GETFL, 0);
int ret = ::fcntl(socket_handle, F_GETFL, 0);
if (ret == SOCKET_ERROR_VALUE) {
posix_ret = TranslateError(GET_ERRNO);
return;
@ -475,7 +443,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
} else if (ctr_cmd == 4) { // F_SETFL
#ifdef _WIN32
unsigned long tmp = (ctr_arg & 4 /* O_NONBLOCK */) ? 1 : 0;
int ret = ioctlsocket(fd_info->second.socket_fd, FIONBIO, &tmp);
int ret = ioctlsocket(socket_handle, FIONBIO, &tmp);
if (ret == SOCKET_ERROR_VALUE) {
posix_ret = TranslateError(GET_ERRNO);
return;
@ -484,7 +452,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
if (iter != open_sockets.end())
iter->second.blocking = (tmp == 0);
#else
int flags = ::fcntl(fd_info->second.socket_fd, F_GETFL, 0);
int flags = ::fcntl(socket_handle, F_GETFL, 0);
if (flags == SOCKET_ERROR_VALUE) {
posix_ret = TranslateError(GET_ERRNO);
return;
@ -494,7 +462,7 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
if (ctr_arg & 4) // O_NONBLOCK
flags |= O_NONBLOCK;
int ret = ::fcntl(fd_info->second.socket_fd, F_SETFL, flags);
int ret = ::fcntl(socket_handle, F_SETFL, flags);
if (ret == SOCKET_ERROR_VALUE) {
posix_ret = TranslateError(GET_ERRNO);
return;
@ -510,16 +478,10 @@ void SOC_U::Fcntl(Kernel::HLERequestContext& ctx) {
void SOC_U::Listen(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x03, 2, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 backlog = rp.Pop<u32>();
rp.PopPID();
s32 ret = ::listen(fd_info->second.socket_fd, backlog);
s32 ret = ::listen(socket_handle, backlog);
if (ret != 0)
ret = TranslateError(GET_ERRNO);
@ -534,19 +496,11 @@ void SOC_U::Accept(Kernel::HLERequestContext& ctx) {
// performing nonblocking operations and spinlock until the data is available
IPC::RequestParser rp(ctx, 0x04, 2, 2);
const auto socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
[[maybe_unused]] const auto max_addr_len = static_cast<socklen_t>(rp.Pop<u32>());
rp.PopPID();
sockaddr addr;
socklen_t addr_len = sizeof(addr);
PreTimerAdjust();
u32 ret = static_cast<u32>(::accept(fd_info->second.socket_fd, &addr, &addr_len));
PostTimerAdjust();
u32 ret = static_cast<u32>(::accept(socket_handle, &addr, &addr_len));
if (static_cast<s32>(ret) != SOCKET_ERROR_VALUE) {
open_sockets[ret] = {ret, true};
@ -589,22 +543,13 @@ void SOC_U::GetHostId(Kernel::HLERequestContext& ctx) {
void SOC_U::Close(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x0B, 1, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
rp.PopPID();
s32 ret = 0;
PreTimerAdjust();
ret = closesocket(fd_info->second.socket_fd);
PostTimerAdjust();
open_sockets.erase(socket_handle);
ret = closesocket(socket_handle);
if (ret != 0)
ret = TranslateError(GET_ERRNO);
@ -616,12 +561,6 @@ void SOC_U::Close(Kernel::HLERequestContext& ctx) {
void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x0A, 4, 6);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 len = rp.Pop<u32>();
u32 flags = rp.Pop<u32>();
u32 addr_len = rp.Pop<u32>();
@ -630,18 +569,16 @@ void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
auto dest_addr_buff = rp.PopStaticBuffer();
s32 ret = -1;
PreTimerAdjust();
if (addr_len > 0) {
CTRSockAddr ctr_dest_addr;
std::memcpy(&ctr_dest_addr, dest_addr_buff.data(), sizeof(ctr_dest_addr));
sockaddr dest_addr = CTRSockAddr::ToPlatform(ctr_dest_addr);
ret = ::sendto(fd_info->second.socket_fd, reinterpret_cast<const char*>(input_buff.data()),
len, flags, &dest_addr, sizeof(dest_addr));
ret = ::sendto(socket_handle, reinterpret_cast<const char*>(input_buff.data()), len, flags,
&dest_addr, sizeof(dest_addr));
} else {
ret = ::sendto(fd_info->second.socket_fd, reinterpret_cast<const char*>(input_buff.data()),
len, flags, nullptr, 0);
ret = ::sendto(socket_handle, reinterpret_cast<const char*>(input_buff.data()), len, flags,
nullptr, 0);
}
PostTimerAdjust();
if (ret == SOCKET_ERROR_VALUE)
ret = TranslateError(GET_ERRNO);
@ -654,12 +591,6 @@ void SOC_U::SendTo(Kernel::HLERequestContext& ctx) {
void SOC_U::RecvFromOther(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x7, 4, 4);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 len = rp.Pop<u32>();
u32 flags = rp.Pop<u32>();
u32 addr_len = rp.Pop<u32>();
@ -673,20 +604,19 @@ void SOC_U::RecvFromOther(Kernel::HLERequestContext& ctx) {
socklen_t src_addr_len = sizeof(src_addr);
s32 ret = -1;
PreTimerAdjust();
if (addr_len > 0) {
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
len, flags, &src_addr, &src_addr_len);
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
&src_addr, &src_addr_len);
if (ret >= 0 && src_addr_len > 0) {
ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
std::memcpy(addr_buff.data(), &ctr_src_addr, sizeof(ctr_src_addr));
}
} else {
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
len, flags, NULL, 0);
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
NULL, 0);
addr_buff.resize(0);
}
PostTimerAdjust();
if (ret == SOCKET_ERROR_VALUE) {
ret = TranslateError(GET_ERRNO);
} else {
@ -706,12 +636,6 @@ void SOC_U::RecvFrom(Kernel::HLERequestContext& ctx) {
// performing nonblocking operations and spinlock until the data is available
IPC::RequestParser rp(ctx, 0x08, 4, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 len = rp.Pop<u32>();
u32 flags = rp.Pop<u32>();
u32 addr_len = rp.Pop<u32>();
@ -724,21 +648,19 @@ void SOC_U::RecvFrom(Kernel::HLERequestContext& ctx) {
socklen_t src_addr_len = sizeof(src_addr);
s32 ret = -1;
PreTimerAdjust();
if (addr_len > 0) {
// Only get src addr if an input addr is available
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
len, flags, &src_addr, &src_addr_len);
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
&src_addr, &src_addr_len);
if (ret >= 0 && src_addr_len > 0) {
ctr_src_addr = CTRSockAddr::FromPlatform(src_addr);
std::memcpy(addr_buff.data(), &ctr_src_addr, sizeof(ctr_src_addr));
}
} else {
ret = ::recvfrom(fd_info->second.socket_fd, reinterpret_cast<char*>(output_buff.data()),
len, flags, NULL, 0);
ret = ::recvfrom(socket_handle, reinterpret_cast<char*>(output_buff.data()), len, flags,
NULL, 0);
addr_buff.resize(0);
}
PostTimerAdjust();
s32 total_received = ret;
if (ret == SOCKET_ERROR_VALUE) {
@ -769,32 +691,21 @@ void SOC_U::Poll(Kernel::HLERequestContext& ctx) {
// The 3ds_pollfd and the pollfd structures may be different (Windows/Linux have different
// sizes)
// so we have to copy the data in order
// so we have to copy the data
std::vector<pollfd> platform_pollfd(nfds);
for (u32 i = 0; i < nfds; i++) {
platform_pollfd[i] = CTRPollFD::ToPlatform(*this, ctr_fds[i]);
}
std::transform(ctr_fds.begin(), ctr_fds.end(), platform_pollfd.begin(), CTRPollFD::ToPlatform);
PreTimerAdjust();
s32 ret = ::poll(platform_pollfd.data(), nfds, timeout);
PostTimerAdjust();
// Now update the output 3ds_pollfd structure
for (u32 i = 0; i < nfds; i++) {
ctr_fds[i] = CTRPollFD::FromPlatform(*this, platform_pollfd[i]);
}
// Now update the output pollfd structure
std::transform(platform_pollfd.begin(), platform_pollfd.end(), ctr_fds.begin(),
CTRPollFD::FromPlatform);
std::vector<u8> output_fds(nfds * sizeof(CTRPollFD));
std::memcpy(output_fds.data(), ctr_fds.data(), nfds * sizeof(CTRPollFD));
if (ret == SOCKET_ERROR_VALUE) {
int err = GET_ERRNO;
LOG_ERROR(Service_SOC, "Socket error: {}", err);
if (ret == SOCKET_ERROR_VALUE)
ret = TranslateError(GET_ERRNO);
}
size_t test = platform_pollfd.size();
IPC::RequestBuilder rb = rp.MakeBuilder(2, 2);
rb.Push(RESULT_SUCCESS);
@ -805,18 +716,12 @@ void SOC_U::Poll(Kernel::HLERequestContext& ctx) {
void SOC_U::GetSockName(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x17, 2, 2);
const auto socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
[[maybe_unused]] const auto max_addr_len = rp.Pop<u32>();
rp.PopPID();
sockaddr dest_addr;
socklen_t dest_addr_len = sizeof(dest_addr);
s32 ret = ::getsockname(fd_info->second.socket_fd, &dest_addr, &dest_addr_len);
s32 ret = ::getsockname(socket_handle, &dest_addr, &dest_addr_len);
CTRSockAddr ctr_dest_addr = CTRSockAddr::FromPlatform(dest_addr);
std::vector<u8> dest_addr_buff(sizeof(ctr_dest_addr));
@ -834,16 +739,10 @@ void SOC_U::GetSockName(Kernel::HLERequestContext& ctx) {
void SOC_U::Shutdown(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x0C, 2, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
s32 how = rp.Pop<s32>();
rp.PopPID();
s32 ret = ::shutdown(fd_info->second.socket_fd, how);
s32 ret = ::shutdown(socket_handle, how);
if (ret != 0)
ret = TranslateError(GET_ERRNO);
IPC::RequestBuilder rb = rp.MakeBuilder(2, 0);
@ -854,18 +753,12 @@ void SOC_U::Shutdown(Kernel::HLERequestContext& ctx) {
void SOC_U::GetPeerName(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x18, 2, 2);
const auto socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
[[maybe_unused]] const auto max_addr_len = rp.Pop<u32>();
rp.PopPID();
sockaddr dest_addr;
socklen_t dest_addr_len = sizeof(dest_addr);
const int ret = ::getpeername(fd_info->second.socket_fd, &dest_addr, &dest_addr_len);
const int ret = ::getpeername(socket_handle, &dest_addr, &dest_addr_len);
CTRSockAddr ctr_dest_addr = CTRSockAddr::FromPlatform(dest_addr);
std::vector<u8> dest_addr_buff(sizeof(ctr_dest_addr));
@ -888,12 +781,6 @@ void SOC_U::Connect(Kernel::HLERequestContext& ctx) {
// performing nonblocking operations and spinlock until the data is available
IPC::RequestParser rp(ctx, 0x06, 2, 4);
const auto socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
[[maybe_unused]] const auto input_addr_len = rp.Pop<u32>();
rp.PopPID();
auto input_addr_buf = rp.PopStaticBuffer();
@ -902,9 +789,7 @@ void SOC_U::Connect(Kernel::HLERequestContext& ctx) {
std::memcpy(&ctr_input_addr, input_addr_buf.data(), sizeof(ctr_input_addr));
sockaddr input_addr = CTRSockAddr::ToPlatform(ctr_input_addr);
PreTimerAdjust();
s32 ret = ::connect(fd_info->second.socket_fd, &input_addr, sizeof(input_addr));
PostTimerAdjust();
s32 ret = ::connect(socket_handle, &input_addr, sizeof(input_addr));
if (ret != 0)
ret = TranslateError(GET_ERRNO);
@ -936,12 +821,6 @@ void SOC_U::ShutdownSockets(Kernel::HLERequestContext& ctx) {
void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x11, 4, 2);
u32 socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
u32 level = rp.Pop<u32>();
s32 optname = rp.Pop<s32>();
socklen_t optlen = static_cast<socklen_t>(rp.Pop<u32>());
@ -959,7 +838,7 @@ void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
#endif
} else {
char* optval_data = reinterpret_cast<char*>(optval.data());
err = ::getsockopt(fd_info->second.socket_fd, level, optname, optval_data, &optlen);
err = ::getsockopt(socket_handle, level, optname, optval_data, &optlen);
if (err == SOCKET_ERROR_VALUE) {
err = TranslateError(GET_ERRNO);
}
@ -975,12 +854,6 @@ void SOC_U::GetSockOpt(Kernel::HLERequestContext& ctx) {
void SOC_U::SetSockOpt(Kernel::HLERequestContext& ctx) {
IPC::RequestParser rp(ctx, 0x12, 4, 4);
const auto socket_handle = rp.Pop<u32>();
auto fd_info = open_sockets.find(socket_handle);
if (fd_info == open_sockets.end()) {
IPC::RequestBuilder rb = rp.MakeBuilder(1, 0);
rb.Push(ERR_INVALID_HANDLE);
return;
}
const auto level = rp.Pop<u32>();
const auto optname = rp.Pop<s32>();
[[maybe_unused]] const auto optlen = static_cast<socklen_t>(rp.Pop<u32>());
@ -997,7 +870,7 @@ void SOC_U::SetSockOpt(Kernel::HLERequestContext& ctx) {
#endif
} else {
const char* optval_data = reinterpret_cast<const char*>(optval.data());
err = static_cast<u32>(::setsockopt(fd_info->second.socket_fd, level, optname, optval_data,
err = static_cast<u32>(::setsockopt(socket_handle, level, optname, optval_data,
static_cast<socklen_t>(optval.size())));
if (err == SOCKET_ERROR_VALUE) {
err = TranslateError(GET_ERRNO);

View File

@ -6,7 +6,6 @@
#include <unordered_map>
#include <boost/serialization/unordered_map.hpp>
#include "core/hle/result.h"
#include "core/hle/service/service.h"
namespace Core {
@ -17,13 +16,7 @@ namespace Service::SOC {
/// Holds information about a particular socket
struct SocketHolder {
#ifdef _WIN32
using SOCKET = unsigned long long;
SOCKET socket_fd; ///< The socket descriptor
#else
u32 socket_fd; ///< The socket descriptor
#endif // _WIN32
bool blocking; ///< Whether the socket is blocking or not; only read on Windows.
private:
@ -41,10 +34,6 @@ public:
~SOC_U();
private:
static constexpr ResultCode ERR_INVALID_HANDLE =
ResultCode(ErrorDescription::InvalidHandle, ErrorModule::SOC, ErrorSummary::InvalidArgument,
ErrorLevel::Permanent);
void Socket(Kernel::HLERequestContext& ctx);
void Bind(Kernel::HLERequestContext& ctx);
void Fcntl(Kernel::HLERequestContext& ctx);
@ -70,29 +59,16 @@ private:
void GetAddrInfoImpl(Kernel::HLERequestContext& ctx);
void GetNameInfoImpl(Kernel::HLERequestContext& ctx);
// Socket ids
u32 next_socket_id = 3;
u32 GetNextSocketID() {
return next_socket_id++;
}
// System timer adjust
u32 timer_adjust_handle;
void PreTimerAdjust();
void PostTimerAdjust();
/// Close all open sockets
void CleanupSockets();
/// Holds info about the currently open sockets
friend struct CTRPollFD;
std::unordered_map<u32, SocketHolder> open_sockets;
template <class Archive>
void serialize(Archive& ar, const unsigned int) {
ar& boost::serialization::base_object<Kernel::SessionRequestHandler>(*this);
ar& open_sockets;
ar& timer_adjust_handle;
}
friend class boost::serialization::access;
};

View File

@ -2,23 +2,19 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstring>
#include <boost/serialization/array.hpp>
#include <boost/serialization/binary_object.hpp>
#include "audio_core/dsp_interface.h"
#include "common/archives.h"
#include "common/assert.h"
#include "common/atomic_ops.h"
#include "common/common_types.h"
#include "common/logging/log.h"
#include "common/swap.h"
#include "core/arm/arm_interface.h"
#include "core/core.h"
#include "core/global.h"
#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/lock.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/renderer_base.h"
@ -146,6 +142,144 @@ public:
}
}
void WalkBlock(const Kernel::Process& process, const VAddr src_addr, const std::size_t size,
auto on_unmapped, auto on_memory, auto on_special, auto on_rasterizer,
auto increment) {
auto& page_table = *process.vm_manager.page_table;
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
const VAddr current_vaddr =
static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
on_unmapped(copy_amount, current_vaddr);
break;
}
case PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* const src_ptr = page_table.pointers[page_index] + page_offset;
on_memory(copy_amount, src_ptr);
break;
}
case PageType::Special: {
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
on_special(handler, copy_amount, current_vaddr);
break;
}
case PageType::RasterizerCachedMemory: {
u8* const rasterizer_ptr = GetPointerForRasterizerCache(current_vaddr);
on_rasterizer(current_vaddr, copy_amount, rasterizer_ptr);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
increment(copy_amount);
remaining_size -= copy_amount;
}
}
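WalkBlock factors the per-page walk out of every block operation; ReadBlockImpl and WriteBlockImpl below are its first clients, and ZeroBlock and CopyBlock further down follow the same shape. As a hypothetical extra client (the name and member placement are illustrative; pages are 4 KiB as implied by CITRA_PAGE_BITS):

// Illustrative only: checksum guest memory by supplying the five callbacks.
// Requires <numeric> for std::accumulate.
u64 MemorySystem::ChecksumBlock(const Kernel::Process& process, VAddr addr,
                                std::size_t size) {
    u64 sum = 0;
    impl->WalkBlock(
        process, addr, size,
        [](std::size_t, VAddr) {}, // unmapped: contributes nothing
        [&sum](std::size_t amount, const u8* src) {
            sum = std::accumulate(src, src + amount, sum);
        },
        [](MMIORegionPointer&, std::size_t, VAddr) {}, // skip MMIO pages
        [&sum](VAddr, std::size_t amount, const u8* ptr) {
            sum = std::accumulate(ptr, ptr + amount, sum);
        },
        [](std::size_t) {}); // no caller-side pointer to advance
    return sum;
}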
template <bool UNSAFE>
void ReadBlockImpl(const Kernel::Process& process, const VAddr src_addr, void* dest_buffer,
const std::size_t size) {
WalkBlock(
process, src_addr, size,
[src_addr, size, &dest_buffer](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped ReadBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, src_addr, size);
std::memset(dest_buffer, 0, copy_amount);
},
[&dest_buffer](const std::size_t copy_amount, const u8* const src_ptr) {
std::memcpy(dest_buffer, src_ptr, copy_amount);
},
[&dest_buffer](MMIORegionPointer& handler, const std::size_t copy_amount,
const VAddr current_vaddr) {
handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
},
[&dest_buffer](const VAddr current_vaddr, const std::size_t copy_amount,
const u8* const rasterizer_ptr) {
if constexpr (!UNSAFE) {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Flush);
}
std::memcpy(dest_buffer, rasterizer_ptr, copy_amount);
},
[&dest_buffer](const std::size_t copy_amount) {
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
});
}
template <bool UNSAFE>
void WriteBlockImpl(const Kernel::Process& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"Unmapped WriteBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
},
[&src_buffer](const std::size_t copy_amount, u8* const dest_ptr) {
std::memcpy(dest_ptr, src_buffer, copy_amount);
},
[&src_buffer](MMIORegionPointer& handler, const std::size_t copy_amount,
const VAddr current_vaddr) {
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
},
[&src_buffer](const VAddr current_vaddr, const std::size_t copy_amount,
u8* const host_ptr) {
if constexpr (!UNSAFE) {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Invalidate);
}
std::memcpy(host_ptr, src_buffer, copy_amount);
},
[&src_buffer](const std::size_t copy_amount) {
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
});
}
MemoryRef GetPointerForRasterizerCache(VAddr addr) const {
if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
return {fcram_mem, addr - LINEAR_HEAP_VADDR};
}
if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
return {fcram_mem, addr - NEW_LINEAR_HEAP_VADDR};
}
if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
return {vram_mem, addr - VRAM_VADDR};
}
UNREACHABLE();
return MemoryRef{};
}
/**
* This function should only be called for virtual addresses with attribute `PageType::Special`.
*/
MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
for (const auto& region : page_table.special_regions) {
if (vaddr >= region.base && vaddr < (region.base + region.size)) {
return region.handler;
}
}
ASSERT_MSG(false, "Mapped IO page without a handler @ {:08X}", vaddr);
return nullptr; // Should never happen
}
private:
friend class boost::serialization::access;
template <class Archive>
@ -270,16 +404,7 @@ void MemorySystem::UnmapRegion(PageTable& page_table, VAddr base, u32 size) {
}
MemoryRef MemorySystem::GetPointerForRasterizerCache(VAddr addr) const {
if (addr >= LINEAR_HEAP_VADDR && addr < LINEAR_HEAP_VADDR_END) {
return {impl->fcram_mem, addr - LINEAR_HEAP_VADDR};
}
if (addr >= NEW_LINEAR_HEAP_VADDR && addr < NEW_LINEAR_HEAP_VADDR_END) {
return {impl->fcram_mem, addr - NEW_LINEAR_HEAP_VADDR};
}
if (addr >= VRAM_VADDR && addr < VRAM_VADDR_END) {
return {impl->vram_mem, addr - VRAM_VADDR};
}
UNREACHABLE();
return impl->GetPointerForRasterizerCache(addr);
}
void MemorySystem::RegisterPageTable(std::shared_ptr<PageTable> page_table) {
@ -293,19 +418,6 @@ void MemorySystem::UnregisterPageTable(std::shared_ptr<PageTable> page_table) {
}
}
/**
* This function should only be called for virtual addresses with attribute `PageType::Special`.
*/
static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) {
for (const auto& region : page_table.special_regions) {
if (vaddr >= region.base && vaddr < (region.base + region.size)) {
return region.handler;
}
}
ASSERT_MSG(false, "Mapped IO page without a handler @ {:08X}", vaddr);
return nullptr; // Should never happen
}
template <typename T>
T ReadMMIO(MMIORegionPointer mmio_handler, VAddr addr);
@ -336,10 +448,12 @@ T MemorySystem::Read(const VAddr vaddr) {
return value;
}
case PageType::Special:
return ReadMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr);
return ReadMMIO<T>(impl->GetMMIOHandler(*impl->current_page_table, vaddr), vaddr);
default:
UNREACHABLE();
}
return T{};
}
template <typename T>
@ -369,48 +483,14 @@ void MemorySystem::Write(const VAddr vaddr, const T data) {
break;
}
case PageType::Special:
WriteMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
WriteMMIO<T>(impl->GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
break;
default:
UNREACHABLE();
}
}
template <typename T>
bool MemorySystem::WriteExclusive(const VAddr vaddr, const T data, const T expected) {
u8* page_pointer = impl->current_page_table->pointers[vaddr >> CITRA_PAGE_BITS];
if (page_pointer) {
const auto volatile_pointer =
reinterpret_cast<volatile T*>(&page_pointer[vaddr & CITRA_PAGE_MASK]);
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
PageType type = impl->current_page_table->attributes[vaddr >> CITRA_PAGE_BITS];
switch (type) {
case PageType::Unmapped:
LOG_ERROR(HW_Memory, "unmapped Write{} 0x{:08X} @ 0x{:08X} at PC 0x{:08X}",
sizeof(data) * 8, (u32)data, vaddr, Core::GetRunningCore().GetPC());
return true;
case PageType::Memory:
ASSERT_MSG(false, "Mapped memory page without a pointer @ {:08X}", vaddr);
return true;
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(vaddr, sizeof(T), FlushMode::Invalidate);
const auto volatile_pointer =
reinterpret_cast<volatile T*>(GetPointerForRasterizerCache(vaddr).GetPtr());
return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
}
case PageType::Special:
WriteMMIO<T>(GetMMIOHandler(*impl->current_page_table, vaddr), vaddr, data);
return false;
default:
UNREACHABLE();
}
return true;
}
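The fast path of the removed WriteExclusive reduced to an atomic compare-and-swap on the host page. A standalone sketch of that contract in standard C++ (the function name is illustrative; citra's actual helper was Common::AtomicCompareAndSwap on a volatile pointer):

#include <atomic>

// Illustrative only: stores `data` and returns true only if the word at
// page_pointer + offset still held `expected` at the moment of the swap.
static bool CompareAndSwap32(u8* page_pointer, std::size_t offset, u32 data,
                             u32 expected) {
    auto* ptr = reinterpret_cast<std::atomic<u32>*>(page_pointer + offset);
    return ptr->compare_exchange_strong(expected, data);
}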
bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
bool MemorySystem::IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
auto& page_table = *process.vm_manager.page_table;
auto page_pointer = page_table.pointers[vaddr >> CITRA_PAGE_BITS];
@ -423,7 +503,7 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
if (page_table.attributes[vaddr >> CITRA_PAGE_BITS] != PageType::Special)
return false;
MMIORegionPointer mmio_region = GetMMIOHandler(page_table, vaddr);
MMIORegionPointer mmio_region = impl->GetMMIOHandler(page_table, vaddr);
if (mmio_region) {
return mmio_region->IsValidAddress(vaddr);
}
@ -432,7 +512,7 @@ bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) {
}
bool MemorySystem::IsValidPhysicalAddress(const PAddr paddr) const {
return GetPhysicalPointer(paddr) != nullptr;
return GetPhysicalRef(paddr);
}
u8* MemorySystem::GetPointer(const VAddr vaddr) {
@ -471,11 +551,14 @@ std::string MemorySystem::ReadCString(VAddr vaddr, std::size_t max_length) {
string.reserve(max_length);
for (std::size_t i = 0; i < max_length; ++i) {
char c = Read8(vaddr);
if (c == '\0')
if (c == '\0') {
break;
}
string.push_back(c);
++vaddr;
}
string.shrink_to_fit();
return string;
}
@ -484,10 +567,6 @@ u8* MemorySystem::GetPhysicalPointer(PAddr address) {
return GetPhysicalRef(address);
}
const u8* MemorySystem::GetPhysicalPointer(PAddr address) const {
return GetPhysicalRef(address);
}
MemoryRef MemorySystem::GetPhysicalRef(PAddr address) const {
constexpr std::array memory_areas = {
std::make_pair(VRAM_PADDR, VRAM_SIZE),
@ -698,53 +777,12 @@ u64 MemorySystem::Read64(const VAddr addr) {
void MemorySystem::ReadBlock(const Kernel::Process& process, const VAddr src_addr,
void* dest_buffer, const std::size_t size) {
auto& page_table = *process.vm_manager.page_table;
return impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
}
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
while (remaining_size > 0) {
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"unmapped ReadBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
"0x{:08X}",
current_vaddr, src_addr, size, Core::GetRunningCore().GetPC());
std::memset(dest_buffer, 0, copy_amount);
break;
}
case PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
std::memcpy(dest_buffer, src_ptr, copy_amount);
break;
}
case PageType::Special: {
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
handler->ReadBlock(current_vaddr, dest_buffer, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Flush);
std::memcpy(dest_buffer, GetPointerForRasterizerCache(current_vaddr), copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_buffer = static_cast<u8*>(dest_buffer) + copy_amount;
remaining_size -= copy_amount;
}
void MemorySystem::ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size) {
const auto& process = *Core::System::GetInstance().Kernel().GetCurrentProcess();
return impl->ReadBlockImpl<false>(process, src_addr, dest_buffer, size);
}
void MemorySystem::Write8(const VAddr addr, const u8 data) {
@ -763,119 +801,41 @@ void MemorySystem::Write64(const VAddr addr, const u64 data) {
Write<u64_le>(addr, data);
}
bool MemorySystem::WriteExclusive8(const VAddr addr, const u8 data, const u8 expected) {
return WriteExclusive<u8>(addr, data, expected);
}
bool MemorySystem::WriteExclusive16(const VAddr addr, const u16 data, const u16 expected) {
return WriteExclusive<u16_le>(addr, data, expected);
}
bool MemorySystem::WriteExclusive32(const VAddr addr, const u32 data, const u32 expected) {
return WriteExclusive<u32_le>(addr, data, expected);
}
bool MemorySystem::WriteExclusive64(const VAddr addr, const u64 data, const u64 expected) {
return WriteExclusive<u64_le>(addr, data, expected);
}
void MemorySystem::WriteBlock(const Kernel::Process& process, const VAddr dest_addr,
const void* src_buffer, const std::size_t size) {
auto& page_table = *process.vm_manager.page_table;
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> CITRA_PAGE_BITS;
std::size_t page_offset = dest_addr & CITRA_PAGE_MASK;
return impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
}
while (remaining_size > 0) {
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
LOG_ERROR(HW_Memory,
"unmapped WriteBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
"0x{:08X}",
current_vaddr, dest_addr, size, Core::GetRunningCore().GetPC());
break;
}
case PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
std::memcpy(dest_ptr, src_buffer, copy_amount);
break;
}
case PageType::Special: {
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
handler->WriteBlock(current_vaddr, src_buffer, copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Invalidate);
std::memcpy(GetPointerForRasterizerCache(current_vaddr), src_buffer, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
src_buffer = static_cast<const u8*>(src_buffer) + copy_amount;
remaining_size -= copy_amount;
}
void MemorySystem::WriteBlock(const VAddr dest_addr, const void* src_buffer,
const std::size_t size) {
auto& process = *Core::System::GetInstance().Kernel().GetCurrentProcess();
return impl->WriteBlockImpl<false>(process, dest_addr, src_buffer, size);
}
void MemorySystem::ZeroBlock(const Kernel::Process& process, const VAddr dest_addr,
const std::size_t size) {
auto& page_table = *process.vm_manager.page_table;
std::size_t remaining_size = size;
std::size_t page_index = dest_addr >> CITRA_PAGE_BITS;
std::size_t page_offset = dest_addr & CITRA_PAGE_MASK;
static const std::array<u8, CITRA_PAGE_SIZE> zeros{0};
static const std::array<u8, CITRA_PAGE_SIZE> zeros = {};
while (remaining_size > 0) {
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
impl->WalkBlock(
process, dest_addr, size,
[dest_addr, size](const std::size_t copy_amount, const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"unmapped ZeroBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
"0x{:08X}",
current_vaddr, dest_addr, size, Core::GetRunningCore().GetPC());
break;
}
case PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
u8* dest_ptr = page_table.pointers[page_index] + page_offset;
"Unmapped ZeroBlock @ 0x{:016X} (start address = 0x{:016X}, size = {})",
current_vaddr, dest_addr, size);
},
[](const std::size_t copy_amount, u8* const dest_ptr) {
std::memset(dest_ptr, 0, copy_amount);
break;
}
case PageType::Special: {
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
},
[&zeros = zeros](MMIORegionPointer& handler, const std::size_t copy_amount,
const VAddr current_vaddr) {
handler->WriteBlock(current_vaddr, zeros.data(), copy_amount);
break;
}
case PageType::RasterizerCachedMemory: {
},
[](const VAddr current_vaddr, const std::size_t copy_amount, u8* const rasterizer_ptr) {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Invalidate);
std::memset(GetPointerForRasterizerCache(current_vaddr), 0, copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
remaining_size -= copy_amount;
}
std::memset(rasterizer_ptr, 0, copy_amount);
},
[](const std::size_t copy_amount) {});
}
void MemorySystem::CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
@ -886,55 +846,35 @@ void MemorySystem::CopyBlock(const Kernel::Process& process, VAddr dest_addr, VA
void MemorySystem::CopyBlock(const Kernel::Process& dest_process,
const Kernel::Process& src_process, VAddr dest_addr, VAddr src_addr,
std::size_t size) {
auto& page_table = *src_process.vm_manager.page_table;
std::size_t remaining_size = size;
std::size_t page_index = src_addr >> CITRA_PAGE_BITS;
std::size_t page_offset = src_addr & CITRA_PAGE_MASK;
std::array<u8, CITRA_PAGE_SIZE> copy_buffer{};
while (remaining_size > 0) {
const std::size_t copy_amount = std::min(CITRA_PAGE_SIZE - page_offset, remaining_size);
const VAddr current_vaddr = static_cast<VAddr>((page_index << CITRA_PAGE_BITS) + page_offset);
switch (page_table.attributes[page_index]) {
case PageType::Unmapped: {
impl->WalkBlock(
src_process, src_addr, size,
[this, &dest_process, &dest_addr, &src_addr, size](const std::size_t copy_amount,
const VAddr current_vaddr) {
LOG_ERROR(HW_Memory,
"unmapped CopyBlock @ 0x{:08X} (start address = 0x{:08X}, size = {}) at PC "
"0x{:08X}",
current_vaddr, src_addr, size, Core::GetRunningCore().GetPC());
"unmapped CopyBlock @ 0x{:08X} (start address = 0x{:08X}, size = {})",
current_vaddr, src_addr, size);
ZeroBlock(dest_process, dest_addr, copy_amount);
break;
}
case PageType::Memory: {
DEBUG_ASSERT(page_table.pointers[page_index]);
const u8* src_ptr = page_table.pointers[page_index] + page_offset;
WriteBlock(dest_process, dest_addr, src_ptr, copy_amount);
break;
}
case PageType::Special: {
MMIORegionPointer handler = GetMMIOHandler(page_table, current_vaddr);
DEBUG_ASSERT(handler);
std::vector<u8> buffer(copy_amount);
handler->ReadBlock(current_vaddr, buffer.data(), buffer.size());
WriteBlock(dest_process, dest_addr, buffer.data(), buffer.size());
break;
}
case PageType::RasterizerCachedMemory: {
},
[this, &dest_process, &dest_addr](const std::size_t copy_amount, const u8* const src_ptr) {
impl->WriteBlockImpl<false>(dest_process, dest_addr, src_ptr, copy_amount);
},
[this, &dest_process, &dest_addr, &copy_buffer](
MMIORegionPointer& handler, const std::size_t copy_amount, const VAddr current_vaddr) {
handler->ReadBlock(current_vaddr, copy_buffer.data(), copy_amount);
impl->WriteBlockImpl<false>(dest_process, dest_addr, copy_buffer.data(), copy_amount);
},
[this, &dest_process, &dest_addr](const VAddr current_vaddr, const std::size_t copy_amount,
u8* const rasterizer_ptr) {
RasterizerFlushVirtualRegion(current_vaddr, static_cast<u32>(copy_amount),
FlushMode::Flush);
WriteBlock(dest_process, dest_addr, GetPointerForRasterizerCache(current_vaddr),
copy_amount);
break;
}
default:
UNREACHABLE();
}
page_index++;
page_offset = 0;
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
remaining_size -= copy_amount;
}
impl->WriteBlockImpl<false>(dest_process, dest_addr, rasterizer_ptr, copy_amount);
},
[&dest_addr, &src_addr](const std::size_t copy_amount) {
dest_addr += static_cast<VAddr>(copy_amount);
src_addr += static_cast<VAddr>(copy_amount);
});
}
template <>

View File

@ -5,12 +5,9 @@
#pragma once
#include <array>
#include <cstddef>
#include <memory>
#include <string>
#include <vector>
#include <boost/serialization/array.hpp>
#include <boost/serialization/vector.hpp>
#include "common/common_types.h"
#include "common/memory_ref.h"
#include "core/mmio.h"
@ -102,11 +99,10 @@ struct PageTable {
private:
std::array<u8*, PAGE_TABLE_NUM_ENTRIES> raw;
std::array<MemoryRef, PAGE_TABLE_NUM_ENTRIES> refs;
friend struct PageTable;
};
Pointers pointers;
/**
@ -313,55 +309,257 @@ public:
void SetCurrentPageTable(std::shared_ptr<PageTable> page_table);
std::shared_ptr<PageTable> GetCurrentPageTable() const;
u8 Read8(VAddr addr);
u16 Read16(VAddr addr);
u32 Read32(VAddr addr);
u64 Read64(VAddr addr);
void Write8(VAddr addr, u8 data);
void Write16(VAddr addr, u16 data);
void Write32(VAddr addr, u32 data);
void Write64(VAddr addr, u64 data);
/**
* Gets a pointer to the given address.
*
* @param vaddr Virtual address to retrieve a pointer to.
*
* @returns The pointer to the given address, if the address is valid.
* If the address is not valid, nullptr will be returned.
*/
u8* GetPointer(VAddr vaddr);
/**
* Gets a pointer to the given address.
*
* @param vaddr Virtual address to retrieve a pointer to.
*
* @returns The pointer to the given address, if the address is valid.
* If the address is not valid, nullptr will be returned.
*/
const u8* GetPointer(VAddr vaddr) const;
/**
* Reads an 8-bit unsigned value from the current process' address space
* at the given virtual address.
*
* @param addr The virtual address to read the 8-bit value from.
*
* @returns the read 8-bit unsigned value.
*/
u8 Read8(VAddr addr);
/**
* Reads a 16-bit unsigned value from the current process' address space
* at the given virtual address.
*
* @param addr The virtual address to read the 16-bit value from.
*
* @returns the read 16-bit unsigned value.
*/
u16 Read16(VAddr addr);
/**
* Reads a 32-bit unsigned value from the current process' address space
* at the given virtual address.
*
* @param addr The virtual address to read the 32-bit value from.
*
* @returns the read 32-bit unsigned value.
*/
u32 Read32(VAddr addr);
/**
* Reads a 64-bit unsigned value from the current process' address space
* at the given virtual address.
*
* @param addr The virtual address to read the 64-bit value from.
*
* @returns the read 64-bit unsigned value.
*/
u64 Read64(VAddr addr);
/**
* Writes an 8-bit unsigned integer to the given virtual address in
* the current process' address space.
*
* @param addr The virtual address to write the 8-bit unsigned integer to.
* @param data The 8-bit unsigned integer to write to the given virtual address.
*
* @post The memory range [addr, sizeof(data)) contains the given data value.
*/
void Write8(VAddr addr, u8 data);
/**
* Writes a 16-bit unsigned integer to the given virtual address in
* the current process' address space.
*
* @param addr The virtual address to write the 16-bit unsigned integer to.
* @param data The 16-bit unsigned integer to write to the given virtual address.
*
* @post The memory range [addr, sizeof(data)) contains the given data value.
*/
void Write16(VAddr addr, u16 data);
/**
* Writes a 32-bit unsigned integer to the given virtual address in
* the current process' address space.
*
* @param addr The virtual address to write the 32-bit unsigned integer to.
* @param data The 32-bit unsigned integer to write to the given virtual address.
*
* @post The memory range [addr, sizeof(data)) contains the given data value.
*/
void Write32(VAddr addr, u32 data);
/**
* Writes a 64-bit unsigned integer to the given virtual address in
* the current process' address space.
*
* @param addr The virtual address to write the 64-bit unsigned integer to.
* @param data The 64-bit unsigned integer to write to the given virtual address.
*
* @post The memory range [addr, sizeof(data)) contains the given data value.
*/
void Write64(VAddr addr, u64 data);
/**
* Reads a null-terminated string from the given virtual address.
* This function will continually read characters until either:
*
* - A null character ('\0') is reached.
* - max_length characters have been read.
*
* @note The final null-terminating character (if found) is not included
* in the returned string.
*
* @param vaddr The address to begin reading the string from.
* @param max_length The maximum length of the string to read in characters.
*
* @returns The read string.
*/
std::string ReadCString(VAddr vaddr, std::size_t max_length);
/**
* Reads a contiguous block of bytes from a specified process' address space.
*
* @param process The process to read the data from.
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* process' address space.
*/
void ReadBlock(const Kernel::Process& process, VAddr src_addr, void* dest_buffer,
std::size_t size);
/**
* Reads a contiguous block of bytes from the current process' address space.
*
* @param src_addr The virtual address to begin reading from.
* @param dest_buffer The buffer to place the read bytes into.
* @param size The amount of data to read, in bytes.
*
* @note If a size of 0 is specified, then this function reads nothing and
* no attempts to access memory are made at all.
*
* @pre dest_buffer must be at least size bytes in length, otherwise a
* buffer overrun will occur.
*
* @post The range [dest_buffer, size) contains the read bytes from the
* current process' address space.
*/
void ReadBlock(VAddr src_addr, void* dest_buffer, std::size_t size);
/**
* Writes a range of bytes into a given process' address space at the specified
* virtual address.
*
* @param process The process to write data into the address space of.
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
* @post If a write is performed into a region of memory that is considered cached
* rasterizer memory, this will cause the currently active rasterizer to be
* notified and will mark that region as invalidated in any caches that the
* active graphics backend may be maintaining over the course of execution.
*/
void WriteBlock(const Kernel::Process& process, VAddr dest_addr, const void* src_buffer,
std::size_t size);
/**
* Writes a range of bytes into the current process' address space at the specified
* virtual address.
*
* @param dest_addr The destination virtual address to begin writing the data at.
* @param src_buffer The data to write into the process' address space.
* @param size The size of the data to write, in bytes.
*
* @post The address range [dest_addr, size) in the process' address space
* contains the data that was within src_buffer.
*
* @post If an attempt is made to write into an unmapped region of memory, the writes
* will be ignored and an error will be logged.
*
* @post If a write is performed into a region of memory that is considered cached
* rasterizer memory, this will cause the currently active rasterizer to be
* notified and will mark that region as invalidated in any caches that the
* active graphics backend may be maintaining over the course of execution.
*/
void WriteBlock(VAddr dest_addr, const void* src_buffer, std::size_t size);
/**
* Zeros a range of bytes within a given process' address space at the specified
* virtual address.
*
* @param process The process that will have data zeroed within its address space.
* @param dest_addr The destination virtual address at which to begin zeroing the data.
* @param size The size of the range to zero out, in bytes.
*
* @post The range [dest_addr, size) within the process' address space contains the
* value 0.
*/
void ZeroBlock(const Kernel::Process& process, VAddr dest_addr, const std::size_t size);
/**
* Copies data within a process' address space to another location within the
* same address space.
*
* @param process The process that will have data copied within its address space.
* @param dest_addr The destination virtual address to begin copying the data into.
* @param src_addr The source virtual address to begin copying the data from.
* @param size The size of the data to copy, in bytes.
*
* @post The range [dest_addr, size) within the process' address space contains the
* same data within the range [src_addr, size).
*/
void CopyBlock(const Kernel::Process& process, VAddr dest_addr, VAddr src_addr,
std::size_t size);
void CopyBlock(const Kernel::Process& dest_process, const Kernel::Process& src_process,
VAddr dest_addr, VAddr src_addr, std::size_t size);
/**
* Marks each page within the specified address range as cached or uncached.
*
* @param start The physical address indicating the start of the address range.
* @param size The size of the address range in bytes.
* @param cached Whether or not any pages within the address range should be
* marked as cached or uncached.
*/
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
/// Gets a pointer to the memory region beginning at the specified physical address.
u8* GetPhysicalPointer(PAddr address);
/// Gets a pointer to the memory region beginning at the specified physical address.
const u8* GetPhysicalPointer(PAddr address) const;
/// Returns a reference to the memory region beginning at the specified physical address
MemoryRef GetPhysicalRef(PAddr address) const;
u8* GetPointer(VAddr vaddr);
const u8* GetPointer(VAddr vaddr) const;
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
/// Returns true if the address refers to a valid memory region
bool IsValidPhysicalAddress(PAddr paddr) const;
@ -378,11 +576,6 @@ public:
/// Gets a serializable ref to FCRAM with the given offset
MemoryRef GetFCRAMRef(std::size_t offset) const;
/**
* Mark each page touching the region as cached.
*/
void RasterizerMarkRegionCached(PAddr start, u32 size, bool cached);
/// Registers page table for rasterizer cache marking
void RegisterPageTable(std::shared_ptr<PageTable> page_table);
@ -398,9 +591,6 @@ private:
template <typename T>
void Write(const VAddr vaddr, const T data);
template <typename T>
bool WriteExclusive(const VAddr vaddr, const T data, const T expected);
/**
* Gets the pointer for virtual memory where the page is marked as RasterizerCachedMemory.
* This is used to access the memory where the page pointer is nullptr due to rasterizer cache.
@ -412,7 +602,6 @@ private:
void MapPages(PageTable& page_table, u32 base, u32 size, MemoryRef memory, PageType type);
class Impl;
std::unique_ptr<Impl> impl;
friend class boost::serialization::access;
@ -424,9 +613,6 @@ public:
class BackingMemImpl;
};
/// Determines if the given VAddr is valid for the specified process.
bool IsValidVirtualAddress(const Kernel::Process& process, VAddr vaddr);
} // namespace Memory
BOOST_CLASS_EXPORT_KEY(Memory::MemorySystem::BackingMemImpl<Memory::Region::FCRAM>)

View File

@ -90,7 +90,6 @@ void LogSettings() {
LogSetting("Core_UseCpuJit", values.use_cpu_jit);
LogSetting("Core_CPUClockPercentage", values.cpu_clock_percentage);
LogSetting("Renderer_GraphicsAPI", GetAPIName(values.graphics_api));
LogSetting("Renderer_AsyncRecording", values.async_command_recording);
LogSetting("Renderer_UseHwRenderer", values.use_hw_renderer);
LogSetting("Renderer_UseHwShader", values.use_hw_shader);
LogSetting("Renderer_SeparableShader", values.separable_shader);
@ -129,11 +128,8 @@ void LogSettings() {
LogSetting("Camera_OuterLeftConfig", values.camera_config[Service::CAM::OuterLeftCamera]);
LogSetting("Camera_OuterLeftFlip", values.camera_flip[Service::CAM::OuterLeftCamera]);
LogSetting("DataStorage_UseVirtualSd", values.use_virtual_sd);
LogSetting("DataStorage_UseCustomStorage", values.use_custom_storage);
if (values.use_custom_storage) {
LogSetting("DataStorage_SdmcDir", FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
LogSetting("DataStorage_NandDir", FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
}
LogSetting("DataStorage_SdmcDir", FileUtil::GetUserPath(FileUtil::UserPath::SDMCDir));
LogSetting("DataStorage_NandDir", FileUtil::GetUserPath(FileUtil::UserPath::NANDDir));
LogSetting("System_IsNew3ds", values.is_new_3ds);
LogSetting("System_RegionValue", values.region_value);
LogSetting("Debugging_UseGdbstub", values.use_gdbstub);

View File

@ -157,7 +157,6 @@ struct Values {
// Data Storage
bool use_virtual_sd;
bool use_custom_storage;
// System
int region_value;
@ -167,10 +166,8 @@ struct Values {
// Renderer
GraphicsAPI graphics_api;
u16 physical_device;
bool spirv_shader_gen;
bool renderer_debug;
bool dump_command_buffers;
bool async_command_recording;
bool use_hw_renderer;
bool use_hw_shader;
bool separable_shader;

View File

@ -4,7 +4,6 @@
#include <chrono>
#include <thread>
#include <vector>
#ifdef _MSC_VER
#pragma warning(push)

View File

@ -47,7 +47,7 @@ public:
} else {
tilt_direction = mouse_move.Cast<float>();
tilt_angle = std::clamp(tilt_direction.Normalize() * sensitivity, 0.0f,
Common::PI * this->tilt_clamp / 180.0f);
std::numbers::pi_v<float> * this->tilt_clamp / 180.0f);
}
}
}
@ -110,7 +110,7 @@ private:
// Find the angular rate vector in world space
auto angular_rate = ((q - old_q) * inv_q).xyz * 2;
angular_rate *= 1000 / update_millisecond / Common::PI * 180;
angular_rate *= 1000 / update_millisecond / std::numbers::pi_v<float> * 180;
// Transform the two vectors from world space to 3DS space
gravity = QuaternionRotate(inv_q, gravity);

View File

@ -16,8 +16,6 @@
#include <utility>
#include <vector>
#include <SDL.h>
#include "common/assert.h"
#include "common/math_util.h"
#include "common/logging/log.h"
#include "common/param_package.h"
#include "common/threadsafe_queue.h"
@ -598,9 +596,9 @@ void SDLState::HandleGameControllerEvent(const SDL_Event& event) {
event.csensor.data[2] / SDL_STANDARD_GRAVITY);
break;
case SDL_SENSOR_GYRO:
joystick->SetGyro(-event.csensor.data[0] * (180.0f / Common::PI),
event.csensor.data[1] * (180.0f / Common::PI),
-event.csensor.data[2] * (180.0f / Common::PI));
joystick->SetGyro(-event.csensor.data[0] * (180.0f / std::numbers::pi),
event.csensor.data[1] * (180.0f / std::numbers::pi),
-event.csensor.data[2] * (180.0f / std::numbers::pi));
break;
}
}

View File

@ -3,34 +3,31 @@
// Refer to the license.txt file included.
#include <catch2/catch_test_macros.hpp>
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/kernel/shared_page.h"
#include "core/memory.h"
TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
TEST_CASE("memory.IsValidVirtualAddress", "[core][memory]") {
Core::Timing timing(1, 100);
Memory::MemorySystem memory;
Kernel::KernelSystem kernel(
memory, timing, [] {}, 0, 1, 0);
SECTION("these regions should not be mapped on an empty process") {
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
CHECK(Memory::IsValidVirtualAddress(*process, Memory::PROCESS_IMAGE_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::HEAP_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::LINEAR_HEAP_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == false);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::TLS_AREA_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::PROCESS_IMAGE_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::HEAP_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::LINEAR_HEAP_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::TLS_AREA_VADDR) == false);
}
SECTION("CONFIG_MEMORY_VADDR and SHARED_PAGE_VADDR should be valid after mapping them") {
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
kernel.MapSharedPages(process->vm_manager);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == true);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == true);
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == true);
CHECK(memory.IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == true);
}
SECTION("special regions should be valid after mapping them") {
@ -38,13 +35,13 @@ TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
SECTION("VRAM") {
kernel.HandleSpecialMapping(process->vm_manager,
{Memory::VRAM_VADDR, Memory::VRAM_SIZE, false, false});
CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == true);
CHECK(memory.IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == true);
}
SECTION("IO (Not yet implemented)") {
kernel.HandleSpecialMapping(
process->vm_manager, {Memory::IO_AREA_VADDR, Memory::IO_AREA_SIZE, false, false});
CHECK_FALSE(Memory::IsValidVirtualAddress(*process, Memory::IO_AREA_VADDR) == true);
CHECK_FALSE(memory.IsValidVirtualAddress(*process, Memory::IO_AREA_VADDR) == true);
}
}
@ -52,6 +49,6 @@ TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") {
auto process = kernel.CreateProcess(kernel.CreateCodeSet("", 0));
kernel.MapSharedPages(process->vm_manager);
process->vm_manager.UnmapRange(Memory::CONFIG_MEMORY_VADDR, Memory::CONFIG_MEMORY_SIZE);
CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
CHECK(memory.IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false);
}
}

View File

@ -86,8 +86,6 @@ add_library(video_core STATIC
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_blit_helper.cpp
renderer_vulkan/vk_blit_helper.h
renderer_vulkan/vk_blit_screen.cpp
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_descriptor_manager.cpp
@ -113,8 +111,6 @@ add_library(video_core STATIC
renderer_vulkan/vk_renderpass_cache.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader_gen_spv.cpp
renderer_vulkan/vk_shader_gen_spv.h
renderer_vulkan/vk_shader_util.cpp
renderer_vulkan/vk_shader_util.h
renderer_vulkan/vk_stream_buffer.cpp
@ -206,8 +202,7 @@ if (NOT MSVC)
endif()
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE nihstro-headers Boost::serialization glm::glm)
target_link_libraries(video_core PRIVATE vulkan-headers vma sirit SPIRV glslang glad)
target_link_libraries(video_core PRIVATE glad vma vulkan-headers glm::glm SPIRV glslang nihstro-headers Boost::serialization)
set_target_properties(video_core PROPERTIES INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})
if (ARCHITECTURE_x86_64)

View File

@ -5,88 +5,10 @@
#include <limits>
#include "core/memory.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/pica_state.h"
#include "video_core/video_core.h"
namespace VideoCore {
static Common::Vec4f ColorRGBA8(const u32 color) {
const auto rgba =
Common::Vec4u{color >> 0 & 0xFF, color >> 8 & 0xFF, color >> 16 & 0xFF, color >> 24 & 0xFF};
return rgba / 255.0f;
}
static Common::Vec3f LightColor(const Pica::LightingRegs::LightColor& color) {
return Common::Vec3u{color.r, color.g, color.b} / 255.0f;
}
RasterizerAccelerated::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
normquat = -normquat;
}
}
RasterizerAccelerated::RasterizerAccelerated() {
uniform_block_data.lighting_lut_dirty.fill(true);
}
/**
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
* for a detailed description of this issue (yuriks):
*
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
* interpolate two quaternions that are opposite, instead of going from one rotation to another
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
* making Dot(Q1, -Q2) positive.
*
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
* correct for most cases but can still rotate around the long way sometimes. An implementation
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
* between each step would work for those cases at the cost of being more complex to implement.
*
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
* these issues, making this basic implementation actually more accurate to the hardware.
*/
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
return (Common::Dot(a, b) < 0.f);
}
void RasterizerAccelerated::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
vertex_batch.emplace_back(v0, false);
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
}
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
@ -179,233 +101,4 @@ void RasterizerAccelerated::ClearAll(bool flush) {
cached_pages = {};
}
RasterizerAccelerated::VertexArrayInfo RasterizerAccelerated::AnalyzeVertexArray(bool is_indexed) {
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
u32 vertex_min;
u32 vertex_max;
if (is_indexed) {
const auto& index_info = regs.pipeline.index_array;
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
const bool index_u16 = index_info.format != 0;
vertex_min = 0xFFFF;
vertex_max = 0;
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
FlushRegion(address, size);
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
vertex_min = std::min(vertex_min, vertex);
vertex_max = std::max(vertex_max, vertex);
}
} else {
vertex_min = regs.pipeline.vertex_offset;
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
}
const u32 vertex_num = vertex_max - vertex_min + 1;
u32 vs_input_size = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count != 0) {
vs_input_size += loader.byte_count * vertex_num;
}
}
return {vertex_min, vertex_max, vs_input_size};
}
void RasterizerAccelerated::SyncDepthScale() {
float depth_scale =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
if (depth_scale != uniform_block_data.data.depth_scale) {
uniform_block_data.data.depth_scale = depth_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncDepthOffset() {
float depth_offset =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
if (depth_offset != uniform_block_data.data.depth_offset) {
uniform_block_data.data.depth_offset = depth_offset;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncFogColor() {
const auto& regs = Pica::g_state.regs;
uniform_block_data.data.fog_color = {
regs.texturing.fog_color.r.Value() / 255.0f,
regs.texturing.fog_color.g.Value() / 255.0f,
regs.texturing.fog_color.b.Value() / 255.0f,
};
uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncProcTexNoise() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_noise_f = {
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
};
uniform_block_data.data.proctex_noise_a = {
regs.proctex_noise_u.amplitude / 4095.0f,
regs.proctex_noise_v.amplitude / 4095.0f,
};
uniform_block_data.data.proctex_noise_p = {
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
};
uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncProcTexBias() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_bias =
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
.ToFloat32();
uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncAlphaTest() {
const auto& regs = Pica::g_state.regs;
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncCombinerColor() {
auto combiner_color =
ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncTevConstColor(std::size_t stage_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
const auto const_color = ColorRGBA8(tev_stage.const_color);
if (const_color == uniform_block_data.data.const_color[stage_index]) {
return;
}
uniform_block_data.data.const_color[stage_index] = const_color;
uniform_block_data.dirty = true;
}
void RasterizerAccelerated::SyncGlobalAmbient() {
auto color = LightColor(Pica::g_state.regs.lighting.global_ambient);
if (color != uniform_block_data.data.lighting_global_ambient) {
uniform_block_data.data.lighting_global_ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightSpecular0(int light_index) {
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
uniform_block_data.data.light_src[light_index].specular_0 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightSpecular1(int light_index) {
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
uniform_block_data.data.light_src[light_index].specular_1 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightDiffuse(int light_index) {
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
uniform_block_data.data.light_src[light_index].diffuse = color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightAmbient(int light_index) {
auto color = LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
if (color != uniform_block_data.data.light_src[light_index].ambient) {
uniform_block_data.data.light_src[light_index].ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightPosition(int light_index) {
const Common::Vec3f position = {
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
if (position != uniform_block_data.data.light_src[light_index].position) {
uniform_block_data.data.light_src[light_index].position = position;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightSpotDirection(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
const auto spot_direction = Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f};
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightDistanceAttenuationBias(int light_index) {
float dist_atten_bias =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
.ToFloat32();
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncLightDistanceAttenuationScale(int light_index) {
float dist_atten_scale =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
.ToFloat32();
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
uniform_block_data.data.shadow_bias_constant = constant;
uniform_block_data.data.shadow_bias_linear = linear;
uniform_block_data.dirty = true;
}
}
void RasterizerAccelerated::SyncShadowTextureBias() {
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
if (bias != uniform_block_data.data.shadow_texture_bias) {
uniform_block_data.data.shadow_texture_bias = bias;
uniform_block_data.dirty = true;
}
}
} // namespace VideoCore

View File

@ -3,137 +3,19 @@
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader_uniforms.h"
namespace VideoCore {
class RasterizerAccelerated : public RasterizerInterface {
public:
RasterizerAccelerated();
virtual ~RasterizerAccelerated() = default;
void AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
void ClearAll(bool flush) override;
protected:
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
protected:
/// Structure that keeps tracks of the uniform state
struct UniformBlockData {
Pica::Shader::UniformData data{};
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
bool dirty = true;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieves the index range and data size of the input vertex array
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
protected:
private:
std::array<u16, 0x30000> cached_pages{};
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
UniformBlockData uniform_block_data{};
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace VideoCore

View File

@ -908,7 +908,7 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
const auto staging = runtime.FindStaging(
const auto& staging = runtime.FindStaging(
load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true);
MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(load_info.addr);
if (!source_ptr) [[unlikely]] {
@ -939,7 +939,7 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
const u32 flush_end = boost::icl::last_next(interval);
ASSERT(flush_start >= surface->addr && flush_end <= surface->end);
const auto staging = runtime.FindStaging(
const auto& staging = runtime.FindStaging(
flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false);
const BufferTextureCopy download = {.buffer_offset = 0,
.buffer_size = staging.size,

View File

@ -91,7 +91,8 @@ public:
u32 fill_size = 0;
public:
std::vector<std::weak_ptr<Watcher>> watchers;
u32 watcher_count = 0;
std::array<std::weak_ptr<Watcher>, 8> watchers;
};
template <class S>
@ -189,7 +190,7 @@ template <class S>
auto SurfaceBase<S>::CreateWatcher() -> std::shared_ptr<Watcher> {
auto weak_ptr = reinterpret_cast<S*>(this)->weak_from_this();
auto watcher = std::make_shared<Watcher>(std::move(weak_ptr));
watchers.push_back(watcher);
watchers[watcher_count++] = watcher;
return watcher;
}
@ -211,7 +212,8 @@ void SurfaceBase<S>::UnlinkAllWatcher() {
}
}
watchers.clear();
watchers = {};
watcher_count = 0;
}
} // namespace VideoCore
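The vector-to-fixed-array change above bakes in a capacity assumption: at most eight live watchers per surface, with watcher_count as a bump index that UnlinkAllWatcher resets. A self-contained sketch of that invariant, with an explicit bounds assert added here (the assert is not in the diff):

#include <array>
#include <cassert>
#include <memory>

struct Watcher {};

struct SurfaceWatcherSketch {
    std::array<std::weak_ptr<Watcher>, 8> watchers; // capacity of 8 comes from the diff
    unsigned watcher_count = 0;

    std::shared_ptr<Watcher> CreateWatcher() {
        assert(watcher_count < watchers.size()); // assumption the fixed array relies on
        auto watcher = std::make_shared<Watcher>();
        watchers[watcher_count++] = watcher;
        return watcher;
    }

    void UnlinkAllWatcher() {
        watchers = {};     // drop all weak references
        watcher_count = 0; // slots are reused from the start
    }
};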

View File

@ -13,11 +13,6 @@ FrameDumperOpenGL::FrameDumperOpenGL(VideoDumper::Backend& video_dumper_,
Frontend::EmuWindow& emu_window)
: video_dumper(video_dumper_), context(emu_window.CreateSharedContext()) {}
FrameDumperOpenGL::~FrameDumperOpenGL() {
if (present_thread.joinable())
present_thread.join();
}
bool FrameDumperOpenGL::IsDumping() const {
return video_dumper.IsDumping();
}
@ -27,22 +22,19 @@ Layout::FramebufferLayout FrameDumperOpenGL::GetLayout() const {
}
void FrameDumperOpenGL::StartDumping() {
if (present_thread.joinable())
present_thread.join();
present_thread = std::thread(&FrameDumperOpenGL::PresentLoop, this);
present_thread = std::jthread([&](std::stop_token stop_token) { PresentLoop(stop_token); });
}
void FrameDumperOpenGL::StopDumping() {
stop_requested.store(true, std::memory_order_relaxed);
present_thread.request_stop();
}
void FrameDumperOpenGL::PresentLoop() {
void FrameDumperOpenGL::PresentLoop(std::stop_token stop_token) {
const auto scope = context->Acquire();
InitializeOpenGLObjects();
const auto& layout = GetLayout();
while (!stop_requested.exchange(false)) {
while (!stop_token.stop_requested()) {
auto frame = mailbox->TryGetPresentFrame(200);
if (!frame) {
continue;
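The pattern this file now uses is worth isolating: std::jthread joins in its destructor and carries a built-in stop source, which is what made the explicit destructor and the atomic stop flag removable. A minimal sketch, assuming nothing beyond the standard library:

#include <chrono>
#include <stop_token>
#include <thread>

struct PresentLoopSketch {
    std::jthread worker;

    void Start() {
        // Reassigning a joinable std::jthread first requests stop and joins it,
        // mirroring the joinable()/join() dance the old StartDumping needed.
        worker = std::jthread([](std::stop_token stop_token) {
            while (!stop_token.stop_requested()) {
                // stand-in for the mailbox wait (TryGetPresentFrame(200))
                std::this_thread::sleep_for(std::chrono::milliseconds(200));
            }
        });
    }

    void Stop() {
        worker.request_stop(); // cooperative: the loop exits at its next check
    }
};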

View File

@ -29,7 +29,7 @@ class RendererOpenGL;
class FrameDumperOpenGL {
public:
explicit FrameDumperOpenGL(VideoDumper::Backend& video_dumper, Frontend::EmuWindow& emu_window);
~FrameDumperOpenGL();
~FrameDumperOpenGL() = default;
bool IsDumping() const;
Layout::FramebufferLayout GetLayout() const;
@ -41,12 +41,11 @@ public:
private:
void InitializeOpenGLObjects();
void CleanupOpenGLObjects();
void PresentLoop();
void PresentLoop(std::stop_token stop_token);
VideoDumper::Backend& video_dumper;
std::unique_ptr<Frontend::GraphicsContext> context;
std::thread present_thread;
std::atomic_bool stop_requested{false};
std::jthread present_thread;
// PBOs used to dump frames faster
std::array<OGLBuffer, 2> pbos;

View File

@ -10,7 +10,7 @@ namespace OpenGL {
enum class Vendor { Unknown = 0, AMD = 1, Nvidia = 2, Intel = 3, Generic = 4 };
enum class DriverBug {
// AMD drivers sometimes freezes when one shader stage is changed but not the others.
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is

View File

@ -20,26 +20,39 @@
namespace OpenGL {
constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
static bool IsVendorAmd() {
const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
}
#ifdef __APPLE__
static bool IsVendorIntel() {
std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
return gpu_vendor == "Intel Inc.";
}
#endif
RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver)
: driver{driver}, runtime{driver}, res_cache{*this, runtime},
shader_program_manager{emu_window, driver, !driver.IsOpenGLES()},
vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
texture_buffer{GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE},
texture_lf_buffer{GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE} {
: driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()),
vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false),
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false),
texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) {
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true;
// Create a 1x1 clear texture to use in the NULL case,
// instead of OpenGL's default of solid black
default_texture.Create();
glGenTextures(1, &default_texture);
glBindTexture(GL_TEXTURE_2D, default_texture);
// For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
u8 framebuffer_data[4] = {0, 0, 0, 1};
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
@ -58,6 +71,19 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
sw_vao.Create();
hw_vao.Create();
uniform_block_data.dirty = true;
uniform_block_data.lighting_lut_dirty.fill(true);
uniform_block_data.lighting_lut_dirty_any = true;
uniform_block_data.fog_lut_dirty = true;
uniform_block_data.proctex_noise_lut_dirty = true;
uniform_block_data.proctex_color_map_dirty = true;
uniform_block_data.proctex_alpha_map_dirty = true;
uniform_block_data.proctex_lut_dirty = true;
uniform_block_data.proctex_diff_lut_dirty = true;
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
uniform_size_aligned_vs =
Common::AlignUp<std::size_t>(sizeof(VSUniformData), uniform_buffer_alignment);
@ -122,6 +148,17 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
state.Apply();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
#ifdef __APPLE__
if (IsVendorIntel()) {
shader_program_manager = std::make_unique<ShaderProgramManager>(
emu_window, VideoCore::g_separable_shader_enabled, is_amd);
} else {
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
}
#else
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
#endif
glEnable(GL_BLEND);
// Explicitly call the derived version to avoid warnings about calling virtual
@ -133,7 +170,7 @@ RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_program_manager.LoadDiskCache(stop_loading, callback);
shader_program_manager->LoadDiskCache(stop_loading, callback);
}
void RasterizerOpenGL::SyncEntireState() {
@ -178,6 +215,39 @@ void RasterizerOpenGL::SyncEntireState() {
SyncShadowTextureBias();
}
/**
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
* for a detailed description of this issue (yuriks):
*
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
* interpolate two quaternions that are opposite, instead of going from one rotation to another
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
* making Dot(Q1, -Q2) positive.
*
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
* correct for most cases but can still rotate around the long way sometimes. An implementation
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
* between each step would work for those cases at the cost of being more complex to implement.
*
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
* these issues, making this basic implementation actually more accurate to the hardware.
*/
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
return (Common::Dot(a, b) < 0.f);
}
void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
vertex_batch.emplace_back(v0, false);
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
}
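A quick numeric check of the comment's rule, standalone and outside Pica's float24 types (the values are illustrative): Q and -Q encode the same rotation, and Dot(Q, -Q) = -|Q|^2 < 0, so the dot-product test flags exactly the pairs that need flipping.

#include <array>
#include <cassert>

static float Dot4(const std::array<float, 4>& a, const std::array<float, 4>& b) {
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
}

int main() {
    const std::array<float, 4> q{0.0f, 0.0f, 0.70710678f, 0.70710678f}; // 90 degrees about Z
    const std::array<float, 4> neg_q{-q[0], -q[1], -q[2], -q[3]};       // same rotation, negated
    assert(Dot4(q, neg_q) < 0.0f); // opposite encodings: would interpolate the long way
    assert(Dot4(q, q) > 0.0f);     // same encoding: no flip needed
    return 0;
}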
static constexpr std::array<GLenum, 4> vs_attrib_types{
GL_BYTE, // VertexAttributeFormat::BYTE
GL_UNSIGNED_BYTE, // VertexAttributeFormat::UBYTE
@ -185,7 +255,50 @@ static constexpr std::array<GLenum, 4> vs_attrib_types{
GL_FLOAT // VertexAttributeFormat::FLOAT
};
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
RasterizerOpenGL::VertexArrayInfo RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
u32 vertex_min;
u32 vertex_max;
if (is_indexed) {
const auto& index_info = regs.pipeline.index_array;
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
const bool index_u16 = index_info.format != 0;
vertex_min = 0xFFFF;
vertex_max = 0;
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
res_cache.FlushRegion(address, size, nullptr);
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
vertex_min = std::min(vertex_min, vertex);
vertex_max = std::max(vertex_max, vertex);
}
} else {
vertex_min = regs.pipeline.vertex_offset;
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
}
const u32 vertex_num = vertex_max - vertex_min + 1;
u32 vs_input_size = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count != 0) {
vs_input_size += loader.byte_count * vertex_num;
}
}
return {vertex_min, vertex_max, vs_input_size};
}
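To make the arithmetic above concrete (illustrative numbers, not from any game): a non-indexed draw with vertex_offset = 10 and num_vertices = 5 gives vertex_min = 10 and vertex_max = 14, so vertex_num = 5; a single attribute loader with byte_count = 12 then contributes vs_input_size = 12 * 5 = 60 bytes to stream for the draw.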
void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
GLuint vs_input_index_min, GLuint vs_input_index_max) {
MICROPROFILE_SCOPE(OpenGL_VAO);
@ -265,14 +378,12 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
}
}
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool RasterizerOpenGL::SetupVertexShader() {
MICROPROFILE_SCOPE(OpenGL_VS);
return shader_program_manager.UseProgrammableVertexShader(Pica::g_state.regs,
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
Pica::g_state.vs);
}
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
bool RasterizerOpenGL::SetupGeometryShader() {
MICROPROFILE_SCOPE(OpenGL_GS);
const auto& regs = Pica::g_state.regs;
@ -282,7 +393,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
return false;
}
shader_program_manager.UseFixedGeometryShader(regs);
shader_program_manager->UseFixedGeometryShader(regs);
return true;
}
@ -343,7 +454,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
vertex_buffer.Unmap(vs_input_size);
shader_program_manager.ApplyTo(state);
shader_program_manager->ApplyTo(state);
state.Apply();
if (is_indexed) {
@ -378,7 +489,6 @@ void RasterizerOpenGL::DrawTriangles() {
Draw(false, false);
}
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Pica::g_state.regs;
@ -606,7 +716,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black.
state.texture_units[texture_index].texture_2d = default_texture.handle;
state.texture_units[texture_index].texture_2d = default_texture;
}
} else {
state.texture_units[texture_index].texture_2d = 0;
@ -670,9 +780,9 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
} else {
state.draw.vertex_array = sw_vao.handle;
state.draw.vertex_buffer = vertex_buffer.GetHandle();
shader_program_manager.UseTrivialVertexShader();
shader_program_manager.UseTrivialGeometryShader();
shader_program_manager.ApplyTo(state);
shader_program_manager->UseTrivialVertexShader();
shader_program_manager->UseTrivialGeometryShader();
shader_program_manager->ApplyTo(state);
state.Apply();
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
@ -767,7 +877,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (driver.IsOpenGLES()) {
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
@ -891,7 +1001,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (driver.IsOpenGLES()) {
if (GLES) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
@ -1229,7 +1339,6 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
}
}
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
void RasterizerOpenGL::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.FlushAll();
@ -1251,7 +1360,6 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) {
res_cache.InvalidateRegion(addr, size, nullptr);
}
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
@ -1502,7 +1610,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
}
void RasterizerOpenGL::SetShader() {
shader_program_manager.UseFragmentShader(Pica::g_state.regs);
shader_program_manager->UseFragmentShader(Pica::g_state.regs);
}
void RasterizerOpenGL::SyncClipEnabled() {
@ -1545,6 +1653,24 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
void RasterizerOpenGL::SyncDepthScale() {
float depth_scale =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
if (depth_scale != uniform_block_data.data.depth_scale) {
uniform_block_data.data.depth_scale = depth_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncDepthOffset() {
float depth_offset =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
if (depth_offset != uniform_block_data.data.depth_offset) {
uniform_block_data.data.depth_offset = depth_offset;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncBlendEnabled() {
state.blend.enabled = (Pica::g_state.regs.framebuffer.output_merger.alphablend_enable == 1);
}
@ -1574,11 +1700,56 @@ void RasterizerOpenGL::SyncBlendColor() {
state.blend.color.alpha = blend_color[3];
}
void RasterizerOpenGL::SyncFogColor() {
const auto& regs = Pica::g_state.regs;
uniform_block_data.data.fog_color = {
regs.texturing.fog_color.r.Value() / 255.0f,
regs.texturing.fog_color.g.Value() / 255.0f,
regs.texturing.fog_color.b.Value() / 255.0f,
};
uniform_block_data.dirty = true;
}
void RasterizerOpenGL::SyncProcTexNoise() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_noise_f = {
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
};
uniform_block_data.data.proctex_noise_a = {
regs.proctex_noise_u.amplitude / 4095.0f,
regs.proctex_noise_v.amplitude / 4095.0f,
};
uniform_block_data.data.proctex_noise_p = {
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
};
uniform_block_data.dirty = true;
}
void RasterizerOpenGL::SyncProcTexBias() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_bias =
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
.ToFloat32();
uniform_block_data.dirty = true;
}
void RasterizerOpenGL::SyncAlphaTest() {
const auto& regs = Pica::g_state.regs;
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLogicOp() {
const auto& regs = Pica::g_state.regs;
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
if (driver.IsOpenGLES()) {
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
@ -1591,7 +1762,7 @@ void RasterizerOpenGL::SyncLogicOp() {
void RasterizerOpenGL::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs;
if (driver.IsOpenGLES()) {
if (GLES) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
@ -1657,6 +1828,131 @@ void RasterizerOpenGL::SyncDepthTest() {
: GL_ALWAYS;
}
void RasterizerOpenGL::SyncCombinerColor() {
auto combiner_color =
PicaToGL::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncTevConstColor(std::size_t stage_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
const auto const_color = PicaToGL::ColorRGBA8(tev_stage.const_color);
if (const_color == uniform_block_data.data.const_color[stage_index]) {
return;
}
uniform_block_data.data.const_color[stage_index] = const_color;
uniform_block_data.dirty = true;
}
void RasterizerOpenGL::SyncGlobalAmbient() {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
if (color != uniform_block_data.data.lighting_global_ambient) {
uniform_block_data.data.lighting_global_ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
uniform_block_data.data.light_src[light_index].specular_0 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
uniform_block_data.data.light_src[light_index].specular_1 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
uniform_block_data.data.light_src[light_index].diffuse = color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightAmbient(int light_index) {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
if (color != uniform_block_data.data.light_src[light_index].ambient) {
uniform_block_data.data.light_src[light_index].ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightPosition(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
const Common::Vec3f position = {Pica::float16::FromRaw(light.x).ToFloat32(),
Pica::float16::FromRaw(light.y).ToFloat32(),
Pica::float16::FromRaw(light.z).ToFloat32()};
if (position != uniform_block_data.data.light_src[light_index].position) {
uniform_block_data.data.light_src[light_index].position = position;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightSpotDirection(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
const auto spot_direction =
Common::Vec3f{light.spot_x / 2047.0f, light.spot_y / 2047.0f, light.spot_z / 2047.0f};
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
float dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
float dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
uniform_block_data.data.shadow_bias_constant = constant;
uniform_block_data.data.shadow_bias_linear = linear;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::SyncShadowTextureBias() {
GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
if (bias != uniform_block_data.data.shadow_texture_bias) {
uniform_block_data.data.shadow_texture_bias = bias;
uniform_block_data.dirty = true;
}
}
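All of the Sync* helpers above share one dirty-flag pattern: decode the PICA register, compare against the cached uniform value, and only flag the UBO for re-upload when something actually changed. A minimal sketch of that pattern, with DecodeRegister() and cached_value as illustrative placeholders rather than Citra APIs:
float DecodeRegister(); // placeholder for the per-register decode seen above
float cached_value = 0.0f;
void SyncExample() {
    const float value = DecodeRegister();
    if (value == cached_value) {
        return;                       // unchanged, skip the UBO re-upload
    }
    cached_value = value;             // update the shadow copy
    uniform_block_data.dirty = true;  // UBO gets re-uploaded before the next draw
}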
void RasterizerOpenGL::SyncAndUploadLUTsLF() {
constexpr std::size_t max_size =
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +

@ -3,13 +3,17 @@
// Refer to the license.txt file included.
#pragma once
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/shader/shader.h"
namespace Frontend {
class EmuWindow;
@ -30,6 +34,8 @@ public:
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
@ -73,6 +79,48 @@ private:
bool supress_mipmap_for_cube = false;
};
/// Structure that the hardware-rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
normquat = -normquat;
}
}
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
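The VAO configured in SetupVertexArray has to mirror this struct layout exactly. A sketch of one attribute binding, assuming the usual offsetof-based setup (the attribute index is illustrative):
// Illustrative: binding the position attribute to match HardwareVertex.
glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
                      reinterpret_cast<GLvoid*>(offsetof(HardwareVertex, position)));
glEnableVertexAttribArray(0);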
/// Syncs the clip enabled status to match the PICA register
void SyncClipEnabled();
@ -85,6 +133,12 @@ private:
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@ -94,6 +148,18 @@ private:
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
@ -112,6 +178,46 @@ private:
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
@ -125,6 +231,15 @@ private:
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieves the range and the size of the input vertex data
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, GLuint vs_input_index_min,
GLuint vs_input_index_max);
@ -138,15 +253,41 @@ private:
private:
Driver& driver;
OpenGLState state;
GLuint default_texture;
TextureRuntime runtime;
RasterizerCache res_cache;
ShaderProgramManager shader_program_manager;
std::vector<HardwareVertex> vertex_batch;
bool is_amd;
bool shader_dirty = true;
struct {
UniformData data;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
bool lighting_lut_dirty_any;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
bool proctex_alpha_map_dirty;
bool proctex_lut_dirty;
bool proctex_diff_lut_dirty;
bool dirty;
} uniform_block_data = {};
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// These should be large enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
OGLTexture default_texture;
std::array<SamplerInfo, 3> texture_samplers;
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;
@ -163,6 +304,15 @@ private:
OGLTexture texture_buffer_lut_lf;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace OpenGL

@ -11,7 +11,7 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/video_core.h"
namespace OpenGL {
@ -327,13 +327,12 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
class ShaderProgramManager::Impl {
public:
explicit Impl(bool separable)
: separable(separable), programmable_vertex_shaders(separable),
explicit Impl(bool separable, bool is_amd)
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) {
if (separable) {
if (separable)
pipeline.Create();
}
}
struct ShaderTuple {
@ -362,19 +361,25 @@ public:
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
"ShaderTuple layout changed!");
bool is_amd;
bool separable;
ShaderTuple current;
ProgrammableVertexShaders programmable_vertex_shaders;
TrivialVertexShader trivial_vertex_shader;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
std::unordered_map<u64, OGLProgram> program_cache;
OGLPipeline pipeline;
ShaderDiskCache disk_cache;
};
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable)
: impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver} {}
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
bool is_amd)
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::~ShaderProgramManager() = default;
@ -436,7 +441,10 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) {
if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
if (impl->is_amd) {
// Without this resetting, AMD sometimes freezes when one stage is changed but the
// others are not. On the other hand, including this reset seems to introduce a
// memory leak in Intel Graphics.
glUseProgramStages(
impl->pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);

@ -107,13 +107,12 @@ static_assert(sizeof(VSUniformData) == 1856,
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
class Driver;
class OpenGLState;
/// A class that manages different shader stages and configures them with given config data.
class ShaderProgramManager {
public:
ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable);
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
~ShaderProgramManager();
void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -134,7 +133,7 @@ public:
private:
class Impl;
std::unique_ptr<Impl> impl;
Frontend::EmuWindow& emu_window;
Driver& driver;
};
} // namespace OpenGL

@ -12,21 +12,32 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
namespace OpenGL {
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback, bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
bool prefer_coherent)
: gl_target(target), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);
GLsizeiptr allocate_size = size;
if (array_buffer_for_amd) {
// On AMD GPUs there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bounds access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
// crash.
allocate_size *= 2;
}
if (GLAD_GL_ARB_buffer_storage) {
persistent = true;
coherent = prefer_coherent;
GLbitfield flags =
(readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, size, nullptr, flags);
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
gl_target, 0, buffer_size, flags | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0)));
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
} else {
glBufferData(gl_target, size, nullptr, GL_STREAM_DRAW);
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
}
}
@ -67,8 +78,8 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
if (invalidate || !persistent) {
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : 0) | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0) |
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
mapped_ptr = static_cast<u8*>(
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
@ -81,7 +92,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (!coherent && !readback && size > 0) {
if (!coherent && size > 0) {
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}
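Map/Unmap together give the usual streaming pattern: map a window, write exactly what is needed, then unmap that amount so non-coherent mappings get flushed. A hedged usage sketch (the buffer size, data_size and vertex_data are illustrative):
// Illustrative streaming upload through OGLStreamBuffer.
OGLStreamBuffer vertex_buffer{GL_ARRAY_BUFFER, 16 * 1024 * 1024, false};
const auto [ptr, offset, invalidate] = vertex_buffer.Map(data_size, 4);
std::memcpy(ptr, vertex_data, data_size); // write into the mapped window
vertex_buffer.Unmap(data_size);           // flushes the written range when not coherent
// "offset" then serves as the base offset for attribute pointers / draw calls.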

@ -10,7 +10,8 @@ namespace OpenGL {
class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback = false, bool prefer_coherent = false);
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd,
bool prefer_coherent = false);
~OGLStreamBuffer();
GLuint GetHandle() const;
@ -32,7 +33,6 @@ private:
OGLBuffer gl_buffer;
GLenum gl_target;
bool readback = false;
bool coherent = false;
bool persistent = false;

@ -36,7 +36,7 @@ static constexpr std::array COLOR_TUPLES_OES = {
FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4
};
[[nodiscard]] GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
switch (type) {
case VideoCore::SurfaceType::Color:
case VideoCore::SurfaceType::Texture:
@ -53,13 +53,9 @@ static constexpr std::array COLOR_TUPLES_OES = {
return GL_COLOR_BUFFER_BIT;
}
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
TextureRuntime::TextureRuntime(Driver& driver)
: driver{driver}, filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()},
downloader_es{false}, upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE},
download_buffer{GL_PIXEL_PACK_BUFFER, DOWNLOAD_BUFFER_SIZE, true} {
: driver{driver}, downloader_es{false}, filterer{Settings::values.texture_filter_name,
VideoCore::GetResolutionScaleFactor()} {
read_fbo.Create();
draw_fbo.Create();
@ -74,14 +70,51 @@ TextureRuntime::TextureRuntime(Driver& driver)
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
auto& buffer = upload ? upload_buffer : download_buffer;
auto [data, offset, invalidate] = buffer.Map(size, 4);
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER;
const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT;
auto& search = upload ? upload_buffers : download_buffers;
return StagingData{.buffer = buffer.GetHandle(),
.size = size,
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
.buffer_offset = offset};
// Attempt to find a free buffer that fits the requested data
for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) {
if (!upload || it->IsFree()) {
it->mapped = std::span{it->mapped.data(), size};
return *it;
}
}
OGLBuffer buffer{};
buffer.Create();
glBindBuffer(target, buffer.handle);
// Allocate a new buffer and map the data to the host
std::byte* data = nullptr;
if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) {
const GLbitfield storage =
upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT;
glBufferStorageEXT(target, size, nullptr,
storage | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
data = reinterpret_cast<std::byte*>(glMapBufferRange(
target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT));
} else if (driver.HasArbBufferStorage()) {
const GLbitfield storage =
upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT;
glBufferStorage(target, size, nullptr,
storage | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
data = reinterpret_cast<std::byte*>(glMapBufferRange(
target, 0, size, access | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT));
} else {
UNIMPLEMENTED();
}
glBindBuffer(target, 0);
StagingBuffer staging = {
.buffer = std::move(buffer), .mapped = std::span{data, size}, .size = size};
const auto& it = search.emplace(std::move(staging));
return *it;
}
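A plausible caller sequence for FindStaging, matching the Surface::Upload path further down in this file (upload_size, pixels and copy are illustrative):
// Illustrative upload through the staging-buffer cache.
const StagingBuffer& staging = runtime.FindStaging(upload_size, true);
std::memcpy(staging.mapped.data(), pixels, upload_size); // persistent, coherent mapping
surface.Upload(copy, staging);                           // binds staging.buffer and Lock()s it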
const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) {
@ -121,14 +154,12 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::spa
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const u32 levels = std::log2(std::max(width, height)) + 1;
const GLenum target =
type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
// Attempt to recycle an unused texture
const VideoCore::HostTextureTag key = {
.format = format, .width = width, .height = height, .layers = layers};
// Attempt to recycle an unused texture
if (auto it = texture_recycler.find(key); it != texture_recycler.end()) {
OGLTexture texture = std::move(it->second);
texture_recycler.erase(it);
@ -146,7 +177,8 @@ OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
glActiveTexture(GL_TEXTURE0);
glBindTexture(target, texture.handle);
glTexStorage2D(target, levels, tuple.internal_format, width, height);
glTexStorage2D(target, std::bit_width(std::max(width, height)), tuple.internal_format, width,
height);
glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@ -338,41 +370,42 @@ Surface::~Surface() {
}
}
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) {
MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGLSurface", "Texture Upload", MP_RGB(128, 192, 64));
void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(OpenGL_Upload);
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle);
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledUpload(upload, staging);
} else {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, texture.handle);
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, upload.texture_rect.left,
upload.texture_rect.bottom, upload.texture_rect.GetWidth(),
upload.texture_rect.GetHeight(), tuple.format, tuple.type,
reinterpret_cast<void*>(staging.buffer_offset));
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
runtime.upload_buffer.Unmap(staging.size);
upload.texture_rect.GetHeight(), tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
// Lock the staging buffer until glTexSubImage completes
staging.Lock();
InvalidateAllWatcher();
}
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
MICROPROFILE_DEFINE(OpenGL_Download, "OpenGLSurface", "Texture Download", MP_RGB(128, 192, 64));
void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) {
MICROPROFILE_SCOPE(OpenGL_Download);
// Ensure no bad interactions with GL_PACK_ALIGNMENT
@ -382,11 +415,11 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
SCOPE_EXIT({ prev_state.Apply(); });
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer);
glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle);
const bool is_scaled = res_scale != 1;
if (is_scaled) {
ScaledDownload(download, staging);
ScaledDownload(download);
} else {
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type,
texture);
@ -394,17 +427,15 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
const auto& tuple = runtime.GetFormatTuple(pixel_format);
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
tuple.format, tuple.type,
reinterpret_cast<void*>(staging.buffer_offset));
runtime.download_buffer.Unmap(staging.size);
tuple.format, tuple.type, 0);
}
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
}
void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload,
const StagingData& staging) {
const StagingBuffer& staging) {
const u32 rect_width = upload.texture_rect.GetWidth();
const u32 rect_height = upload.texture_rect.GetHeight();
const auto scaled_rect = upload.texture_rect * res_scale;
@ -437,7 +468,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload,
}
}
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging) {
void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) {
const u32 rect_width = download.texture_rect.GetWidth();
const u32 rect_height = download.texture_rect.GetHeight();
const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale;
@ -467,14 +498,11 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, const
const auto& tuple = runtime.GetFormatTuple(pixel_format);
if (driver.IsOpenGLES()) {
const auto& downloader_es = runtime.GetDownloaderES();
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height, rect_width,
reinterpret_cast<void*>(staging.buffer_offset));
downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height,
rect_width, 0);
} else {
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
reinterpret_cast<void*>(staging.buffer_offset));
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0);
}
runtime.download_buffer.Unmap(staging.size);
}
} // namespace OpenGL

@ -8,7 +8,6 @@
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
@ -20,11 +19,35 @@ struct FormatTuple {
GLenum type;
};
struct StagingData {
GLuint buffer;
u32 size = 0;
std::span<std::byte> mapped{};
GLintptr buffer_offset = 0;
struct StagingBuffer {
OGLBuffer buffer{};
mutable OGLSync buffer_lock{};
mutable std::span<std::byte> mapped{};
u32 size{};
bool operator<(const StagingBuffer& other) const {
return size < other.size;
}
/// Returns true if the buffer does not take part in pending transfer operations
bool IsFree() const {
if (buffer_lock) {
GLint status;
glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status);
return status == GL_SIGNALED;
}
return true;
}
/// Prevents the runtime from reusing the buffer until the transfer operation is complete
void Lock() const {
if (buffer_lock) {
buffer_lock.Release();
}
buffer_lock.Create();
}
};
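IsFree and Lock lean on OpenGL fence sync objects; assuming OGLSync is a thin wrapper over glFenceSync/glDeleteSync, the raw-GL equivalent is roughly:
// Raw-GL sketch of the fencing wrapped by OGLSync above (wrapper semantics assumed).
GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); // Lock(): queued after the transfer
GLint status = GL_UNSIGNALED;
glGetSynciv(fence, GL_SYNC_STATUS, 1, nullptr, &status);      // IsFree(): non-blocking poll
const bool is_free = (status == GL_SIGNALED);
glDeleteSync(fence);                                          // Release()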
class Driver;
@ -42,7 +65,7 @@ public:
~TextureRuntime() = default;
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
StagingData FindStaging(u32 size, bool upload);
const StagingBuffer& FindStaging(u32 size, bool upload);
/// Returns the OpenGL format tuple associated with the provided pixel format
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
@ -99,12 +122,17 @@ private:
private:
Driver& driver;
TextureFilterer filterer;
TextureDownloaderES downloader_es;
TextureFilterer filterer;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::unordered_multimap<VideoCore::HostTextureTag, OGLTexture> texture_recycler;
OGLStreamBuffer upload_buffer, download_buffer;
// Staging buffers stored in order of increasing size
std::multiset<StagingBuffer> upload_buffers;
std::multiset<StagingBuffer> download_buffers;
OGLFramebuffer read_fbo, draw_fbo;
// Recycled textures to reduce driver allocation overhead
std::unordered_multimap<VideoCore::HostTextureTag, OGLTexture> texture_recycler;
};
class Surface : public VideoCore::SurfaceBase<Surface> {
@ -113,10 +141,10 @@ public:
~Surface() override;
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
/// Downloads pixel data to staging from a rectangle region of the surface texture
void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
void Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging);
/// Returns the bpp of the internal surface format
u32 GetInternalBytesPerPixel() const {
@ -125,10 +153,10 @@ public:
private:
/// Uploads pixel data to scaled texture
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging);
void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging);
/// Downloads scaled image by downscaling the requested rectangle
void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging);
void ScaledDownload(const VideoCore::BufferTextureCopy& download);
private:
TextureRuntime& runtime;

@ -141,6 +141,7 @@ void main(){
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, MAX_SIZE, MAX_SIZE);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
r32ui_renderbuffer.handle);
glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1);
state.draw.draw_framebuffer = depth16_fbo.handle;
state.renderbuffer = r16_renderbuffer.handle;

@ -149,12 +149,14 @@ struct ScreenRectVertex {
Common::Vec2f tex_coord;
};
constexpr u32 VERTEX_BUFFER_SIZE = sizeof(ScreenRectVertex) * 8192;
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window}, instance{window, Settings::values.physical_device},
scheduler{instance, renderpass_cache, *this},
: RendererBase{window}, instance{window, Settings::values.physical_device}, scheduler{instance, *this},
renderpass_cache{instance, scheduler}, desc_manager{instance, scheduler},
runtime{instance, scheduler, renderpass_cache, desc_manager},
swapchain{instance, scheduler, renderpass_cache},
vertex_buffer{instance, scheduler, VERTEX_BUFFER_SIZE, vk::BufferUsageFlagBits::eVertexBuffer, {}},
rasterizer{render_window, instance, scheduler, desc_manager, runtime, renderpass_cache} {
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
@ -887,19 +889,12 @@ void RendererVulkan::SwapBuffers() {
const auto& layout = render_window.GetFramebufferLayout();
PrepareRendertarget();
const auto RecreateSwapchain = [&] {
scheduler.Finish();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
// Create swapchain if needed
if (swapchain.NeedsRecreation()) {
swapchain.Create(layout.width, layout.height);
};
}
do {
if (swapchain.NeedsRecreation()) {
RecreateSwapchain();
}
scheduler.WaitWorker();
swapchain.AcquireNextImage();
} while (swapchain.NeedsRecreation());
swapchain.AcquireNextImage();
scheduler.Record([layout](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
const vk::Viewport viewport = {.x = 0.0f,
@ -926,8 +921,9 @@ void RendererVulkan::SwapBuffers() {
DrawScreens(layout, false);
const vk::Semaphore image_acquired = swapchain.GetImageAcquiredSemaphore();
const vk::Semaphore present_ready = swapchain.GetPresentReadySemaphore();
const VkSemaphore present_ready = swapchain.GetPresentReadySemaphore();
scheduler.Flush(present_ready, image_acquired);
scheduler.WaitWorker();
swapchain.Present();
m_current_frame++;

@ -10,7 +10,7 @@
#include "common/math_util.h"
#include "core/hw/gpu.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_descriptor_manager.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@ -23,6 +23,42 @@ struct FramebufferLayout;
namespace Vulkan {
/// Structure used for storing information about the textures for each 3DS screen
struct TextureInfo {
ImageAlloc alloc;
u32 width;
u32 height;
GPU::Regs::PixelFormat format;
};
/// Structure used for storing information about the display target for each 3DS screen
struct ScreenInfo {
ImageAlloc* display_texture = nullptr;
Common::Rectangle<float> display_texcoords;
TextureInfo texture;
vk::Sampler sampler;
};
// Uniform data used for presenting the 3DS screens
struct PresentUniformData {
glm::mat4 modelview;
Common::Vec4f i_resolution;
Common::Vec4f o_resolution;
int screen_id_l = 0;
int screen_id_r = 0;
int layer = 0;
int reverse_interlaced = 0;
// Returns an immutable byte view of the uniform data
auto AsBytes() const {
return std::as_bytes(std::span{this, 1});
}
};
static_assert(sizeof(PresentUniformData) < 256, "PresentUniformData must be below 256 bytes!");
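AsBytes lets the renderer copy the uniforms into a stream buffer without manual casts; a minimal sketch (mapped_uniform_ptr is an illustrative destination):
// Illustrative: uploading the present uniforms through the byte view.
PresentUniformData draw_info{};
const auto bytes = draw_info.AsBytes(); // std::span of const std::byte
std::memcpy(mapped_uniform_ptr, bytes.data(), bytes.size());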
constexpr u32 PRESENT_PIPELINES = 3;
class RasterizerVulkan;
class RendererVulkan : public RendererBase {
@ -73,10 +109,25 @@ private:
DescriptorManager desc_manager;
TextureRuntime runtime;
Swapchain swapchain;
StreamBuffer vertex_buffer;
RasterizerVulkan rasterizer;
// Display information for top and bottom screens respectively
// Present pipelines (Normal, Anaglyph, Interlaced)
vk::PipelineLayout present_pipeline_layout;
vk::DescriptorSetLayout present_descriptor_layout;
vk::DescriptorUpdateTemplate present_update_template;
std::array<vk::Pipeline, PRESENT_PIPELINES> present_pipelines;
std::array<vk::DescriptorSet, PRESENT_PIPELINES> present_descriptor_sets;
std::array<vk::ShaderModule, PRESENT_PIPELINES> present_shaders;
std::array<vk::Sampler, 2> present_samplers;
vk::ShaderModule present_vertex_shader;
u32 current_pipeline = 0;
u32 current_sampler = 0;
/// Display information for top and bottom screens respectively
std::array<ScreenInfo, 3> screen_infos{};
PresentUniformData draw_info{};
vk::ClearColorValue clear_color{};
};
} // namespace Vulkan

File diff suppressed because it is too large

@ -1,152 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <glm/glm.hpp>
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
namespace Core {
class System;
}
namespace Memory {
class MemorySystem;
}
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class RasterizerInterface;
}
namespace Layout {
struct FramebufferLayout;
}
namespace Vulkan {
struct ScreenInfo;
class Instance;
class RasterizerVulkan;
class Scheduler;
class Swapchain;
class RenderpassCache;
class DescriptorManager;
struct ScreenInfo {
vk::ImageView image_view{};
u32 width{};
u32 height{};
Common::Rectangle<f32> texcoords;
};
using Images = std::array<vk::Image, 3>;
struct PresentUniformData {
glm::mat4 modelview;
Common::Vec4f i_resolution;
Common::Vec4f o_resolution;
int screen_id_l = 0;
int screen_id_r = 0;
int layer = 0;
int reverse_interlaced = 0;
// Returns an immutable byte view of the uniform data
auto AsBytes() const {
return std::as_bytes(std::span{this, 1});
}
};
constexpr u32 PRESENT_PIPELINES = 3;
class BlitScreen {
public:
explicit BlitScreen(Frontend::EmuWindow& render_window, const Instance& instance,
Scheduler& scheduler, Swapchain& swapchain, RenderpassCache& renderpass_cache,
DescriptorManager& desc_manager, std::array<ScreenInfo, 3>& screen_infos);
~BlitScreen();
void Recreate();
[[nodiscard]] vk::Semaphore Draw(const GPU::Regs::FramebufferConfig& framebuffer,
const vk::Framebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, vk::Extent2D render_area,
bool use_accelerated, u32 screen);
[[nodiscard]] vk::Semaphore DrawToSwapchain(const GPU::Regs::FramebufferConfig& framebuffer,
bool use_accelerated);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
vk::Extent2D extent);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const vk::ImageView& image_view,
vk::Extent2D extent, vk::RenderPass& rd);
private:
void CreateStaticResources();
void CreateShaders();
void CreateSemaphores();
void CreateDescriptorPool();
void CreateRenderPass();
vk::RenderPass CreateRenderPassImpl(vk::Format format, bool is_present = true);
void CreateDescriptorSetLayout();
void CreateDescriptorSets();
void CreatePipelineLayout();
void CreateGraphicsPipeline();
void CreateSampler();
void CreateDynamicResources();
void CreateFramebuffers();
void RefreshResources(const GPU::Regs::FramebufferConfig& framebuffer);
void ReleaseRawImages();
void CreateStagingBuffer(const GPU::Regs::FramebufferConfig& framebuffer);
void CreateRawImages(const GPU::Regs::FramebufferConfig& framebuffer);
struct BufferData;
void UpdateDescriptorSet(std::size_t image_index, bool use_accelerated) const;
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Layout::FramebufferLayout layout) const;
private:
Frontend::EmuWindow& render_window;
const Instance& instance;
Scheduler& scheduler;
Swapchain& swapchain;
RenderpassCache& renderpass_cache;
DescriptorManager& desc_manager;
Memory::MemorySystem& memory;
std::array<ScreenInfo, 3>& screen_infos;
std::size_t image_count;
PresentUniformData draw_info{};
StreamBuffer vertex_buffer;
vk::PipelineLayout pipeline_layout;
vk::DescriptorSetLayout descriptor_set_layout;
vk::DescriptorUpdateTemplate update_template;
std::array<vk::Pipeline, PRESENT_PIPELINES> pipelines;
std::array<vk::DescriptorSet, PRESENT_PIPELINES> descriptor_sets;
std::array<vk::ShaderModule, PRESENT_PIPELINES> shaders;
std::array<vk::Sampler, 2> samplers;
vk::ShaderModule vertex_shader;
u32 current_pipeline = 0;
u32 current_sampler = 0;
vk::RenderPass renderpass;
std::vector<vk::Framebuffer> framebuffers;
std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<Images> raw_images;
GPU::Regs::PixelFormat pixel_format;
u32 raw_width;
u32 raw_height;
};
} // namespace Vulkan

@ -5,6 +5,7 @@
#pragma once
#include <algorithm>
#include "common/common_types.h"
// Include vulkan-hpp header
#define VK_NO_PROTOTYPES 1

@ -13,8 +13,6 @@
namespace Vulkan {
vk::DynamicLoader Instance::dl;
vk::Format ToVkFormat(VideoCore::PixelFormat format) {
switch (format) {
case VideoCore::PixelFormat::RGBA8:
@ -42,7 +40,7 @@ vk::Format ToVkFormat(VideoCore::PixelFormat format) {
}
}
Instance::Instance(bool validation, bool dump_command_buffers) {
Instance::Instance() {
// Fetch instance-independent function pointers
auto vkGetInstanceProcAddr =
dl.getProcAddress<PFN_vkGetInstanceProcAddr>("vkGetInstanceProcAddr");
@ -54,19 +52,7 @@ Instance::Instance(bool validation, bool dump_command_buffers) {
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = VK_API_VERSION_1_0};
u32 layer_count = 0;
std::array<const char*, 2> layers;
if (validation) {
layers[layer_count++] = "VK_LAYER_KHRONOS_validation";
}
if (dump_command_buffers) {
layers[layer_count++] = "VK_LAYER_LUNARG_api_dump";
}
const vk::InstanceCreateInfo instance_info = {.pApplicationInfo = &application_info,
.enabledLayerCount = layer_count,
.ppEnabledLayerNames = layers.data()};
const vk::InstanceCreateInfo instance_info = {.pApplicationInfo = &application_info};
instance = vk::createInstance(instance_info);
@ -255,7 +241,6 @@ bool Instance::CreateDevice() {
// Not having geometry shaders will cause issues with accelerated rendering.
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
device_features = available;
if (!available.geometryShader) {
LOG_WARNING(Render_Vulkan,
"Geometry shaders not availabe! Accelerated rendering not possible!");
@ -287,6 +272,7 @@ bool Instance::CreateDevice() {
};
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
AddExtension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME);
AddExtension(VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME);
timeline_semaphores = AddExtension(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME);
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME);

@ -31,7 +31,7 @@ struct FormatTraits {
/// The global Vulkan instance
class Instance {
public:
Instance(bool validation = false, bool dump_command_buffers = false);
Instance(); ///< Portable constructor used to query physical devices
Instance(Frontend::EmuWindow& window, u32 physical_device_index);
~Instance();
@ -85,11 +85,6 @@ public:
return present_queue;
}
/// Returns true if logic operations need shader emulation
bool NeedsLogicOpEmulation() const {
return !device_features.logicOp;
}
/// Returns true when VK_KHR_timeline_semaphore is supported
bool IsTimelineSemaphoreSupported() const {
return timeline_semaphores;
@ -144,13 +139,12 @@ private:
void CreateAllocator();
private:
static vk::DynamicLoader dl;
vk::DynamicLoader dl;
vk::Device device;
vk::PhysicalDevice physical_device;
vk::Instance instance;
vk::SurfaceKHR surface;
vk::PhysicalDeviceProperties device_properties;
vk::PhysicalDeviceFeatures device_features;
VmaAllocator allocator;
vk::Queue present_queue;
vk::Queue graphics_queue;

@ -5,8 +5,8 @@
#include <filesystem>
#include "common/common_paths.h"
#include "common/file_util.h"
#include "common/microprofile.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "core/settings.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -17,33 +17,37 @@
namespace Vulkan {
u32 AttribBytes(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
switch (format) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
return sizeof(float) * size;
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
return sizeof(u16) * size;
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
return sizeof(u8) * size;
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
return sizeof(float) * attrib.size;
case AttribType::Int:
return sizeof(u32) * attrib.size;
case AttribType::Short:
return sizeof(u16) * attrib.size;
case AttribType::Byte:
case AttribType::Ubyte:
return sizeof(u8) * attrib.size;
}
return 0;
}
vk::Format ToVkAttributeFormat(Pica::PipelineRegs::VertexAttributeFormat format, u32 size) {
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
constexpr std::array attribute_formats = {
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
vk::Format::eR32G32B32A32Sfloat},
std::array{vk::Format::eR32Sint, vk::Format::eR32G32Sint, vk::Format::eR32G32B32Sint,
vk::Format::eR32G32B32A32Sint},
std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint,
vk::Format::eR16G16B16A16Sint},
std::array{vk::Format::eR8Sint, vk::Format::eR8G8Sint, vk::Format::eR8G8B8Sint,
vk::Format::eR8G8B8A8Sint},
std::array{vk::Format::eR8Uint, vk::Format::eR8G8Uint, vk::Format::eR8G8B8Uint,
vk::Format::eR8G8B8A8Uint},
std::array{vk::Format::eR16Sint, vk::Format::eR16G16Sint, vk::Format::eR16G16B16Sint,
vk::Format::eR16G16B16A16Sint},
std::array{vk::Format::eR32Sfloat, vk::Format::eR32G32Sfloat, vk::Format::eR32G32B32Sfloat,
vk::Format::eR32G32B32A32Sfloat}};
vk::Format::eR8G8B8A8Uint}};
ASSERT(size <= 4);
return attribute_formats[static_cast<u32>(format)][size - 1];
ASSERT(attrib.size <= 4);
return attribute_formats[static_cast<u32>(attrib.type.Value())][attrib.size.Value() - 1];
}
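As an example of the table lookup above, a three-component float attribute resolves to eR32G32B32Sfloat (BitField::Assign semantics assumed from Common::BitField):
// Illustrative lookup through the attribute format table.
VertexAttribute attrib{};
attrib.type.Assign(AttribType::Float);
attrib.size.Assign(3);
const vk::Format format = ToVkAttributeFormat(attrib); // vk::Format::eR32G32B32Sfloat
const u32 stride = AttribBytes(attrib);                // 12 bytes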
vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
@ -62,21 +66,6 @@ vk::ShaderStageFlagBits ToVkShaderStage(std::size_t index) {
return vk::ShaderStageFlagBits::eVertex;
}
[[nodiscard]] bool IsAttribFormatSupported(const VertexAttribute& attrib, const Instance& instance) {
static std::unordered_map<vk::Format, bool> format_support_cache;
vk::PhysicalDevice physical_device = instance.GetPhysicalDevice();
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
auto [it, new_format] = format_support_cache.try_emplace(format, false);
if (new_format) {
LOG_INFO(Render_Vulkan, "Querying support for format {}", vk::to_string(format));
const vk::FormatFeatureFlags features = physical_device.getFormatProperties(format).bufferFeatures;
it->second = (features & vk::FormatFeatureFlagBits::eVertexBuffer) == vk::FormatFeatureFlagBits::eVertexBuffer;
}
return it->second;
};
PipelineCache::PipelineCache(const Instance& instance, Scheduler& scheduler,
RenderpassCache& renderpass_cache, DescriptorManager& desc_manager)
: instance{instance}, scheduler{scheduler}, renderpass_cache{renderpass_cache}, desc_manager{desc_manager} {
@ -100,11 +89,7 @@ PipelineCache::~PipelineCache() {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fragment_shaders_glsl.shaders) {
device.destroyShaderModule(module);
}
for (auto& [key, module] : fragment_shaders_spv.shaders) {
for (auto& [key, module] : fragment_shaders.shaders) {
device.destroyShaderModule(module);
}
@ -181,7 +166,8 @@ void PipelineCache::BindPipeline(const PipelineInfo& info) {
const u64 info_hash_size = instance.IsExtendedDynamicStateSupported()
? offsetof(PipelineInfo, rasterization)
: offsetof(PipelineInfo, dynamic);
: offsetof(PipelineInfo, depth_stencil) +
offsetof(DepthStencilState, stencil_reference);
u64 info_hash = Common::ComputeHash64(&info, info_hash_size);
u64 pipeline_hash = Common::HashCombine(shader_hash, info_hash);
@ -202,30 +188,22 @@ bool PipelineCache::UseProgrammableVertexShader(const Pica::Regs& regs,
Pica::Shader::ShaderSetup& setup,
const VertexLayout& layout) {
PicaVSConfig config{regs.vs, setup};
u32 emulated_attrib_loc = MAX_VERTEX_ATTRIBUTES;
for (u32 i = 0; i < layout.attribute_count; i++) {
const auto& attrib = layout.attributes[i];
const u32 location = attrib.location.Value();
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT(is_supported || attrib.size == 3);
config.state.attrib_types[location] = attrib.type.Value();
config.state.emulated_attrib_locations[location] =
is_supported ? 0 : emulated_attrib_loc++;
config.state.attrib_types[attrib.location.Value()] = attrib.type.Value();
}
auto [handle, result] =
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::High);
if (!handle) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
return false;
}
scheduler.Record([this, config, setup = std::move(setup)](vk::CommandBuffer, vk::CommandBuffer) {
auto [handle, result] =
programmable_vertex_shaders.Get(config, setup, vk::ShaderStageFlagBits::eVertex,
instance.GetDevice(), ShaderOptimization::Debug);
if (!handle) {
LOG_ERROR(Render_Vulkan, "Failed to retrieve programmable vertex shader");
return;
}
scheduler.Record([this, handle = handle, hash = config.Hash()](vk::CommandBuffer, vk::CommandBuffer) {
current_shaders[ProgramType::VS] = handle;
shader_hashes[ProgramType::VS] = hash;
shader_hashes[ProgramType::VS] = config.Hash();
});
return true;
@ -242,8 +220,8 @@ void PipelineCache::UseFixedGeometryShader(const Pica::Regs& regs) {
const PicaFixedGSConfig gs_config{regs};
scheduler.Record([this, gs_config](vk::CommandBuffer, vk::CommandBuffer) {
vk::ShaderModule handle = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::High);
auto [handle, _] = fixed_geometry_shaders.Get(gs_config, vk::ShaderStageFlagBits::eGeometry,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::GS] = handle;
shader_hashes[ProgramType::GS] = gs_config.Hash();
});
@ -256,21 +234,12 @@ void PipelineCache::UseTrivialGeometryShader() {
});
}
MICROPROFILE_DEFINE(Vulkan_FragmentGeneration, "Vulkan", "Fragment Shader Compilation", MP_RGB(255, 100, 100));
void PipelineCache::UseFragmentShader(const Pica::Regs& regs) {
const PicaFSConfig config{regs, instance};
const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
scheduler.Record([this, config](vk::CommandBuffer, vk::CommandBuffer) {
MICROPROFILE_SCOPE(Vulkan_FragmentGeneration);
vk::ShaderModule handle{};
if (Settings::values.spirv_shader_gen) {
handle = fragment_shaders_spv.Get(config, instance.GetDevice());
} else {
handle = fragment_shaders_glsl.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::High);
}
auto [handle, result] = fragment_shaders.Get(config, vk::ShaderStageFlagBits::eFragment,
instance.GetDevice(), ShaderOptimization::Debug);
current_shaders[ProgramType::FS] = handle;
shader_hashes[ProgramType::FS] = config.Hash();
});
@ -305,17 +274,27 @@ void PipelineCache::BindSampler(u32 binding, vk::Sampler sampler) {
}
void PipelineCache::SetViewport(float x, float y, float width, float height) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const vk::Viewport viewport{x, y, width, height, 0.f, 1.f};
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setViewport(0, viewport);
});
if (viewport != current_viewport || is_dirty) {
scheduler.Record([viewport](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setViewport(0, viewport);
});
current_viewport = viewport;
}
}
void PipelineCache::SetScissor(s32 x, s32 y, u32 width, u32 height) {
const bool is_dirty = scheduler.IsStateDirty(StateFlags::Pipeline);
const vk::Rect2D scissor{{x, y}, {width, height}};
scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setScissor(0, scissor);
});
if (scissor != current_scissor || is_dirty) {
scheduler.Record([scissor](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setScissor(0, scissor);
});
current_scissor = scissor;
}
}
void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
@ -323,28 +302,23 @@ void PipelineCache::ApplyDynamic(const PipelineInfo& info) {
PipelineInfo current = current_info;
scheduler.Record([this, info, is_dirty, current](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
if (info.dynamic.stencil_compare_mask !=
current.dynamic.stencil_compare_mask ||
if (info.depth_stencil.stencil_compare_mask !=
current.depth_stencil.stencil_compare_mask ||
is_dirty) {
render_cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_compare_mask);
info.depth_stencil.stencil_compare_mask);
}
if (info.dynamic.stencil_write_mask != current.dynamic.stencil_write_mask ||
if (info.depth_stencil.stencil_write_mask != current.depth_stencil.stencil_write_mask ||
is_dirty) {
render_cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_write_mask);
info.depth_stencil.stencil_write_mask);
}
if (info.dynamic.stencil_reference != current.dynamic.stencil_reference ||
if (info.depth_stencil.stencil_reference != current.depth_stencil.stencil_reference ||
is_dirty) {
render_cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack,
info.dynamic.stencil_reference);
}
if (info.dynamic.blend_color != current.dynamic.blend_color || is_dirty) {
const Common::Vec4f color = PicaToVK::ColorRGBA8(info.dynamic.blend_color);
render_cmdbuf.setBlendConstants(color.AsArray());
info.depth_stencil.stencil_reference);
}
if (instance.IsExtendedDynamicStateSupported()) {
@ -419,12 +393,10 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
.stage = ToVkShaderStage(i), .module = shader, .pName = "main"};
}
/**
* Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
* increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
* as the input rate. Since one instance is all we render, the shader will always read the
* single attribute.
**/
// Vulkan doesn't intuitively support fixed attributes. To avoid duplicating the data and
// increasing data upload, when the fixed flag is true, we specify VK_VERTEX_INPUT_RATE_INSTANCE
// as the input rate. Since one instance is all we render, the shader will always read the
// single attribute.
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BINDINGS> bindings;
for (u32 i = 0; i < info.vertex_layout.binding_count; i++) {
const auto& binding = info.vertex_layout.bindings[i];
@ -435,37 +407,20 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
: vk::VertexInputRate::eVertex};
}
u32 emulated_attrib_count = 0;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES * 2> attributes;
// Populate vertex attribute structures
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attributes;
for (u32 i = 0; i < info.vertex_layout.attribute_count; i++) {
const VertexAttribute& attrib = info.vertex_layout.attributes[i];
const vk::Format format = ToVkAttributeFormat(attrib.type, attrib.size);
const bool is_supported = IsAttribFormatSupported(attrib, instance);
ASSERT_MSG(is_supported || attrib.size == 3);
attributes[i] = vk::VertexInputAttributeDescription{.location = attrib.location,
.binding = attrib.binding,
.format = is_supported ? format
: ToVkAttributeFormat(attrib.type, 2),
.offset = attrib.offset};
// When the requested 3-component vertex format is unsupported by the hardware
// it is emulated by breaking it into a vec2 + vec1. These are combined into a vec3
// by the vertex shader.
if (!is_supported) {
const u32 location = MAX_VERTEX_ATTRIBUTES + emulated_attrib_count++;
LOG_WARNING(Render_Vulkan, "\nEmulating attrib {} at location {}\n", attrib.location, location);
attributes[location] = vk::VertexInputAttributeDescription{.location = location,
.binding = attrib.binding,
.format = ToVkAttributeFormat(attrib.type, 1),
.offset = attrib.offset + AttribBytes(attrib.type, 2)};
}
const auto& attr = info.vertex_layout.attributes[i];
attributes[i] = vk::VertexInputAttributeDescription{.location = attr.location,
.binding = attr.binding,
.format = ToVkAttributeFormat(attr),
.offset = attr.offset};
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = info.vertex_layout.binding_count,
.pVertexBindingDescriptions = bindings.data(),
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count + emulated_attrib_count,
.vertexAttributeDescriptionCount = info.vertex_layout.attribute_count,
.pVertexAttributeDescriptions = attributes.data()};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
@ -494,7 +449,7 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = !info.blending.blend_enable.Value() && !instance.NeedsLogicOpEmulation(),
.logicOpEnable = !info.blending.blend_enable.Value(),
.logicOp = PicaToVK::LogicOp(info.blending.logic_op.Value()),
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
@ -505,7 +460,11 @@ vk::Pipeline PipelineCache::BuildPipeline(const PipelineInfo& info) {
const vk::Rect2D scissor = {.offset = {0, 0}, .extent = {1, 1}};
vk::PipelineViewportDepthClipControlCreateInfoEXT depth_clip_control = {.negativeOneToOne =
true};
const vk::PipelineViewportStateCreateInfo viewport_info = {
.pNext = &depth_clip_control,
.viewportCount = 1,
.pViewports = &viewport,
.scissorCount = 1,

@ -10,7 +10,7 @@
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/shader/shader_cache.h"
namespace Vulkan {
@ -29,16 +29,23 @@ union RasterizationState {
BitField<4, 2, Pica::RasterizerRegs::CullMode> cull_mode;
};
union DepthStencilState {
u32 value = 0;
BitField<0, 1, u32> depth_test_enable;
BitField<1, 1, u32> depth_write_enable;
BitField<2, 1, u32> stencil_test_enable;
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
struct DepthStencilState {
union {
u32 value = 0;
BitField<0, 1, u32> depth_test_enable;
BitField<1, 1, u32> depth_write_enable;
BitField<2, 1, u32> stencil_test_enable;
BitField<3, 3, Pica::FramebufferRegs::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::FramebufferRegs::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::FramebufferRegs::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::FramebufferRegs::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::FramebufferRegs::CompareFunc> stencil_compare_op;
};
// These are dynamic state so keep them separate
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
};
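Because the stencil reference and masks live outside the hashed bitfield, changing them does not force a new pipeline; they are applied as Vulkan dynamic state at draw time. A minimal sketch of how that can look, reusing the scheduler Record pattern seen elsewhere in this diff (the helper function itself is hypothetical):

void ApplyDynamicStencil(Scheduler& scheduler, const DepthStencilState& ds) {
    scheduler.Record([ds](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
        constexpr auto faces = vk::StencilFaceFlagBits::eFrontAndBack;
        // Requires the pipeline to declare the matching VK_DYNAMIC_STATE_STENCIL_* states.
        render_cmdbuf.setStencilReference(faces, ds.stencil_reference);
        render_cmdbuf.setStencilCompareMask(faces, ds.stencil_compare_mask);
        render_cmdbuf.setStencilWriteMask(faces, ds.stencil_write_mask);
    });
}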
union BlendingState {
@ -54,13 +61,6 @@ union BlendingState {
BitField<27, 4, Pica::FramebufferRegs::LogicOp> logic_op;
};
struct DynamicState {
u32 blend_color = 0;
u8 stencil_reference;
u8 stencil_compare_mask;
u8 stencil_write_mask;
};
union VertexBinding {
u16 value = 0;
BitField<0, 4, u16> binding;
@ -72,7 +72,7 @@ union VertexAttribute {
u32 value = 0;
BitField<0, 4, u32> binding;
BitField<4, 4, u32> location;
BitField<8, 3, Pica::PipelineRegs::VertexAttributeFormat> type;
BitField<8, 3, AttribType> type;
BitField<11, 3, u32> size;
BitField<14, 11, u32> offset;
};
@ -94,14 +94,13 @@ struct PipelineInfo {
VideoCore::PixelFormat depth_attachment = VideoCore::PixelFormat::D24S8;
RasterizationState rasterization{};
DepthStencilState depth_stencil{};
DynamicState dynamic;
[[nodiscard]] bool IsDepthWriteEnabled() const noexcept {
bool IsDepthWriteEnabled() const {
const bool has_stencil = depth_attachment == VideoCore::PixelFormat::D24S8;
const bool depth_write =
depth_stencil.depth_test_enable && depth_stencil.depth_write_enable;
const bool stencil_write = has_stencil && depth_stencil.stencil_test_enable &&
dynamic.stencil_write_mask != 0;
depth_stencil.stencil_write_mask != 0;
return depth_write || stencil_write;
}
@ -116,12 +115,9 @@ using ProgrammableVertexShaders = Pica::Shader::ShaderDoubleCache<PicaVSConfig,
using FixedGeometryShaders = Pica::Shader::ShaderCache<PicaFixedGSConfig, vk::ShaderModule,
&Compile, &GenerateFixedGeometryShader>;
using FragmentShadersGLSL =
using FragmentShaders =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &Compile, &GenerateFragmentShader>;
using FragmentShadersSPV =
Pica::Shader::ShaderCache<PicaFSConfig, vk::ShaderModule, &CompileSPV, &GenerateFragmentShaderSPV>;
class Instance;
class Scheduler;
class RenderpassCache;
@ -129,6 +125,7 @@ class DescriptorManager;
/**
* Stores a collection of rasterizer pipelines used during rendering.
* In addition, it handles descriptor set management.
*/
class PipelineCache {
public:
@ -212,6 +209,8 @@ private:
std::unordered_map<u64, vk::Pipeline, Common::IdentityHash<u64>> graphics_pipelines;
vk::Pipeline current_pipeline{};
PipelineInfo current_info{};
vk::Viewport current_viewport{};
vk::Rect2D current_scissor{};
// Bound shader modules
enum ProgramType : u32 { VS = 0, GS = 2, FS = 1 };
@ -220,8 +219,7 @@ private:
std::array<u64, MAX_SHADER_STAGES> shader_hashes;
ProgrammableVertexShaders programmable_vertex_shaders;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShadersGLSL fragment_shaders_glsl;
FragmentShadersSPV fragment_shaders_spv;
FragmentShaders fragment_shaders;
vk::ShaderModule trivial_vertex_shader;
};
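A note on the graphics_pipelines map above: its u64 key is already a hash of PipelineInfo, so Common::IdentityHash forwards the key instead of hashing it a second time. The definition is outside this compare; conceptually it is just (a sketch):

template <typename T>
struct IdentityHash {
    std::size_t operator()(const T& value) const noexcept {
        // The key is already a well-distributed hash; avoid double hashing.
        return static_cast<std::size_t>(value);
    }
};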

View File

@ -8,6 +8,7 @@
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK
#define VK_USE_PLATFORM_METAL_EXT
#else
#define VK_USE_PLATFORM_WAYLAND_KHR
@ -49,7 +50,9 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
} else if (window_info.type == Frontend::WindowSystemType::Wayland) {
}
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci = {
.display = static_cast<wl_display*>(window_info.display_connection),
.surface = static_cast<wl_surface*>(window_info.render_surface)};
@ -60,33 +63,10 @@ vk::SurfaceKHR CreateSurface(vk::Instance instance, const Frontend::EmuWindow& e
UNREACHABLE();
}
}
#elif defined(VK_USE_PLATFORM_METAL_EXT)
if (window_info.type == Frontend::WindowSystemType::MacOS) {
const vk::MetalSurfaceCreateInfoEXT macos_ci = {
.pLayer = static_cast<const CAMetalLayer*>(window_info.render_surface)
};
if (instance.createMetalSurfaceEXT(&macos_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize MacOS surface");
UNREACHABLE();
}
}
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
if (window_info.type == Frontend::WindowSystemType::Android) {
vk::AndroidSurfaceCreateInfoKHR android_ci = {
.window = reinterpret_cast<ANativeWindow*>(window_info.render_surface)
};
if (instance.createAndroidSurfaceKHR(&android_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Android surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
UNREACHABLE();
}
return surface;
@ -118,14 +98,6 @@ std::vector<const char*> GetInstanceExtensions(Frontend::WindowSystemType window
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#elif defined(VK_USE_PLATFORM_METAL_EXT)
case Frontend::WindowSystemType::MacOS:
extensions.push_back(VK_EXT_METAL_SURFACE_EXTENSION_NAME);
break;
#elif defined(VK_USE_PLATFORM_ANDROID_KHR)
case Frontend::WindowSystemType::Android:
extensions.push_back(VK_KHR_ANDROID_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");

View File

@ -8,7 +8,6 @@
#include "common/microprofile.h"
#include "video_core/pica_state.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_pipeline.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/renderer_vulkan/pica_to_vk.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -21,6 +20,74 @@
namespace Vulkan {
RasterizerVulkan::HardwareVertex::HardwareVertex(const Pica::Shader::OutputVertex& v,
bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
position[3] = v.pos.w.ToFloat32();
color[0] = v.color.x.ToFloat32();
color[1] = v.color.y.ToFloat32();
color[2] = v.color.z.ToFloat32();
color[3] = v.color.w.ToFloat32();
tex_coord0[0] = v.tc0.x.ToFloat32();
tex_coord0[1] = v.tc0.y.ToFloat32();
tex_coord1[0] = v.tc1.x.ToFloat32();
tex_coord1[1] = v.tc1.y.ToFloat32();
tex_coord2[0] = v.tc2.x.ToFloat32();
tex_coord2[1] = v.tc2.y.ToFloat32();
tex_coord0_w = v.tc0_w.ToFloat32();
normquat[0] = v.quat.x.ToFloat32();
normquat[1] = v.quat.y.ToFloat32();
normquat[2] = v.quat.z.ToFloat32();
normquat[3] = v.quat.w.ToFloat32();
view[0] = v.view.x.ToFloat32();
view[1] = v.view.y.ToFloat32();
view[2] = v.view.z.ToFloat32();
if (flip_quaternion) {
normquat = -normquat;
}
}
/**
* This maps to the following layout in GLSL code:
* layout(location = 0) in vec4 vert_position;
* layout(location = 1) in vec4 vert_color;
* layout(location = 2) in vec2 vert_texcoord0;
* layout(location = 3) in vec2 vert_texcoord1;
* layout(location = 4) in vec2 vert_texcoord2;
* layout(location = 5) in float vert_texcoord0_w;
* layout(location = 6) in vec4 vert_normquat;
* layout(location = 7) in vec3 vert_view;
*/
constexpr VertexLayout RasterizerVulkan::HardwareVertex::GetVertexLayout() {
VertexLayout layout{};
layout.attribute_count = 8;
layout.binding_count = 1;
// Define binding
layout.bindings[0].binding.Assign(0);
layout.bindings[0].fixed.Assign(0);
layout.bindings[0].stride.Assign(sizeof(HardwareVertex));
// Define attributes
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
u32 offset = 0;
for (u32 loc = 0; loc < 8; loc++) {
VertexAttribute& attribute = layout.attributes[loc];
attribute.binding.Assign(0);
attribute.location.Assign(loc);
attribute.offset.Assign(offset);
attribute.type.Assign(AttribType::Float);
attribute.size.Assign(sizes[loc]);
offset += sizes[loc] * sizeof(float);
}
return layout;
}
constexpr u32 VERTEX_BUFFER_SIZE = 256 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 16 * 1024 * 1024;
@ -65,6 +132,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
null_surface.Transition(vk::ImageLayout::eShaderReadOnlyOptimal, 0, 1);
null_storage_surface.Transition(vk::ImageLayout::eGeneral, 0, 1);
uniform_block_data.lighting_lut_dirty.fill(true);
uniform_buffer_alignment = instance.UniformMinAlignment();
uniform_size_aligned_vs =
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::VSUniformData), uniform_buffer_alignment);
@ -72,8 +141,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
Common::AlignUp<std::size_t>(sizeof(Pica::Shader::UniformData), uniform_buffer_alignment);
// Define vertex layout for software shaders
MakeSoftwareVertexLayout();
pipeline_info.vertex_layout = software_layout;
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
const SamplerInfo default_sampler_info = {
.mag_filter = Pica::TexturingRegs::TextureConfig::TextureFilter::Linear,
@ -109,6 +177,7 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window, const Instan
}
RasterizerVulkan::~RasterizerVulkan() {
renderpass_cache.ExitRenderpass();
scheduler.Finish();
vk::Device device = instance.GetDevice();
@ -175,28 +244,108 @@ void RasterizerVulkan::SyncFixedState() {
SyncDepthWriteMask();
}
/**
* This is a helper function to resolve an issue when interpolating opposite quaternions. See below
* for a detailed description of this issue (yuriks):
*
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
* interpolate two quaternions that are opposite, instead of going from one rotation to another
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
* opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
* making Dot(Q1, -Q2) positive.
*
* This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
* correct for most cases but can still rotate around the long way sometimes. An implementation
* which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
* between each step would work for those cases at the cost of being more complex to implement.
*
* Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
* these issues, making this basic implementation actually more accurate to the hardware.
*/
static bool AreQuaternionsOpposite(Common::Vec4<Pica::float24> qa, Common::Vec4<Pica::float24> qb) {
Common::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
Common::Vec4f b{qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32()};
return (Common::Dot(a, b) < 0.f);
}
void RasterizerVulkan::AddTriangle(const Pica::Shader::OutputVertex& v0,
const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) {
vertex_batch.emplace_back(v0, false);
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
}
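As a concrete instance of the dot-product rule described above (a standalone illustration, not code from this branch):

struct Quat { float x, y, z, w; };

constexpr float Dot(const Quat& a, const Quat& b) {
    return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
}

// Q and -Q encode the same rotation, but naive interpolation between them
// takes the long way around; the dot-product test detects exactly this case.
constexpr Quat q{0.f, 0.f, 0.f, 1.f};
constexpr Quat neg_q{0.f, 0.f, 0.f, -1.f};
static_assert(Dot(q, neg_q) < 0.f, "opposite quaternions have a negative dot product");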
static constexpr std::array vs_attrib_types = {
AttribType::Byte, // VertexAttributeFormat::BYTE
AttribType::Ubyte, // VertexAttributeFormat::UBYTE
AttribType::Short, // VertexAttributeFormat::SHORT
AttribType::Float // VertexAttributeFormat::FLOAT
};
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
RasterizerVulkan::VertexArrayInfo RasterizerVulkan::AnalyzeVertexArray(bool is_indexed) {
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
u32 vertex_min;
u32 vertex_max;
if (is_indexed) {
const auto& index_info = regs.pipeline.index_array;
const PAddr address = vertex_attributes.GetPhysicalBaseAddress() + index_info.offset;
const u8* index_address_8 = VideoCore::g_memory->GetPhysicalPointer(address);
const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
const bool index_u16 = index_info.format != 0;
vertex_min = 0xFFFF;
vertex_max = 0;
const u32 size = regs.pipeline.num_vertices * (index_u16 ? 2 : 1);
res_cache.FlushRegion(address, size, nullptr);
for (u32 index = 0; index < regs.pipeline.num_vertices; ++index) {
const u32 vertex = index_u16 ? index_address_16[index] : index_address_8[index];
vertex_min = std::min(vertex_min, vertex);
vertex_max = std::max(vertex_max, vertex);
}
} else {
vertex_min = regs.pipeline.vertex_offset;
vertex_max = regs.pipeline.vertex_offset + regs.pipeline.num_vertices - 1;
}
const u32 vertex_num = vertex_max - vertex_min + 1;
u32 vs_input_size = 0;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count != 0) {
vs_input_size += loader.byte_count * vertex_num;
}
}
return {vertex_min, vertex_max, vs_input_size};
}
void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min,
u32 vs_input_index_max) {
auto [array_ptr, array_offset, invalidate] = vertex_buffer.Map(vs_input_size, 4);
// The Nintendo 3DS has 12 attribute loaders which are used to tell the GPU
// how to interpret vertex data. The program first sets GPUREG_ATTR_BUF_BASE to the base
// address containing the vertex array data. The data for each attribute loader (i) can be found
// by adding GPUREG_ATTR_BUFi_OFFSET to the base address. Attribute loaders can be thought of
// as something analogous to Vulkan bindings. The user can store attributes in separate loaders
// or interleave them in the same loader.
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); // GPUREG_ATTR_BUF_BASE
VertexLayout& layout = pipeline_info.vertex_layout;
layout.attribute_count = 0;
layout.binding_count = 0;
enable_attributes.fill(false);
std::array<bool, 16> enable_attributes{};
VertexLayout layout{};
u32 buffer_offset = 0;
u32 buffer_offset = array_offset;
for (const auto& loader : vertex_attributes.attribute_loaders) {
if (loader.component_count == 0 || loader.byte_count == 0) {
continue;
@ -212,14 +361,16 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
offset, vertex_attributes.GetElementSizeInBytes(attribute_index));
const u32 input_reg = regs.vs.GetRegisterForAttribute(attribute_index);
const Pica::PipelineRegs::VertexAttributeFormat format =
vertex_attributes.GetFormat(attribute_index);
const u32 attrib_format =
static_cast<u32>(vertex_attributes.GetFormat(attribute_index));
const AttribType type = vs_attrib_types[attrib_format];
// Define the attribute
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(input_reg);
attribute.offset.Assign(offset);
attribute.type.Assign(format);
attribute.type.Assign(type);
attribute.size.Assign(size);
enable_attributes[input_reg] = true;
@ -236,10 +387,10 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
const PAddr data_addr =
base_address + loader.data_offset + (vs_input_index_min * loader.byte_count);
const u32 vertex_num = vs_input_index_max - vs_input_index_min + 1;
const u32 data_size = loader.byte_count * vertex_num;
u32 data_size = loader.byte_count * vertex_num;
res_cache.FlushRegion(data_addr, data_size);
std::memcpy(array_ptr + buffer_offset, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
res_cache.FlushRegion(data_addr, data_size, nullptr);
std::memcpy(array_ptr, VideoCore::g_memory->GetPhysicalPointer(data_addr), data_size);
// Create the binding associated with this loader
VertexBinding& binding = layout.bindings[layout.binding_count];
@ -248,40 +399,20 @@ void RasterizerVulkan::SetupVertexArray(u32 vs_input_size, u32 vs_input_index_mi
binding.stride.Assign(loader.byte_count);
// Keep track of the binding offsets so we can bind the vertex buffer later
binding_offsets[layout.binding_count++] = array_offset + buffer_offset;
buffer_offset += Common::AlignUp(data_size, 16);
binding_offsets[layout.binding_count++] = buffer_offset;
data_size = Common::AlignUp(data_size, 16);
array_ptr += data_size;
buffer_offset += data_size;
}
binding_offsets[layout.binding_count] = array_offset + buffer_offset;
vertex_buffer.Commit(buffer_offset);
// Assign the rest of the attributes to the last binding
SetupFixedAttribs();
// Bind the generated bindings
scheduler.Record([this, layout = pipeline_info.vertex_layout,
offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
offsets.data());
});
}
void RasterizerVulkan::SetupFixedAttribs() {
const auto& regs = Pica::g_state.regs;
const auto& vertex_attributes = regs.pipeline.vertex_attributes;
VertexLayout& layout = pipeline_info.vertex_layout;
auto [fixed_ptr, fixed_offset, _] = vertex_buffer.Map(16 * sizeof(Common::Vec4f));
// Reserve the last binding for fixed and default attributes
// Place the default attrib at offset zero for easy access
static const Common::Vec4f default_attrib{0.f, 0.f, 0.f, 1.f};
std::memcpy(fixed_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
constexpr Common::Vec4f default_attrib = Common::MakeVec(0.f, 0.f, 0.f, 1.f);
u32 offset = sizeof(Common::Vec4f);
std::memcpy(array_ptr, default_attrib.AsArray(), sizeof(Common::Vec4f));
array_ptr += sizeof(Common::Vec4f);
// Find all fixed attributes and assign them to the last binding
u32 offset = sizeof(Common::Vec4f);
for (std::size_t i = 0; i < 16; i++) {
if (vertex_attributes.IsDefaultAttribute(i)) {
const u32 reg = regs.vs.GetRegisterForAttribute(i);
@ -291,42 +422,56 @@ void RasterizerVulkan::SetupFixedAttribs() {
attr.w.ToFloat32()};
const u32 data_size = sizeof(float) * static_cast<u32>(data.size());
std::memcpy(fixed_ptr + offset, data.data(), data_size);
std::memcpy(array_ptr, data.data(), data_size);
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(reg);
attribute.offset.Assign(offset);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.type.Assign(AttribType::Float);
attribute.size.Assign(4);
offset += data_size;
array_ptr += data_size;
enable_attributes[reg] = true;
}
}
}
// Loop one more time to find unused attributes and assign them to the default one
// If the attribute is just disabled, shove the default attribute to avoid
// errors if the shader ever decides to use it.
// This needs to happen because i = 2 might be assigned to location = 3 so the loop
// above would skip setting it
for (u32 i = 0; i < 16; i++) {
// If the attribute is just disabled, shove the default attribute to avoid
// errors if the shader ever decides to use it. The pipeline cache can discard
// this if needed since it has access to the usage mask from the code generator
if (!enable_attributes[i]) {
VertexAttribute& attribute = layout.attributes[layout.attribute_count++];
attribute.binding.Assign(layout.binding_count);
attribute.location.Assign(i);
attribute.offset.Assign(0);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.type.Assign(AttribType::Float);
attribute.size.Assign(4);
}
}
// Define the fixed+default binding
VertexBinding& binding = layout.bindings[layout.binding_count];
binding.binding.Assign(layout.binding_count++);
binding.binding.Assign(layout.binding_count);
binding.fixed.Assign(1);
binding.stride.Assign(offset);
binding_offsets[layout.binding_count++] = buffer_offset;
buffer_offset += offset;
vertex_buffer.Commit(offset);
pipeline_info.vertex_layout = layout;
vertex_buffer.Commit(buffer_offset - array_offset);
scheduler.Record([this, layout, offsets = binding_offsets](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
std::array<vk::Buffer, 16> buffers;
buffers.fill(vertex_buffer.GetHandle());
render_cmdbuf.bindVertexBuffers(0, layout.binding_count, buffers.data(),
offsets.data());
});
}
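The Map/Commit pairing used throughout this function is the stream-buffer staging idiom: Map reserves space and hands back a write pointer plus the offset of that space within the buffer, the caller copies its data in, and Commit publishes exactly the number of bytes written. A condensed, self-contained sketch of the pattern (buffer and data names are illustrative):

#include <cstring>
#include <span>

void UploadToStream(StreamBuffer& buffer, std::span<const u8> data) {
    // Reserve space with 4-byte alignment; 'invalidate' reports whether the
    // buffer wrapped, meaning previously cached offsets are now stale.
    auto [ptr, offset, invalidate] = buffer.Map(data.size(), 4);
    std::memcpy(ptr, data.data(), data.size());
    // Publish the written range so following GPU commands may read it.
    buffer.Commit(data.size());
}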
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
@ -342,7 +487,7 @@ bool RasterizerVulkan::SetupGeometryShader() {
const auto& regs = Pica::g_state.regs;
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
LOG_ERROR(Render_Vulkan, "Accelerate draw doesn't support geometry shader");
return false;
}
@ -367,7 +512,7 @@ bool RasterizerVulkan::AccelerateDrawBatch(bool is_indexed) {
bool RasterizerVulkan::AccelerateDrawBatchInternal(bool is_indexed) {
const auto& regs = Pica::g_state.regs;
const auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
auto [vs_input_index_min, vs_input_index_max, vs_input_size] = AnalyzeVertexArray(is_indexed);
if (vs_input_size > VERTEX_BUFFER_SIZE) {
LOG_WARNING(Render_Vulkan, "Too large vertex input size {}", vs_input_size);
@ -461,10 +606,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
auto [color_surface, depth_surface, surfaces_rect] =
res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled);
if (!color_surface && shadow_rendering) {
return true;
}
pipeline_info.color_attachment =
color_surface ? color_surface->pixel_format : VideoCore::PixelFormat::Invalid;
pipeline_info.depth_attachment =
@ -671,7 +812,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Sync and bind the shader
if (shader_dirty) {
pipeline_cache.UseFragmentShader(regs);
SetShader();
shader_dirty = false;
}
@ -740,7 +881,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
succeeded = AccelerateDrawBatchInternal(is_indexed);
} else {
pipeline_info.rasterization.topology.Assign(Pica::PipelineRegs::TriangleTopology::List);
pipeline_info.vertex_layout = software_layout;
pipeline_info.vertex_layout = HardwareVertex::GetVertexLayout();
pipeline_cache.UseTrivialVertexShader();
pipeline_cache.UseTrivialGeometryShader();
pipeline_cache.BindPipeline(pipeline_info);
@ -822,9 +963,6 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
// Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (instance.NeedsLogicOpEmulation()) {
shader_dirty = true;
}
SyncBlendEnabled();
break;
case PICA_REG_INDEX(framebuffer.output_merger.alpha_blending):
@ -945,9 +1083,6 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
// Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (instance.NeedsLogicOpEmulation()) {
shader_dirty = true;
}
SyncLogicOp();
break;
@ -1473,33 +1608,6 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
return true;
}
void RasterizerVulkan::MakeSoftwareVertexLayout() {
constexpr std::array sizes = {4, 4, 2, 2, 2, 1, 4, 3};
software_layout = VertexLayout{
.binding_count = 1,
.attribute_count = 8
};
for (u32 i = 0; i < software_layout.binding_count; i++) {
VertexBinding& binding = software_layout.bindings[i];
binding.binding.Assign(i);
binding.fixed.Assign(0);
binding.stride.Assign(sizeof(HardwareVertex));
}
u32 offset = 0;
for (u32 i = 0; i < 8; i++) {
VertexAttribute& attribute = software_layout.attributes[i];
attribute.binding.Assign(0);
attribute.location.Assign(i);
attribute.offset.Assign(offset);
attribute.type.Assign(Pica::PipelineRegs::VertexAttributeFormat::FLOAT);
attribute.size.Assign(sizes[i]);
offset += sizes[i] * sizeof(float);
}
}
vk::Sampler RasterizerVulkan::CreateSampler(const SamplerInfo& info) {
const bool use_border_color = instance.IsCustomBorderColorSupported() &&
(info.wrap_s == SamplerInfo::TextureConfig::ClampToBorder ||
@ -1565,6 +1673,10 @@ void RasterizerVulkan::FlushBuffers() {
texture_lf_buffer.Flush();
}
void RasterizerVulkan::SetShader() {
pipeline_cache.UseFragmentShader(Pica::g_state.regs);
}
void RasterizerVulkan::SyncClipEnabled() {
uniform_block_data.data.enable_clip1 = Pica::g_state.regs.rasterizer.clip_enable != 0;
}
@ -1584,6 +1696,26 @@ void RasterizerVulkan::SyncCullMode() {
pipeline_info.rasterization.cull_mode.Assign(regs.rasterizer.cull_mode);
}
void RasterizerVulkan::SyncDepthScale() {
float depth_scale =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_range).ToFloat32();
if (depth_scale != uniform_block_data.data.depth_scale) {
uniform_block_data.data.depth_scale = depth_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncDepthOffset() {
float depth_offset =
Pica::float24::FromRaw(Pica::g_state.regs.rasterizer.viewport_depth_near_plane).ToFloat32();
if (depth_offset != uniform_block_data.data.depth_offset) {
uniform_block_data.data.depth_offset = depth_offset;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncBlendEnabled() {
pipeline_info.blending.blend_enable.Assign(
Pica::g_state.regs.framebuffer.output_merger.alphablend_enable);
@ -1607,46 +1739,73 @@ void RasterizerVulkan::SyncBlendFuncs() {
}
void RasterizerVulkan::SyncBlendColor() {
const Common::Vec4f blend_color =
PicaToVK::ColorRGBA8(Pica::g_state.regs.framebuffer.output_merger.blend_const.raw);
scheduler.Record([blend_color](vk::CommandBuffer render_cmdbuf, vk::CommandBuffer) {
render_cmdbuf.setBlendConstants(blend_color.AsArray());
});
}
void RasterizerVulkan::SyncFogColor() {
const auto& regs = Pica::g_state.regs;
pipeline_info.dynamic.blend_color = regs.framebuffer.output_merger.blend_const.raw;
uniform_block_data.data.fog_color = {
regs.texturing.fog_color.r.Value() / 255.0f,
regs.texturing.fog_color.g.Value() / 255.0f,
regs.texturing.fog_color.b.Value() / 255.0f,
};
uniform_block_data.dirty = true;
}
void RasterizerVulkan::SyncProcTexNoise() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_noise_f = {
Pica::float16::FromRaw(regs.proctex_noise_frequency.u).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_frequency.v).ToFloat32(),
};
uniform_block_data.data.proctex_noise_a = {
regs.proctex_noise_u.amplitude / 4095.0f,
regs.proctex_noise_v.amplitude / 4095.0f,
};
uniform_block_data.data.proctex_noise_p = {
Pica::float16::FromRaw(regs.proctex_noise_u.phase).ToFloat32(),
Pica::float16::FromRaw(regs.proctex_noise_v.phase).ToFloat32(),
};
uniform_block_data.dirty = true;
}
void RasterizerVulkan::SyncProcTexBias() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_bias =
Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
.ToFloat32();
uniform_block_data.dirty = true;
}
void RasterizerVulkan::SyncAlphaTest() {
const auto& regs = Pica::g_state.regs;
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
uniform_block_data.data.alphatest_ref = regs.framebuffer.output_merger.alpha_test.ref;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLogicOp() {
const auto& regs = Pica::g_state.regs;
const bool is_logic_op_emulated =
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
const bool is_logic_op_noop =
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
if (is_logic_op_emulated && is_logic_op_noop) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write.
pipeline_info.blending.color_write_mask.Assign(0);
} else {
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
}
pipeline_info.blending.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
}
void RasterizerVulkan::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs;
const u32 color_mask = (regs.framebuffer.output_merger.depth_color_mask >> 8) & 0xF;
const bool is_logic_op_emulated =
instance.NeedsLogicOpEmulation() && !regs.framebuffer.output_merger.alphablend_enable;
const bool is_logic_op_noop =
regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp;
if (is_logic_op_emulated && is_logic_op_noop) {
// Color output is disabled by logic operation. We use color write mask to skip
// color but allow depth write. Return early to avoid overwriting this.
return;
}
pipeline_info.blending.color_write_mask.Assign(color_mask);
}
void RasterizerVulkan::SyncStencilWriteMask() {
const auto& regs = Pica::g_state.regs;
pipeline_info.dynamic.stencil_write_mask =
pipeline_info.depth_stencil.stencil_write_mask =
(regs.framebuffer.framebuffer.allow_depth_stencil_write != 0)
? static_cast<u32>(regs.framebuffer.output_merger.stencil_test.write_mask)
: 0;
@ -1672,8 +1831,8 @@ void RasterizerVulkan::SyncStencilTest() {
pipeline_info.depth_stencil.stencil_pass_op.Assign(stencil_test.action_depth_pass);
pipeline_info.depth_stencil.stencil_depth_fail_op.Assign(stencil_test.action_depth_fail);
pipeline_info.depth_stencil.stencil_compare_op.Assign(stencil_test.func);
pipeline_info.dynamic.stencil_reference = stencil_test.reference_value;
pipeline_info.dynamic.stencil_compare_mask = stencil_test.input_mask;
pipeline_info.depth_stencil.stencil_reference = stencil_test.reference_value;
pipeline_info.depth_stencil.stencil_compare_mask = stencil_test.input_mask;
}
void RasterizerVulkan::SyncDepthTest() {
@ -1689,6 +1848,132 @@ void RasterizerVulkan::SyncDepthTest() {
pipeline_info.depth_stencil.depth_compare_op.Assign(compare_op);
}
void RasterizerVulkan::SyncCombinerColor() {
auto combiner_color =
PicaToVK::ColorRGBA8(Pica::g_state.regs.texturing.tev_combiner_buffer_color.raw);
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
uniform_block_data.data.tev_combiner_buffer_color = combiner_color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncTevConstColor(std::size_t stage_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage) {
const auto const_color = PicaToVK::ColorRGBA8(tev_stage.const_color);
if (const_color == uniform_block_data.data.const_color[stage_index]) {
return;
}
uniform_block_data.data.const_color[stage_index] = const_color;
uniform_block_data.dirty = true;
}
void RasterizerVulkan::SyncGlobalAmbient() {
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.global_ambient);
if (color != uniform_block_data.data.lighting_global_ambient) {
uniform_block_data.data.lighting_global_ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightSpecular0(int light_index) {
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
uniform_block_data.data.light_src[light_index].specular_0 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightSpecular1(int light_index) {
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
uniform_block_data.data.light_src[light_index].specular_1 = color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightDiffuse(int light_index) {
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
uniform_block_data.data.light_src[light_index].diffuse = color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightAmbient(int light_index) {
auto color = PicaToVK::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
if (color != uniform_block_data.data.light_src[light_index].ambient) {
uniform_block_data.data.light_src[light_index].ambient = color;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightPosition(int light_index) {
const Common::Vec3f position = {
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32()};
if (position != uniform_block_data.data.light_src[light_index].position) {
uniform_block_data.data.light_src[light_index].position = position;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightSpotDirection(int light_index) {
const auto& light = Pica::g_state.regs.lighting.light[light_index];
const auto spot_direction = Common::Vec3i{light.spot_x, light.spot_y, light.spot_z} / 2047.0f;
if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
float dist_atten_bias =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
.ToFloat32();
if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
float dist_atten_scale =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
.ToFloat32();
if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
uniform_block_data.data.shadow_bias_constant = constant;
uniform_block_data.data.shadow_bias_linear = linear;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncShadowTextureBias() {
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
if (bias != uniform_block_data.data.shadow_texture_bias) {
uniform_block_data.data.shadow_texture_bias = bias;
uniform_block_data.dirty = true;
}
}
void RasterizerVulkan::SyncAndUploadLUTsLF() {
constexpr std::size_t max_size =
sizeof(Common::Vec2f) * 256 * Pica::LightingRegs::NumLightingSampler +

View File

@ -4,11 +4,16 @@
#pragma once
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_uniforms.h"
namespace Frontend {
class EmuWindow;
@ -83,6 +88,8 @@ public:
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
void AddTriangle(const Pica::Shader::OutputVertex& v0, const Pica::Shader::OutputVertex& v1,
const Pica::Shader::OutputVertex& v2) override;
void DrawTriangles() override;
void NotifyPicaRegisterChanged(u32 id) override;
void FlushAll() override;
@ -112,9 +119,18 @@ private:
/// Syncs the clip coefficients to match the PICA register
void SyncClipCoef();
/// Sets the OpenGL shader in accordance with the current PICA register state
void SetShader();
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
/// Syncs the depth scale to match the PICA register
void SyncDepthScale();
/// Syncs the depth offset to match the PICA register
void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@ -124,6 +140,18 @@ private:
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
/// Syncs the fog states to match the PICA register
void SyncFogColor();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture bias configuration to match the PICA register
void SyncProcTexBias();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
/// Syncs the logic op states to match the PICA register
void SyncLogicOp();
@ -142,6 +170,46 @@ private:
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
/// Syncs the TEV constant color to match the PICA register
void SyncTevConstColor(std::size_t tev_index,
const Pica::TexturingRegs::TevStageConfig& tev_stage);
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
/// Syncs the specified light's specular 1 color to match the PICA register
void SyncLightSpecular1(int light_index);
/// Syncs the specified light's diffuse color to match the PICA register
void SyncLightDiffuse(int light_index);
/// Syncs the specified light's ambient color to match the PICA register
void SyncLightAmbient(int light_index);
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
/// Syncs the specified spot light direction to match the PICA register
void SyncLightSpotDirection(int light_index);
/// Syncs the specified light's distance attenuation bias to match the PICA register
void SyncLightDistanceAttenuationBias(int light_index);
/// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs the shadow texture bias to match the PICA register
void SyncShadowTextureBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
void SyncAndUploadLUTsLF();
@ -155,21 +223,27 @@ private:
/// Internal implementation for AccelerateDrawBatch
bool AccelerateDrawBatchInternal(bool is_indexed);
/// Copies vertex data, performing the needed conversions and casts
void PaddedVertexCopy(u32 stride, u32 vertex_num, u8* data);
struct VertexArrayInfo {
u32 vs_input_index_min;
u32 vs_input_index_max;
u32 vs_input_size;
};
/// Retrieve the range and the size of the input vertex
VertexArrayInfo AnalyzeVertexArray(bool is_indexed);
/// Setup vertex array for AccelerateDrawBatch
void SetupVertexArray(u32 vs_input_size, u32 vs_input_index_min, u32 vs_input_index_max);
/// Setup the fixed attribute emulation in vulkan
void SetupFixedAttribs();
/// Setup vertex shader for AccelerateDrawBatch
bool SetupVertexShader();
/// Setup geometry shader for AccelerateDrawBatch
bool SetupGeometryShader();
/// Creates the vertex layout struct used for software shader pipelines
void MakeSoftwareVertexLayout();
/// Creates a new sampler object
vk::Sampler CreateSampler(const SamplerInfo& info);
@ -184,14 +258,44 @@ private:
DescriptorManager& desc_manager;
RasterizerCache res_cache;
PipelineCache pipeline_cache;
bool shader_dirty = true;
VertexLayout software_layout;
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion);
constexpr static VertexLayout GetVertexLayout();
Common::Vec4f position;
Common::Vec4f color;
Common::Vec2f tex_coord0;
Common::Vec2f tex_coord1;
Common::Vec2f tex_coord2;
float tex_coord0_w;
Common::Vec4f normquat;
Common::Vec3f view;
};
std::vector<HardwareVertex> vertex_batch;
std::array<u64, 16> binding_offsets{};
std::array<bool, 16> enable_attributes{};
vk::Sampler default_sampler;
Surface null_surface;
Surface null_storage_surface;
struct {
Pica::Shader::UniformData data{};
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty{};
bool lighting_lut_dirty_any = true;
bool fog_lut_dirty = true;
bool proctex_noise_lut_dirty = true;
bool proctex_color_map_dirty = true;
bool proctex_alpha_map_dirty = true;
bool proctex_lut_dirty = true;
bool proctex_diff_lut_dirty = true;
bool dirty = true;
} uniform_block_data = {};
std::array<SamplerInfo, 3> texture_samplers;
SamplerInfo texture_cube_sampler;
std::unordered_map<SamplerInfo, vk::Sampler> samplers;
@ -206,6 +310,15 @@ private:
std::size_t uniform_buffer_alignment;
std::size_t uniform_size_aligned_vs;
std::size_t uniform_size_aligned_fs;
std::array<std::array<Common::Vec2f, 256>, Pica::LightingRegs::NumLightingSampler>
lighting_lut_data{};
std::array<Common::Vec2f, 128> fog_lut_data{};
std::array<Common::Vec2f, 128> proctex_noise_lut_data{};
std::array<Common::Vec2f, 128> proctex_color_map_data{};
std::array<Common::Vec2f, 128> proctex_alpha_map_data{};
std::array<Common::Vec4f, 256> proctex_lut_data{};
std::array<Common::Vec4f, 256> proctex_diff_lut_data{};
};
} // namespace Vulkan

View File

@ -121,12 +121,11 @@ void DescriptorPool::RefreshTick() {
}
void DescriptorPool::Allocate(std::size_t begin, std::size_t end) {
LOG_INFO(Render_Vulkan, "Allocating new descriptor pool");
vk::DescriptorPool& pool = pools.emplace_back();
// Choose a sane pool size good for most games
static constexpr std::array<vk::DescriptorPoolSize, 5> pool_sizes = {{
{vk::DescriptorType::eUniformBuffer, 4096},
{vk::DescriptorType::eUniformBuffer, 2048},
{vk::DescriptorType::eSampledImage, 4096},
{vk::DescriptorType::eSampler, 4096},
{vk::DescriptorType::eUniformTexelBuffer, 2048},

View File

@ -4,7 +4,7 @@
#include <mutex>
#include <utility>
#include "common/microprofile.h"
#include "core/settings.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
@ -25,29 +25,14 @@ void Scheduler::CommandChunk::ExecuteAll(vk::CommandBuffer render_cmdbuf, vk::Co
last = nullptr;
}
Scheduler::Scheduler(const Instance& instance, RenderpassCache& renderpass_cache, RendererVulkan& renderer)
: instance{instance}, renderpass_cache{renderpass_cache}, renderer{renderer}, master_semaphore{instance},
command_pool{instance, master_semaphore}, stop_requested{false},
use_worker_thread{Settings::values.async_command_recording} {
Scheduler::Scheduler(const Instance& instance, RendererVulkan& renderer)
: instance{instance}, renderer{renderer}, master_semaphore{instance}, command_pool{instance, master_semaphore} {
AcquireNewChunk();
AllocateWorkerCommandBuffers();
if (use_worker_thread) {
AcquireNewChunk();
worker_thread = std::thread([this]() { WorkerThread(); });
}
worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); });
}
Scheduler::~Scheduler() {
stop_requested = true;
// Push a dummy chunk to unblock the thread
{
std::scoped_lock lock{work_mutex};
work_queue.push(std::move(chunk));
}
work_cv.notify_one();
worker_thread.join();
}
Scheduler::~Scheduler() = default;
void Scheduler::Flush(vk::Semaphore signal, vk::Semaphore wait) {
SubmitExecution(signal, wait);
@ -62,10 +47,6 @@ void Scheduler::Finish(vk::Semaphore signal, vk::Semaphore wait) {
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
void Scheduler::WaitWorker() {
if (!use_worker_thread) {
return;
}
MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
DispatchWork();
@ -87,7 +68,9 @@ void Scheduler::DispatchWork() {
AcquireNewChunk();
}
void Scheduler::WorkerThread() {
void Scheduler::WorkerThread(std::stop_token stop_token) {
Common::SetCurrentThreadName("Vulkan Worker Thread");
do {
std::unique_ptr<CommandChunk> work;
bool has_submit{false};
@ -96,8 +79,8 @@ void Scheduler::WorkerThread() {
if (work_queue.empty()) {
wait_cv.notify_all();
}
work_cv.wait(lock, [this] { return !work_queue.empty() || stop_requested; });
if (stop_requested) {
work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); });
if (stop_token.stop_requested()) {
continue;
}
work = std::move(work_queue.front());
@ -111,7 +94,7 @@ void Scheduler::WorkerThread() {
}
std::scoped_lock reserve_lock{reserve_mutex};
chunk_reserve.push_back(std::move(work));
} while (!stop_requested);
} while (!stop_token.stop_requested());
}
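The loop above leans on the C++20 std::jthread/std::stop_token machinery: std::condition_variable_any::wait(lock, stop_token, pred) wakes when the predicate holds or a stop is requested (returning false in the latter case), and the jthread destructor requests stop and joins automatically, which is why the explicit shutdown code could be deleted. A standalone sketch of the same idiom:

#include <condition_variable>
#include <mutex>
#include <queue>
#include <thread>

class Worker {
public:
    Worker() : thread{[this](std::stop_token token) { Loop(token); }} {}
    // ~Worker() implicitly calls thread.request_stop() and joins.

    void Push(int item) {
        {
            std::scoped_lock lock{mutex};
            queue.push(item);
        }
        cv.notify_one();
    }

private:
    void Loop(std::stop_token token) {
        while (!token.stop_requested()) {
            std::unique_lock lock{mutex};
            // Returns false if stop was requested before the predicate held.
            if (!cv.wait(lock, token, [this] { return !queue.empty(); })) {
                return;
            }
            const int item = queue.front();
            queue.pop();
            static_cast<void>(item); // a real worker would process the item here
        }
    }

    std::mutex mutex;
    std::condition_variable_any cv;
    std::queue<int> queue;
    std::jthread thread;
};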
void Scheduler::AllocateWorkerCommandBuffers() {
@ -126,16 +109,13 @@ void Scheduler::AllocateWorkerCommandBuffers() {
render_cmdbuf.begin(begin_info);
}
MICROPROFILE_DEFINE(Vulkan_Submit, "Vulkan", "Submit Execution", MP_RGB(255, 192, 255));
void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wait_semaphore) {
renderer.FlushBuffers();
const u64 signal_value = master_semaphore.NextTick();
state = StateFlags::AllDirty;
renderpass_cache.ExitRenderpass();
Record([signal_semaphore, wait_semaphore, signal_value, this]
(vk::CommandBuffer render_cmdbuf, vk::CommandBuffer upload_cmdbuf) {
MICROPROFILE_SCOPE(Vulkan_Submit);
upload_cmdbuf.end();
render_cmdbuf.end();
@ -182,12 +162,8 @@ void Scheduler::SubmitExecution(vk::Semaphore signal_semaphore, vk::Semaphore wa
}
});
if (!use_worker_thread) {
AllocateWorkerCommandBuffers();
} else {
chunk->MarkSubmit();
DispatchWork();
}
chunk->MarkSubmit();
DispatchWork();
}
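master_semaphore.NextTick() above hands out the timeline value this submission will signal. MasterSemaphore itself is outside this compare, but the mechanism maps directly onto VK_KHR_timeline_semaphore: every submit signals a monotonically increasing value, and the host can block until a given tick is reached. A minimal sketch of that host-side wait (an assumption about the internals, not the actual class):

#include <limits>

// Block the host until the GPU has signalled at least 'tick' on the timeline.
void WaitForTick(vk::Device device, vk::Semaphore timeline, u64 tick) {
    const vk::SemaphoreWaitInfo wait_info = {
        .semaphoreCount = 1,
        .pSemaphores = &timeline,
        .pValues = &tick,
    };
    // Timeout is in nanoseconds; waiting forever mirrors Scheduler::Finish.
    const vk::Result result = device.waitSemaphores(wait_info, std::numeric_limits<u64>::max());
    ASSERT(result == vk::Result::eSuccess);
}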
void Scheduler::AcquireNewChunk() {

View File

@ -27,15 +27,13 @@ enum class StateFlags {
DECLARE_ENUM_FLAG_OPERATORS(StateFlags)
class Instance;
class RenderpassCache;
class RendererVulkan;
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class Scheduler {
public:
explicit Scheduler(const Instance& instance, RenderpassCache& renderpass_cache,
RendererVulkan& renderer);
explicit Scheduler(const Instance& instance, RendererVulkan& renderer);
~Scheduler();
/// Sends the current execution context to the GPU.
@ -54,11 +52,6 @@ public:
/// Records the command to the current chunk.
template <typename T>
void Record(T&& command) {
if (!use_worker_thread) {
command(render_cmdbuf, upload_cmdbuf);
return;
}
if (chunk->Record(command)) {
return;
}
@ -185,7 +178,7 @@ private:
};
private:
void WorkerThread();
void WorkerThread(std::stop_token stop_token);
void AllocateWorkerCommandBuffers();
@ -195,7 +188,6 @@ private:
private:
const Instance& instance;
RenderpassCache& renderpass_cache;
RendererVulkan& renderer;
MasterSemaphore master_semaphore;
CommandPool command_pool;
@ -209,9 +201,7 @@ private:
std::mutex work_mutex;
std::condition_variable_any work_cv;
std::condition_variable wait_cv;
std::thread worker_thread;
std::atomic_bool stop_requested;
bool use_worker_thread;
std::jthread worker_thread;
};
} // namespace Vulkan

View File

@ -8,10 +8,8 @@
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/pica_state.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/video_core.h"
using Pica::FramebufferRegs;
@ -101,29 +99,25 @@ out gl_PerVertex {
return out;
}
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.scissor_test_mode.Assign(regs.rasterizer.scissor_test.mode);
state.depthmap_enable.Assign(regs.rasterizer.depthmap_enable);
state.alpha_test_func.Assign(regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: FramebufferRegs::CompareFunc::Always);
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
PicaFSConfig res{};
auto& state = res.state;
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
state.depthmap_enable = regs.rasterizer.depthmap_enable;
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: FramebufferRegs::CompareFunc::Always;
state.texture0_type.Assign(regs.texturing.texture0.type);
state.texture0_type = regs.texturing.texture0.type;
state.texture2_use_coord1.Assign(regs.texturing.main_config.texture2_use_coord1 != 0);
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
// Emulate logic op in the shader if not supported. This is mostly for mobile GPUs
const bool emulate_logic_op = instance.NeedsLogicOpEmulation() &&
!Pica::g_state.regs.framebuffer.output_merger.alphablend_enable;
state.emulate_logic_op.Assign(emulate_logic_op);
if (emulate_logic_op) {
state.logic_op.Assign(regs.framebuffer.output_merger.logic_op);
} else {
state.logic_op.Assign(Pica::FramebufferRegs::LogicOp::NoOp);
}
state.alphablend_enable = {};
state.logic_op = {};
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
@ -138,95 +132,95 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
}
state.fog_mode.Assign(regs.texturing.fog_mode);
state.fog_flip.Assign(regs.texturing.fog_flip != 0);
state.fog_mode = regs.texturing.fog_mode;
state.fog_flip = regs.texturing.fog_flip != 0;
state.combiner_buffer_input.Assign(regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4);
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value() << 4;
// Fragment lighting
state.lighting.enable.Assign(!regs.lighting.disable);
state.lighting.src_num.Assign(regs.lighting.max_light_index + 1);
state.lighting.enable = !regs.lighting.disable;
state.lighting.src_num = regs.lighting.max_light_index + 1;
for (u32 light_index = 0; light_index < state.lighting.src_num; ++light_index) {
u32 num = regs.lighting.light_enable.GetNum(light_index);
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num.Assign(num);
state.lighting.light[light_index].directional.Assign(light.config.directional != 0);
state.lighting.light[light_index].two_sided_diffuse.Assign(light.config.two_sided_diffuse != 0);
state.lighting.light[light_index].geometric_factor_0.Assign(light.config.geometric_factor_0 != 0);
state.lighting.light[light_index].geometric_factor_1.Assign(light.config.geometric_factor_1 != 0);
state.lighting.light[light_index].dist_atten_enable.Assign(
!regs.lighting.IsDistAttenDisabled(num));
state.lighting.light[light_index].spot_atten_enable.Assign(
!regs.lighting.IsSpotAttenDisabled(num));
state.lighting.light[light_index].shadow_enable.Assign(!regs.lighting.IsShadowDisabled(num));
state.lighting.light[light_index].num = num;
state.lighting.light[light_index].directional = light.config.directional != 0;
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
state.lighting.light[light_index].dist_atten_enable =
!regs.lighting.IsDistAttenDisabled(num);
state.lighting.light[light_index].spot_atten_enable =
!regs.lighting.IsSpotAttenDisabled(num);
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
state.lighting.lut_d0.enable.Assign(regs.lighting.config1.disable_lut_d0 == 0);
state.lighting.lut_d0.abs_input.Assign(regs.lighting.abs_lut_input.disable_d0 == 0);
state.lighting.lut_d0.type.Assign(regs.lighting.lut_input.d0.Value());
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
state.lighting.lut_d1.enable.Assign(regs.lighting.config1.disable_lut_d1 == 0);
state.lighting.lut_d1.abs_input.Assign(regs.lighting.abs_lut_input.disable_d1 == 0);
state.lighting.lut_d1.type.Assign(regs.lighting.lut_input.d1.Value());
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
// this is a dummy field due to lack of the corresponding register
state.lighting.lut_sp.enable.Assign(1);
state.lighting.lut_sp.abs_input.Assign(regs.lighting.abs_lut_input.disable_sp == 0);
state.lighting.lut_sp.type.Assign(regs.lighting.lut_input.sp.Value());
state.lighting.lut_sp.enable = true;
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
state.lighting.lut_fr.enable.Assign(regs.lighting.config1.disable_lut_fr == 0);
state.lighting.lut_fr.abs_input.Assign(regs.lighting.abs_lut_input.disable_fr == 0);
state.lighting.lut_fr.type.Assign(regs.lighting.lut_input.fr.Value());
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
state.lighting.lut_rr.enable.Assign(regs.lighting.config1.disable_lut_rr == 0);
state.lighting.lut_rr.abs_input.Assign(regs.lighting.abs_lut_input.disable_rr == 0);
state.lighting.lut_rr.type.Assign(regs.lighting.lut_input.rr.Value());
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
state.lighting.lut_rg.enable.Assign(regs.lighting.config1.disable_lut_rg == 0);
state.lighting.lut_rg.abs_input.Assign(regs.lighting.abs_lut_input.disable_rg == 0);
state.lighting.lut_rg.type.Assign(regs.lighting.lut_input.rg.Value());
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
state.lighting.lut_rb.enable.Assign(regs.lighting.config1.disable_lut_rb == 0);
state.lighting.lut_rb.abs_input.Assign(regs.lighting.abs_lut_input.disable_rb == 0);
state.lighting.lut_rb.type.Assign(regs.lighting.lut_input.rb.Value());
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
state.lighting.config.Assign(regs.lighting.config0.config);
state.lighting.enable_primary_alpha.Assign(regs.lighting.config0.enable_primary_alpha);
state.lighting.enable_secondary_alpha.Assign(regs.lighting.config0.enable_secondary_alpha);
state.lighting.bump_mode.Assign(regs.lighting.config0.bump_mode);
state.lighting.bump_selector.Assign(regs.lighting.config0.bump_selector);
state.lighting.bump_renorm.Assign(regs.lighting.config0.disable_bump_renorm == 0);
state.lighting.clamp_highlights.Assign(regs.lighting.config0.clamp_highlights != 0);
state.lighting.config = regs.lighting.config0.config;
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
state.lighting.enable_shadow.Assign(regs.lighting.config0.enable_shadow != 0);
state.lighting.shadow_primary.Assign(regs.lighting.config0.shadow_primary != 0);
state.lighting.shadow_secondary.Assign(regs.lighting.config0.shadow_secondary != 0);
state.lighting.shadow_invert.Assign(regs.lighting.config0.shadow_invert != 0);
state.lighting.shadow_alpha.Assign(regs.lighting.config0.shadow_alpha != 0);
state.lighting.shadow_selector.Assign(regs.lighting.config0.shadow_selector);
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
state.proctex.enable.Assign(regs.texturing.main_config.texture3_enable);
state.proctex.enable = regs.texturing.main_config.texture3_enable;
if (state.proctex.enable) {
state.proctex.coord.Assign(regs.texturing.main_config.texture3_coordinates);
state.proctex.u_clamp.Assign(regs.texturing.proctex.u_clamp);
state.proctex.v_clamp.Assign(regs.texturing.proctex.v_clamp);
state.proctex.color_combiner.Assign(regs.texturing.proctex.color_combiner);
state.proctex.alpha_combiner.Assign(regs.texturing.proctex.alpha_combiner);
state.proctex.separate_alpha.Assign(regs.texturing.proctex.separate_alpha);
state.proctex.noise_enable.Assign(regs.texturing.proctex.noise_enable);
state.proctex.u_shift.Assign(regs.texturing.proctex.u_shift);
state.proctex.v_shift.Assign(regs.texturing.proctex.v_shift);
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
state.proctex.u_shift = regs.texturing.proctex.u_shift;
state.proctex.v_shift = regs.texturing.proctex.v_shift;
state.proctex.lut_width = regs.texturing.proctex_lut.width;
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
@ -234,16 +228,17 @@ PicaFSConfig::PicaFSConfig(const Pica::Regs& regs, const Instance& instance) {
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
state.proctex.lut_filter.Assign(regs.texturing.proctex_lut.filter);
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
state.shadow_rendering.Assign(regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow);
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow;
state.shadow_texture_orthographic.Assign(regs.texturing.shadow.orthographic != 0);
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
return res;
}
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
@ -498,33 +493,33 @@ static void AppendColorCombiner(std::string& out, TevStageConfig::Operation oper
using Operation = TevStageConfig::Operation;
switch (operation) {
case Operation::Replace:
out += "color_results_1";
out += fmt::format("{}[0]", variable_name);
break;
case Operation::Modulate:
out += "color_results_1 * color_results_2";
out += fmt::format("{0}[0] * {0}[1]", variable_name);
break;
case Operation::Add:
out += "color_results_1 + color_results_2";
out += fmt::format("{0}[0] + {0}[1]", variable_name);
break;
case Operation::AddSigned:
out += "color_results_1 + color_results_2 - vec3(0.5)";
out += fmt::format("{0}[0] + {0}[1] - vec3(0.5)", variable_name);
break;
case Operation::Lerp:
out += "color_results_1 * color_results_3 + color_results_2 * (vec3(1.0) - color_results_3)";
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (vec3(1.0) - {0}[2])", variable_name);
break;
case Operation::Subtract:
out += "color_results_1 - color_results_2";
out += fmt::format("{0}[0] - {0}[1]", variable_name);
break;
case Operation::MultiplyThenAdd:
out += "color_results_1 * color_results_2 + color_results_3";
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
break;
case Operation::AddThenMultiply:
out += "min(color_results_1 + color_results_2, vec3(1.0)) * color_results_3";
out += fmt::format("min({0}[0] + {0}[1], vec3(1.0)) * {0}[2]", variable_name);
break;
case Operation::Dot3_RGB:
case Operation::Dot3_RGBA:
out +=
"vec3(dot(color_results_1 - vec3(0.5), color_results_2 - vec3(0.5)) * 4.0)";
fmt::format("vec3(dot({0}[0] - vec3(0.5), {0}[1] - vec3(0.5)) * 4.0)", variable_name);
break;
default:
out += "vec3(0.0)";
@ -541,28 +536,28 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
using Operation = TevStageConfig::Operation;
switch (operation) {
case Operation::Replace:
out += "alpha_results_1";
out += fmt::format("{}[0]", variable_name);
break;
case Operation::Modulate:
out += "alpha_results_1 * alpha_results_2";
out += fmt::format("{0}[0] * {0}[1]", variable_name);
break;
case Operation::Add:
out += "alpha_results_1 + alpha_results_2";
out += fmt::format("{0}[0] + {0}[1]", variable_name);
break;
case Operation::AddSigned:
out += "alpha_results_1 + alpha_results_2 - 0.5";
out += fmt::format("{0}[0] + {0}[1] - 0.5", variable_name);
break;
case Operation::Lerp:
out += "alpha_results_1 * alpha_results_3 + alpha_results_2 * (1.0 - alpha_results_3)";
out += fmt::format("{0}[0] * {0}[2] + {0}[1] * (1.0 - {0}[2])", variable_name);
break;
case Operation::Subtract:
out += "alpha_results_1 - alpha_results_2";
out += fmt::format("{0}[0] - {0}[1]", variable_name);
break;
case Operation::MultiplyThenAdd:
out += "alpha_results_1 * alpha_results_2 + alpha_results_3";
out += fmt::format("{0}[0] * {0}[1] + {0}[2]", variable_name);
break;
case Operation::AddThenMultiply:
out += "min(alpha_results_1 + alpha_results_2, 1.0) * alpha_results_3";
out += fmt::format("min({0}[0] + {0}[1], 1.0) * {0}[2]", variable_name);
break;
default:
out += "0.0";
@ -608,34 +603,38 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index);
out += fmt::format("color_results_1 = ", index_name);
out += fmt::format("vec3 color_results_{}_1 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name);
out += fmt::format(";\ncolor_results_2 = ", index_name);
out += fmt::format(";\nvec3 color_results_{}_2 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name);
out += fmt::format(";\ncolor_results_3 = ", index_name);
out += fmt::format(";\nvec3 color_results_{}_3 = ", index_name);
AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name);
out += fmt::format(";\nvec3 color_results_{}[3] = vec3[3](color_results_{}_1, "
"color_results_{}_2, color_results_{}_3);\n",
index_name, index_name, index_name, index_name);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
out += fmt::format(";\nvec3 color_output_{} = byteround(", index_name);
AppendColorCombiner(out, stage.color_op, "color_results");
out += fmt::format("vec3 color_output_{} = byteround(", index_name);
AppendColorCombiner(out, stage.color_op, "color_results_" + index_name);
out += ");\n";
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// The result of the Dot3_RGBA operation is also placed in the alpha component
out += fmt::format("float alpha_output_{0} = color_output_{0}[0];\n", index_name);
} else {
out += fmt::format("alpha_results_1 = ", index_name);
out += fmt::format("float alpha_results_{}[3] = float[3](", index_name);
AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1,
index_name);
out += fmt::format(";\nalpha_results_2 = ", index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2,
index_name);
out += fmt::format(";\nalpha_results_3 = ", index_name);
out += ", ";
AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3,
index_name);
out += ");\n";
out += fmt::format(";\nfloat alpha_output_{} = byteround(", index_name);
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results");
out += fmt::format("float alpha_output_{} = byteround(", index_name);
AppendAlphaCombiner(out, stage.alpha_op, "alpha_results_" + index_name);
out += ");\n";
}
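// Putting the pieces together, a non-passthrough stage (index 2 chosen purely for
// illustration) now emits self-contained GLSL of roughly this shape, rather than
// reusing the old shared temporaries:
//   vec3 color_results_2_1 = <modifier1(source1)>;
//   vec3 color_results_2_2 = <modifier2(source2)>;
//   vec3 color_results_2_3 = <modifier3(source3)>;
//   vec3 color_results_2[3] = vec3[3](color_results_2_1, color_results_2_2, color_results_2_3);
//   vec3 color_output_2 = byteround(<combiner over color_results_2>);
//   float alpha_results_2[3] = float[3](<mod1>, <mod2>, <mod3>);
//   float alpha_output_2 = byteround(<combiner over alpha_results_2>);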
@ -1471,14 +1470,6 @@ vec4 secondary_fragment_color = vec4(0.0);
"vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"
"vec4 last_tex_env_out = vec4(0.0);\n";
out += "vec3 color_results_1 = vec3(0.0);\n"
"vec3 color_results_2 = vec3(0.0);\n"
"vec3 color_results_3 = vec3(0.0);\n";
out += "float alpha_results_1 = 0.0;\n"
"float alpha_results_2 = 0.0;\n"
"float alpha_results_3 = 0.0;\n";
for (std::size_t index = 0; index < state.tev_stages.size(); ++index) {
WriteTevStage(out, config, static_cast<u32>(index));
}
@ -1547,30 +1538,6 @@ do {
out += "color = byteround(last_tex_env_out);\n";
}
if (state.emulate_logic_op) {
switch (state.logic_op) {
case FramebufferRegs::LogicOp::Clear:
out += "color = vec4(0);\n";
break;
case FramebufferRegs::LogicOp::Set:
out += "color = vec4(1);\n";
break;
case FramebufferRegs::LogicOp::Copy:
// Take the color output as-is
break;
case FramebufferRegs::LogicOp::CopyInverted:
out += "color = ~color;\n";
break;
case FramebufferRegs::LogicOp::NoOp:
// We need to discard the color, but not necessarily the depth. This is not possible
// with the fragment shader alone, so we emulate this behavior with the color mask.
break;
default:
LOG_CRITICAL(HW_GPU, "Unhandled logic_op {:x}", static_cast<u32>(state.logic_op.Value()));
UNIMPLEMENTED();
}
}
out += '}';
return out;
}
@ -1605,7 +1572,6 @@ void main() {
normquat = vert_normquat;
view = vert_view;
gl_Position = vert_position;
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
if (enable_clip1) {
@ -1661,18 +1627,18 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
if (used_regs[i]) {
std::string_view prefix;
switch (config.state.attrib_types[i]) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
case AttribType::Float:
prefix = "";
break;
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
case AttribType::Byte:
case AttribType::Short:
prefix = "i";
break;
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
case AttribType::Ubyte:
prefix = "u";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
LOG_CRITICAL(Render_Vulkan, "Unknown attrib type {}", config.state.attrib_types[i]);
UNREACHABLE();
}
@ -1680,42 +1646,12 @@ layout (set = 0, binding = 0, std140) uniform vs_config {
fmt::format("layout(location = {0}) in {1}vec4 vs_in_typed_reg{0};\n", i, prefix);
}
}
// Some 3-component attributes might be emulated by breaking them to vec2 + scalar.
// Define them here and combine them below
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (const u32 location = config.state.emulated_attrib_locations[i]; location != 0 && used_regs[i]) {
std::string_view type;
switch (config.state.attrib_types[i]) {
case Pica::PipelineRegs::VertexAttributeFormat::FLOAT:
type = "float";
break;
case Pica::PipelineRegs::VertexAttributeFormat::BYTE:
case Pica::PipelineRegs::VertexAttributeFormat::SHORT:
type = "int";
break;
case Pica::PipelineRegs::VertexAttributeFormat::UBYTE:
type = "uint";
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown attrib format {}", config.state.attrib_types[i]);
UNREACHABLE();
}
out += fmt::format("layout(location = {}) in {} vs_in_typed_reg{}_part2;\n", location, type, i);
}
}
out += '\n';
// cast input registers to float to avoid computational errors
for (std::size_t i = 0; i < used_regs.size(); ++i) {
if (used_regs[i]) {
if (config.state.emulated_attrib_locations[i] != 0) {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vec2(vs_in_typed_reg{0}), float(vs_in_typed_reg{0}_part2), 0.f);\n", i);
} else {
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
}
out += fmt::format("vec4 vs_in_reg{0} = vec4(vs_in_typed_reg{0});\n", i);
}
}
out += '\n';
@ -1775,7 +1711,6 @@ struct Vertex {
semantic(VSOutputAttributes::POSITION_Z) + ", " +
semantic(VSOutputAttributes::POSITION_W) + ");\n";
out += " gl_Position = vtx_pos;\n";
out += " gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;\n";
out += "#if !defined(CITRA_GLES) || defined(GL_EXT_clip_cull_distance)\n";
out += " gl_ClipDistance[0] = -vtx_pos.z;\n"; // fixed PICA clipping plane z <= 0
out += " gl_ClipDistance[1] = dot(clip_coef, vtx_pos);\n";


@ -8,12 +8,11 @@
#include <optional>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/regs_pipeline.h"
#include "video_core/shader/shader.h"
namespace Vulkan {
class Instance;
enum class AttribType : u32 { Float = 0, Int = 1, Short = 2, Byte = 3, Ubyte = 4 };
enum Attributes {
ATTRIBUTE_POSITION,
@ -44,85 +43,77 @@ struct TevStageConfigRaw {
};
struct PicaFSConfigState {
union {
BitField<0, 3, Pica::FramebufferRegs::CompareFunc> alpha_test_func;
BitField<3, 2, Pica::RasterizerRegs::ScissorMode> scissor_test_mode;
BitField<5, 3, Pica::TexturingRegs::TextureConfig::TextureType> texture0_type;
BitField<8, 1, u32> texture2_use_coord1;
BitField<9, 8, u32> combiner_buffer_input;
BitField<17, 1, Pica::RasterizerRegs::DepthBuffering> depthmap_enable;
BitField<18, 3, Pica::TexturingRegs::FogMode> fog_mode;
BitField<21, 1, u32> fog_flip;
BitField<22, 1, u32> emulate_logic_op;
BitField<23, 4, Pica::FramebufferRegs::LogicOp> logic_op;
BitField<27, 1, u32> shadow_rendering;
BitField<28, 1, u32> shadow_texture_orthographic;
};
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
union {
BitField<0, 3, u16> num;
BitField<3, 1, u16> directional;
BitField<4, 1, u16> two_sided_diffuse;
BitField<5, 1, u16> dist_atten_enable;
BitField<6, 1, u16> spot_atten_enable;
BitField<7, 1, u16> geometric_factor_0;
BitField<8, 1, u16> geometric_factor_1;
BitField<9, 1, u16> shadow_enable;
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
union {
BitField<0, 1, u32> enable;
BitField<1, 4, u32> src_num;
BitField<5, 2, Pica::LightingRegs::LightingBumpMode> bump_mode;
BitField<7, 2, u32> bump_selector;
BitField<9, 1, u32> bump_renorm;
BitField<10, 1, u32> clamp_highlights;
BitField<11, 4, Pica::LightingRegs::LightingConfig> config;
BitField<15, 1, u32> enable_primary_alpha;
BitField<16, 1, u32> enable_secondary_alpha;
BitField<17, 1, u32> enable_shadow;
BitField<18, 1, u32> shadow_primary;
BitField<19, 1, u32> shadow_secondary;
BitField<20, 1, u32> shadow_invert;
BitField<21, 1, u32> shadow_alpha;
BitField<22, 2, u32> shadow_selector;
};
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
union {
BitField<0, 1, u32> enable;
BitField<1, 1, u32> abs_input;
BitField<2, 3, Pica::LightingRegs::LightingLutInput> type;
};
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
union {
BitField<0, 1, u32> enable;
BitField<1, 2, u32> coord;
BitField<3, 3, Pica::TexturingRegs::ProcTexClamp> u_clamp;
BitField<6, 3, Pica::TexturingRegs::ProcTexClamp> v_clamp;
BitField<9, 4, Pica::TexturingRegs::ProcTexCombiner> color_combiner;
BitField<13, 4, Pica::TexturingRegs::ProcTexCombiner> alpha_combiner;
BitField<17, 3, Pica::TexturingRegs::ProcTexFilter> lut_filter;
BitField<20, 1, u32> separate_alpha;
BitField<21, 1, u32> noise_enable;
BitField<22, 2, Pica::TexturingRegs::ProcTexShift> u_shift;
BitField<24, 2, Pica::TexturingRegs::ProcTexShift> v_shift;
};
u8 lut_width;
u8 lut_offset0;
u8 lut_offset1;
u8 lut_offset2;
u8 lut_offset3;
u8 lod_min;
u8 lod_max;
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
@ -134,7 +125,9 @@ struct PicaFSConfigState {
* two separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
PicaFSConfig(const Pica::Regs& regs, const Instance& instance);
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
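// A sketch under an assumed encoding (the packing happens where this config is built,
// which is not shown here): the low nibble of combiner_buffer_input holds the per-stage
// RGB update bits and the high nibble the alpha update bits, so e.g. stage 1 updates the
// combiner buffer color when bit 1 of the low nibble is set:
//   bool updates_color = (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
//   bool updates_alpha = (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));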
@ -156,8 +149,7 @@ struct PicaShaderConfigCommon {
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
std::array<Pica::PipelineRegs::VertexAttributeFormat, 16> attrib_types;
std::array<u8, 16> emulated_attrib_locations;
std::array<AttribType, 16> attrib_types;
u32 num_outputs;


@ -1,958 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/microprofile.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_shader_gen_spv.h"
#include "video_core/shader/shader_uniforms.h"
using Pica::FramebufferRegs;
using Pica::LightingRegs;
using Pica::RasterizerRegs;
using Pica::TexturingRegs;
using TevStageConfig = TexturingRegs::TevStageConfig;
namespace Vulkan {
FragmentModule::FragmentModule(const PicaFSConfig& config) : Sirit::Module{0x00010300}, config{config} {
DefineArithmeticTypes();
DefineUniformStructs();
DefineInterface();
DefineEntryPoint();
}
FragmentModule::~FragmentModule() = default;
void FragmentModule::Generate() {
AddLabel(OpLabel());
rounded_primary_color = Byteround(OpLoad(vec_ids.Get(4), primary_color_id), 4);
primary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
secondary_fragment_color = ConstF32(0.f, 0.f, 0.f, 0.f);
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
if (config.state.alpha_test_func == Pica::FramebufferRegs::CompareFunc::Never) {
OpKill();
OpFunctionEnd();
return;
}
// Write shader bytecode to emulate all enabled PICA lights
if (config.state.lighting.enable) {
WriteLighting();
}
combiner_buffer = ConstF32(0.f, 0.f, 0.f, 0.f);
next_combiner_buffer = GetShaderDataMember(vec_ids.Get(4), ConstS32(27));
last_tex_env_out = ConstF32(0.f, 0.f, 0.f, 0.f);
// Write shader bytecode to emulate PICA TEV stages
for (std::size_t index = 0; index < config.state.tev_stages.size(); ++index) {
WriteTevStage(static_cast<s32>(index));
}
if (WriteAlphaTestCondition(config.state.alpha_test_func)) {
return;
}
// After the perspective divide, OpenGL transforms z_over_w from [-1, 1] to [near, far]. Here we
// use the default near = 0 and far = 1, and undo the transformation to get the original
// z_over_w, then do our own transformation according to the PICA specification.
WriteDepth();
// Write output color
OpStore(color_id, Byteround(last_tex_env_out, 4));
OpReturn();
OpFunctionEnd();
}
void FragmentModule::WriteDepth() {
const Id input_pointer_id{TypePointer(spv::StorageClass::Input, f32_id)};
const Id gl_frag_coord_z{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(2u)))};
const Id z_over_w{OpFma(f32_id, ConstF32(2.f), gl_frag_coord_z, ConstF32(-1.f))};
const Id depth_scale{GetShaderDataMember(f32_id, ConstS32(2))};
const Id depth_offset{GetShaderDataMember(f32_id, ConstS32(3))};
const Id depth{OpFma(f32_id, z_over_w, depth_scale, depth_offset)};
if (config.state.depthmap_enable == Pica::RasterizerRegs::DepthBuffering::WBuffering) {
const Id gl_frag_coord_w{OpLoad(f32_id, OpAccessChain(input_pointer_id, gl_frag_coord_id, ConstU32(3u)))};
const Id depth_over_w{OpFDiv(f32_id, depth, gl_frag_coord_w)};
OpStore(gl_frag_depth_id, depth_over_w);
} else {
OpStore(gl_frag_depth_id, depth);
}
}
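// In scalar form the function above computes (a sketch of the math, not new code):
//   z_over_w     = 2.0 * gl_FragCoord.z - 1.0            // undo GL's [0, 1] depth range
//   depth        = z_over_w * depth_scale + depth_offset // PICA depth mapping
//   gl_FragDepth = depth / gl_FragCoord.w                // only under W-buffering
// With illustrative values depth_scale = -0.5 and depth_offset = 0.5, a fragment at
// gl_FragCoord.z = 1.0 would map to depth 0.0.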
void FragmentModule::WriteLighting() {
const auto& lighting = config.state.lighting;
// Define lighting globals
Id diffuse_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
Id specular_sum{ConstF32(0.f, 0.f, 0.f, 1.f)};
Id light_vector{ConstF32(0.f, 0.f, 0.f)};
Id spot_dir{ConstF32(0.f, 0.f, 0.f)};
Id half_vector{ConstF32(0.f, 0.f, 0.f)};
Id dot_product{ConstF32(0.f)};
Id clamp_highlights{ConstF32(1.f)};
Id geo_factor{ConstF32(1.f)};
Id surface_normal{};
Id surface_tangent{};
// Compute fragment normals and tangents
const auto Perturbation = [&]() -> Id {
const Id texel{SampleTexture(lighting.bump_selector)};
const Id texel_rgb{OpVectorShuffle(vec_ids.Get(3), texel, texel, 0, 1, 2)};
const Id rgb_mul_two{OpVectorTimesScalar(vec_ids.Get(3), texel_rgb, ConstF32(2.f))};
return OpFSub(vec_ids.Get(3), rgb_mul_two, ConstF32(1.f, 1.f, 1.f));
};
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
// Bump mapping is enabled using a normal map
surface_normal = Perturbation();
// Recompute the Z-component of the perturbation if 'renorm' is enabled; this provides a
// higher-precision result
if (lighting.bump_renorm) {
const Id normal_x{OpCompositeExtract(f32_id, surface_normal, 0)};
const Id normal_y{OpCompositeExtract(f32_id, surface_normal, 1)};
const Id y_mul_y{OpFMul(f32_id, normal_y, normal_y)};
const Id val{OpFSub(f32_id, ConstF32(1.f), OpFma(f32_id, normal_x, normal_x, y_mul_y))};
const Id normal_z{OpSqrt(f32_id, OpFMax(f32_id, val, ConstF32(0.f)))};
surface_normal = OpCompositeConstruct(vec_ids.Get(3), normal_x, normal_y, normal_z);
}
// The tangent vector is not perturbed by the normal map and is just a unit vector.
surface_tangent = ConstF32(1.f, 0.f, 0.f);
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
// Bump mapping is enabled using a tangent map
surface_tangent = Perturbation();
// Mathematically, recomputing the Z-component of the tangent vector won't affect the
// relevant computation below, which has also been confirmed on the 3DS, so we don't
// bother recomputing it here even if 'renorm' is enabled.
// The normal vector is not perturbed by the tangent map and is just a unit vector.
surface_normal = ConstF32(0.f, 0.f, 1.f);
} else {
// No bump mapping - surface local normal and tangent are just unit vectors
surface_normal = ConstF32(0.f, 0.f, 1.f);
surface_tangent = ConstF32(1.f, 0.f, 0.f);
}
// Rotate the vector v by the quaternion q
const auto QuaternionRotate = [this](Id q, Id v) -> Id {
const Id q_xyz{OpVectorShuffle(vec_ids.Get(3), q, q, 0, 1, 2)};
const Id q_xyz_cross_v{OpCross(vec_ids.Get(3), q_xyz, v)};
const Id q_w{OpCompositeExtract(f32_id, q, 3)};
const Id val1{OpFAdd(vec_ids.Get(3), q_xyz_cross_v, OpVectorTimesScalar(vec_ids.Get(3), v, q_w))};
const Id val2{OpVectorTimesScalar(vec_ids.Get(3), OpCross(vec_ids.Get(3), q_xyz, val1), ConstF32(2.f))};
return OpFAdd(vec_ids.Get(3), v, val2);
};
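// The lambda above is the standard quaternion rotation identity
//   v' = v + 2 * q.xyz × (q.xyz × v + q.w * v)
// which rotates v by the normalized quaternion without ever materializing a 3x3
// rotation matrix.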
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to
// eyespace.
const Id normalized_normquat{OpNormalize(vec_ids.Get(4), OpLoad(vec_ids.Get(4), normquat_id))};
const Id normal{QuaternionRotate(normalized_normquat, surface_normal)};
const Id tangent{QuaternionRotate(normalized_normquat, surface_tangent)};
Id shadow{ConstF32(1.f, 1.f, 1.f, 1.f)};
if (lighting.enable_shadow) {
shadow = SampleTexture(lighting.shadow_selector);
if (lighting.shadow_invert) {
shadow = OpFSub(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f, 1.f), shadow);
}
}
const auto LookupLightingLUTUnsigned = [this](Id lut_index, Id pos) -> Id {
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(256.f)))};
const Id index{OpSClamp(i32_id, pos_int, ConstS32(0), ConstS32(255))};
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
const Id delta{OpFma(f32_id, pos, ConstF32(256.f), neg_index)};
return LookupLightingLUT(lut_index, index, delta);
};
const auto LookupLightingLUTSigned = [this](Id lut_index, Id pos) -> Id {
const Id pos_int{OpConvertFToS(i32_id, OpFMul(f32_id, pos, ConstF32(128.f)))};
const Id index{OpSClamp(i32_id, pos_int, ConstS32(-128), ConstS32(127))};
const Id neg_index{OpFNegate(f32_id, OpConvertSToF(f32_id, index))};
const Id delta{OpFma(f32_id, pos, ConstF32(128.f), neg_index)};
const Id increment{OpSelect(i32_id, OpSLessThan(bool_id, index, ConstS32(0)), ConstS32(256), ConstS32(0))};
return LookupLightingLUT(lut_index, OpIAdd(i32_id, index, increment), delta);
};
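// Both helpers split a fixed-point lookup coordinate into an integer slot and a
// fractional interpolation weight. A worked example for the unsigned case:
//   pos = 0.253  =>  pos * 256 = 64.768  =>  index = 64, delta ~= 0.77
// The signed variant scales by the 128-texel half range and re-biases negative
// indices by +256 so they address the upper half of the table.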
// Samples the specified lookup table for specular lighting
const Id view{OpLoad(vec_ids.Get(3), view_id)};
const auto GetLutValue = [&](LightingRegs::LightingSampler sampler, u32 light_num,
LightingRegs::LightingLutInput input, bool abs) -> Id {
Id index{};
switch (input) {
case LightingRegs::LightingLutInput::NH:
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), half_vector));
break;
case LightingRegs::LightingLutInput::VH:
index = OpDot(f32_id, OpNormalize(vec_ids.Get(3), view), OpNormalize(vec_ids.Get(3), half_vector));
break;
case LightingRegs::LightingLutInput::NV:
index = OpDot(f32_id, normal, OpNormalize(vec_ids.Get(3), view));
break;
case LightingRegs::LightingLutInput::LN:
index = OpDot(f32_id, light_vector, normal);
break;
case LightingRegs::LightingLutInput::SP:
index = OpDot(f32_id, light_vector, spot_dir);
break;
case LightingRegs::LightingLutInput::CP:
// CP input is only available with configuration 7
if (lighting.config == LightingRegs::LightingConfig::Config7) {
// Note: even if the normal vector has been modified by the normal map (so it is no
// longer the normal of the tangent plane), the half-angle vector is still projected
// using the modified normal vector.
const Id normalized_half_vector{OpNormalize(vec_ids.Get(3), half_vector)};
const Id normal_dot_half_vector{OpDot(f32_id, normal, normalized_half_vector)};
const Id normal_mul_dot{OpVectorTimesScalar(vec_ids.Get(3), normal, normal_dot_half_vector)};
const Id half_angle_proj{OpFSub(vec_ids.Get(3), normalized_half_vector, normal_mul_dot)};
// Note: the half-angle vector projection is confirmed to not be normalized before the
// dot product, so the result is in fact not cos(phi) as the name suggests.
index = OpDot(f32_id, half_angle_proj, tangent);
} else {
index = ConstF32(0.f);
}
break;
default:
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input {}", (int)input);
UNIMPLEMENTED();
index = ConstF32(0.f);
break;
}
const Id sampler_index{ConstU32(static_cast<u32>(sampler))};
if (abs) {
// LUT index is in the range of (0.0, 1.0)
index = lighting.light[light_num].two_sided_diffuse
? OpFAbs(f32_id, index)
: OpFMax(f32_id, index, ConstF32(0.f));
return LookupLightingLUTUnsigned(sampler_index, index);
} else {
// LUT index is in the range of (-1.0, 1.0)
return LookupLightingLUTSigned(sampler_index, index);
}
};
// Write the code to emulate each enabled light
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
const auto& light_config = lighting.light[light_index];
const auto GetLightMember = [&](s32 member) -> Id {
const Id member_type = member < 6 ? vec_ids.Get(3) : f32_id;
const Id light_num{ConstS32(static_cast<s32>(lighting.light[light_index].num.Value()))};
return GetShaderDataMember(member_type, ConstS32(25), light_num, ConstS32(member));
};
// Compute light vector (directional or positional)
const Id light_position{GetLightMember(4)};
if (light_config.directional) {
light_vector = OpNormalize(vec_ids.Get(3), light_position);
} else {
light_vector = OpNormalize(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), light_position, view));
}
spot_dir = GetLightMember(5);
half_vector = OpFAdd(vec_ids.Get(3), OpNormalize(vec_ids.Get(3), view), light_vector);
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or
// two-sided
if (light_config.two_sided_diffuse) {
dot_product = OpFAbs(f32_id, OpDot(f32_id, light_vector, normal));
} else {
dot_product = OpFMax(f32_id, OpDot(f32_id, light_vector, normal), ConstF32(0.f));
}
// If enabled, clamp specular component if lighting result is zero
if (lighting.clamp_highlights) {
clamp_highlights = OpFSign(f32_id, dot_product);
}
// If enabled, compute spot light attenuation value
Id spot_atten{ConstF32(1.f)};
if (light_config.spot_atten_enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
const Id value{GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input)};
spot_atten = OpFMul(f32_id, ConstF32(lighting.lut_sp.scale), value);
}
// If enabled, compute distance attenuation value
Id dist_atten{ConstF32(1.f)};
if (light_config.dist_atten_enable) {
const Id dist_atten_scale{GetLightMember(7)};
const Id dist_atten_bias{GetLightMember(6)};
const Id min_view_min_pos{OpFSub(vec_ids.Get(3), OpFNegate(vec_ids.Get(3), view), light_position)};
const Id index{OpFma(f32_id, dist_atten_scale, OpLength(f32_id, min_view_min_pos), dist_atten_bias)};
const Id clamped_index{OpFClamp(f32_id, index, ConstF32(0.f), ConstF32(1.f))};
const Id sampler{ConstS32(static_cast<s32>(LightingRegs::DistanceAttenuationSampler(light_config.num)))};
dist_atten = LookupLightingLUTUnsigned(sampler, clamped_index);
}
if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
geo_factor = OpDot(f32_id, half_vector, half_vector);
const Id dot_div_geo{OpFMin(f32_id, OpFDiv(f32_id, dot_product, geo_factor), ConstF32(1.f))};
const Id is_geo_factor_zero{OpFOrdEqual(bool_id, geo_factor, ConstF32(0.f))};
geo_factor = OpSelect(f32_id, is_geo_factor_zero, ConstF32(0.f), dot_div_geo);
}
// Specular 0 component
Id d0_lut_value{ConstF32(1.f)};
if (lighting.lut_d0.enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution0)) {
// Lookup specular "distribution 0" LUT value
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
lighting.lut_d0.type, lighting.lut_d0.abs_input)};
d0_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d0.scale), value);
}
Id specular_0{OpVectorTimesScalar(vec_ids.Get(3), GetLightMember(0), d0_lut_value)};
if (light_config.geometric_factor_0) {
specular_0 = OpVectorTimesScalar(vec_ids.Get(3), specular_0, geo_factor);
}
// If enabled, look up the ReflectRed value; otherwise 1.0 is used
Id refl_value_r{ConstF32(1.f)};
if (lighting.lut_rr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectRed)) {
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
lighting.lut_rr.type, lighting.lut_rr.abs_input)};
refl_value_r = OpFMul(f32_id, ConstF32(lighting.lut_rr.scale), value);
}
// If enabled, look up the ReflectGreen value; otherwise the ReflectRed value is used
Id refl_value_g{refl_value_r};
if (lighting.lut_rg.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectGreen)) {
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
lighting.lut_rg.type, lighting.lut_rg.abs_input)};
refl_value_g = OpFMul(f32_id, ConstF32(lighting.lut_rg.scale), value);
}
// If enabled, look up the ReflectBlue value; otherwise the ReflectRed value is used
Id refl_value_b{refl_value_r};
if (lighting.lut_rb.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::ReflectBlue)) {
const Id value{GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
lighting.lut_rb.type, lighting.lut_rb.abs_input)};
refl_value_b = OpFMul(f32_id, ConstF32(lighting.lut_rb.scale), value);
}
// Specular 1 component
Id d1_lut_value{ConstF32(1.f)};
if (lighting.lut_d1.enable &&
LightingRegs::IsLightingSamplerSupported(
lighting.config, LightingRegs::LightingSampler::Distribution1)) {
// Lookup specular "distribution 1" LUT value
const Id value{GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
lighting.lut_d1.type, lighting.lut_d1.abs_input)};
d1_lut_value = OpFMul(f32_id, ConstF32(lighting.lut_d1.scale), value);
}
const Id refl_value{OpCompositeConstruct(vec_ids.Get(3), refl_value_r, refl_value_g, refl_value_b)};
const Id light_specular_1{GetLightMember(1)};
Id specular_1{OpFMul(vec_ids.Get(3), OpVectorTimesScalar(vec_ids.Get(3), refl_value, d1_lut_value), light_specular_1)};
if (light_config.geometric_factor_1) {
specular_1 = OpVectorTimesScalar(vec_ids.Get(3), specular_1, geo_factor);
}
// Fresnel
// Note: only the last entry in the light slots applies the Fresnel factor
if (light_index == lighting.src_num - 1 && lighting.lut_fr.enable &&
LightingRegs::IsLightingSamplerSupported(lighting.config,
LightingRegs::LightingSampler::Fresnel)) {
// Look up the Fresnel LUT value
Id value{GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
lighting.lut_fr.type, lighting.lut_fr.abs_input)};
value = OpFMul(f32_id, ConstF32(lighting.lut_fr.scale), value);
// Enabled for diffuse lighting alpha component
if (lighting.enable_primary_alpha) {
diffuse_sum = OpCompositeInsert(vec_ids.Get(4), value, diffuse_sum, 3);
}
// Enabled for the specular lighting alpha component
if (lighting.enable_secondary_alpha) {
specular_sum = OpCompositeInsert(vec_ids.Get(4), value, specular_sum, 3);
}
}
const bool shadow_primary_enable = lighting.shadow_primary && light_config.shadow_enable;
const bool shadow_secondary_enable = lighting.shadow_secondary && light_config.shadow_enable;
const Id shadow_rgb{OpVectorShuffle(vec_ids.Get(3), shadow, shadow, 0, 1, 2)};
const Id light_diffuse{GetLightMember(2)};
const Id light_ambient{GetLightMember(3)};
const Id diffuse_mul_dot{OpVectorTimesScalar(vec_ids.Get(3), light_diffuse, dot_product)};
// Compute the primary fragment color (diffuse lighting)
Id diffuse_sum_rgb{OpFAdd(vec_ids.Get(3), diffuse_mul_dot, light_ambient)};
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, dist_atten);
diffuse_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), diffuse_sum_rgb, spot_atten);
if (shadow_primary_enable) {
diffuse_sum_rgb = OpFMul(vec_ids.Get(3), diffuse_sum_rgb, shadow_rgb);
}
// Compute the secondary fragment color (specular lighting)
const Id specular_01{OpFAdd(vec_ids.Get(3), specular_0, specular_1)};
Id specular_sum_rgb{OpVectorTimesScalar(vec_ids.Get(3), specular_01, clamp_highlights)};
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, dist_atten);
specular_sum_rgb = OpVectorTimesScalar(vec_ids.Get(3), specular_sum_rgb, spot_atten);
if (shadow_secondary_enable) {
specular_sum_rgb = OpFMul(vec_ids.Get(3), specular_sum_rgb, shadow_rgb);
}
// Accumulate the fragment colors
const Id diffuse_sum_rgba{PadVectorF32(diffuse_sum_rgb, vec_ids.Get(4), 0.f)};
const Id specular_sum_rgba{PadVectorF32(specular_sum_rgb, vec_ids.Get(4), 0.f)};
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, diffuse_sum_rgba);
specular_sum = OpFAdd(vec_ids.Get(4), specular_sum, specular_sum_rgba);
}
// Apply shadow attenuation to alpha components if enabled
if (lighting.shadow_alpha) {
const Id shadow_a{OpCompositeExtract(f32_id, shadow, 3)};
const Id shadow_a_vec{OpCompositeConstruct(vec_ids.Get(4), ConstF32(1.f, 1.f, 1.f), shadow_a)};
if (lighting.enable_primary_alpha) {
diffuse_sum = OpFMul(vec_ids.Get(4), diffuse_sum, shadow_a_vec);
}
if (lighting.enable_secondary_alpha) {
specular_sum = OpFMul(vec_ids.Get(4), specular_sum, shadow_a_vec);
}
}
// Sum final lighting result
const Id lighting_global_ambient{GetShaderDataMember(vec_ids.Get(3), ConstS32(24))};
const Id lighting_global_ambient_rgba{PadVectorF32(lighting_global_ambient, vec_ids.Get(4), 0.f)};
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
const Id one_vec{ConstF32(1.f, 1.f, 1.f, 1.f)};
diffuse_sum = OpFAdd(vec_ids.Get(4), diffuse_sum, lighting_global_ambient_rgba);
primary_fragment_color = OpFClamp(vec_ids.Get(4), diffuse_sum, zero_vec, one_vec);
secondary_fragment_color = OpFClamp(vec_ids.Get(4), specular_sum, zero_vec, one_vec);
}
void FragmentModule::WriteTevStage(s32 index) {
const TexturingRegs::TevStageConfig stage =
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
const auto IsPassThroughTevStage = [](const TevStageConfig& stage) {
return (stage.color_op == TevStageConfig::Operation::Replace &&
stage.alpha_op == TevStageConfig::Operation::Replace &&
stage.color_source1 == TevStageConfig::Source::Previous &&
stage.alpha_source1 == TevStageConfig::Source::Previous &&
stage.color_modifier1 == TevStageConfig::ColorModifier::SourceColor &&
stage.alpha_modifier1 == TevStageConfig::AlphaModifier::SourceAlpha &&
stage.GetColorMultiplier() == 1 && stage.GetAlphaMultiplier() == 1);
};
if (!IsPassThroughTevStage(stage)) {
color_results_1 = AppendColorModifier(stage.color_modifier1, stage.color_source1, index);
color_results_2 = AppendColorModifier(stage.color_modifier2, stage.color_source2, index);
color_results_3 = AppendColorModifier(stage.color_modifier3, stage.color_source3, index);
// Round the output of each TEV stage to maintain the PICA's 8 bits of precision
Id color_output{Byteround(AppendColorCombiner(stage.color_op), 3)};
Id alpha_output{};
if (stage.color_op == TevStageConfig::Operation::Dot3_RGBA) {
// The result of the Dot3_RGBA operation is also placed in the alpha component
alpha_output = OpCompositeExtract(f32_id, color_output, 0);
} else {
alpha_results_1 = AppendAlphaModifier(stage.alpha_modifier1, stage.alpha_source1, index);
alpha_results_2 = AppendAlphaModifier(stage.alpha_modifier2, stage.alpha_source2, index);
alpha_results_3 = AppendAlphaModifier(stage.alpha_modifier3, stage.alpha_source3, index);
alpha_output = Byteround(AppendAlphaCombiner(stage.alpha_op));
}
color_output = OpVectorTimesScalar(vec_ids.Get(3), color_output, ConstF32(static_cast<float>(stage.GetColorMultiplier())));
color_output = OpFClamp(vec_ids.Get(3), color_output, ConstF32(0.f, 0.f, 0.f), ConstF32(1.f, 1.f, 1.f));
alpha_output = OpFMul(f32_id, alpha_output, ConstF32(static_cast<float>(stage.GetAlphaMultiplier())));
alpha_output = OpFClamp(f32_id, alpha_output, ConstF32(0.f), ConstF32(1.f));
last_tex_env_out = OpCompositeConstruct(vec_ids.Get(4), color_output, alpha_output);
}
combiner_buffer = next_combiner_buffer;
if (config.TevStageUpdatesCombinerBufferColor(index)) {
next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), last_tex_env_out, next_combiner_buffer, 0, 1, 2, 7);
}
if (config.TevStageUpdatesCombinerBufferAlpha(index)) {
next_combiner_buffer = OpVectorShuffle(vec_ids.Get(4), next_combiner_buffer, last_tex_env_out, 0, 1, 2, 7);
}
}
bool FragmentModule::WriteAlphaTestCondition(FramebufferRegs::CompareFunc func) {
using CompareFunc = FramebufferRegs::CompareFunc;
const auto Compare = [this, func](Id alpha, Id alphatest_ref) {
switch (func) {
case CompareFunc::Equal:
return OpINotEqual(bool_id, alpha, alphatest_ref);
case CompareFunc::NotEqual:
return OpIEqual(bool_id, alpha, alphatest_ref);
case CompareFunc::LessThan:
return OpSGreaterThanEqual(bool_id, alpha, alphatest_ref);
case CompareFunc::LessThanOrEqual:
return OpSGreaterThan(bool_id, alpha, alphatest_ref);
case CompareFunc::GreaterThan:
return OpSLessThanEqual(bool_id, alpha, alphatest_ref);
case CompareFunc::GreaterThanOrEqual:
return OpSLessThan(bool_id, alpha, alphatest_ref);
default:
return Id{};
}
};
switch (func) {
case CompareFunc::Never: // Kill the fragment
OpKill();
OpFunctionEnd();
return true;
case CompareFunc::Always: // Do nothing
return false;
case CompareFunc::Equal:
case CompareFunc::NotEqual:
case CompareFunc::LessThan:
case CompareFunc::LessThanOrEqual:
case CompareFunc::GreaterThan:
case CompareFunc::GreaterThanOrEqual: {
const Id alpha_scaled{OpFMul(f32_id, OpCompositeExtract(f32_id, last_tex_env_out, 3), ConstF32(255.f))};
const Id alpha_int{OpConvertFToS(i32_id, alpha_scaled)};
const Id alphatest_ref{GetShaderDataMember(i32_id, ConstS32(1))};
const Id alpha_comp_ref{Compare(alpha_int, alphatest_ref)};
const Id kill_label{OpLabel()};
const Id keep_label{OpLabel()};
OpSelectionMerge(keep_label, spv::SelectionControlMask::MaskNone);
OpBranchConditional(alpha_comp_ref, kill_label, keep_label);
AddLabel(kill_label);
OpKill();
AddLabel(keep_label);
return false;
}
default:
LOG_CRITICAL(Render_Vulkan, "Unknown alpha test condition {}", func);
return false;
}
}
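// Note on the Compare helper above: it deliberately returns the negated comparison,
// i.e. the condition under which the fragment is killed. For CompareFunc::Equal the
// emitted test is OpINotEqual(alpha, ref), so OpKill executes whenever alpha != ref
// and only fragments passing the alpha test survive.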
Id FragmentModule::SampleTexture(u32 texture_unit) {
const PicaFSConfigState& state = config.state;
const Id zero_vec{ConstF32(0.f, 0.f, 0.f, 0.f)};
// PICA's LOD formula for 2D textures.
// This LOD formula is the same as the LOD lower limit defined in OpenGL.
// f(x, y) >= max{m_u, m_v, m_w}
// (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
const auto SampleLod = [this](Id tex_id, Id tex_sampler_id, Id texcoord_id) {
const Id tex{OpLoad(image2d_id, tex_id)};
const Id tex_sampler{OpLoad(sampler_id, tex_sampler_id)};
const Id sampled_image{OpSampledImage(TypeSampledImage(image2d_id), tex, tex_sampler)};
const Id tex_image{OpImage(image2d_id, sampled_image)};
const Id tex_size{OpImageQuerySizeLod(ivec_ids.Get(2), tex_image, ConstS32(0))};
const Id texcoord{OpLoad(vec_ids.Get(2), texcoord_id)};
const Id coord{OpFMul(vec_ids.Get(2), texcoord, OpConvertSToF(vec_ids.Get(2), tex_size))};
const Id abs_dfdx_coord{OpFAbs(vec_ids.Get(2), OpDPdx(vec_ids.Get(2), coord))};
const Id abs_dfdy_coord{OpFAbs(vec_ids.Get(2), OpDPdy(vec_ids.Get(2), coord))};
const Id d{OpFMax(vec_ids.Get(2), abs_dfdx_coord, abs_dfdy_coord)};
const Id dx_dy_max{OpFMax(f32_id, OpCompositeExtract(f32_id, d, 0), OpCompositeExtract(f32_id, d, 1))};
const Id lod{OpLog2(f32_id, dx_dy_max)};
return OpImageSampleExplicitLod(vec_ids.Get(4), sampled_image, texcoord, spv::ImageOperandsMask::Lod, lod);
};
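// Sketch of the LOD computation above: texcoord is scaled into texel space and
//   lod = log2(max over x/y of max(|dFdx(coord)|, |dFdy(coord)|))
// matching the OpenGL scale-factor lower bound cited earlier. For a texture minified
// by exactly 2x on both axes the derivatives are 2 texels per pixel, giving lod = 1.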
const auto Sample = [this](Id tex_id, Id tex_sampler_id, bool projection) {
const Id tex{OpLoad(image2d_id, tex_id)};
const Id tex_sampler{OpLoad(sampler_id, tex_sampler_id)};
const Id sampled_image{OpSampledImage(TypeSampledImage(image2d_id), tex, tex_sampler)};
const Id texcoord0{OpLoad(vec_ids.Get(2), texcoord0_id)};
const Id texcoord0_w{OpLoad(f32_id, texcoord0_w_id)};
const Id coord{OpCompositeConstruct(vec_ids.Get(3), OpCompositeExtract(f32_id, texcoord0, 0),
OpCompositeExtract(f32_id, texcoord0, 1),
texcoord0_w)};
if (projection) {
return OpImageSampleProjImplicitLod(vec_ids.Get(4), sampled_image, coord);
} else {
return OpImageSampleImplicitLod(vec_ids.Get(4), sampled_image, coord);
}
};
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
switch (state.texture0_type) {
case Pica::TexturingRegs::TextureConfig::Texture2D:
return SampleLod(tex0_id, tex0_sampler_id, texcoord0_id);
case Pica::TexturingRegs::TextureConfig::Projection2D:
return Sample(tex0_id, tex0_sampler_id, true);
case Pica::TexturingRegs::TextureConfig::TextureCube:
return Sample(tex_cube_id, tex_cube_sampler_id, false);
//case Pica::TexturingRegs::TextureConfig::Shadow2D:
//return "shadowTexture(texcoord0, texcoord0_w)";
//case Pica::TexturingRegs::TextureConfig::ShadowCube:
//return "shadowTextureCube(texcoord0, texcoord0_w)";
case Pica::TexturingRegs::TextureConfig::Disabled:
return zero_vec;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled texture type {:x}", state.texture0_type);
UNIMPLEMENTED();
return zero_vec;
}
case 1:
return SampleLod(tex1_id, tex1_sampler_id, texcoord1_id);
case 2:
if (state.texture2_use_coord1)
return SampleLod(tex2_id, tex2_sampler_id, texcoord1_id);
else
return SampleLod(tex2_id, tex2_sampler_id, texcoord2_id);
case 3:
// ProcTex is not implemented in the SPIR-V path yet, so this branch is intentionally disabled
if (false && state.proctex.enable) {
//return "ProcTex()";
} else {
LOG_DEBUG(Render_Vulkan, "Using Texture3 without enabling it");
return zero_vec;
}
default:
UNREACHABLE();
return void_id;
}
}
Id FragmentModule::Byteround(Id variable_id, u32 size) {
if (size > 1) {
const Id scaled_vec_id{OpVectorTimesScalar(vec_ids.Get(size), variable_id, ConstF32(255.f))};
const Id rounded_id{OpRound(vec_ids.Get(size), scaled_vec_id)};
return OpVectorTimesScalar(vec_ids.Get(size), rounded_id, ConstF32(1.f / 255.f));
} else {
const Id rounded_id{OpRound(f32_id, OpFMul(f32_id, variable_id, ConstF32(255.f)))};
return OpFMul(f32_id, rounded_id, ConstF32(1.f / 255.f));
}
}
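// Equivalent scalar formula: byteround(x) = round(x * 255.0) / 255.0, snapping a
// normalized value to the nearest representable 8-bit intensity. For example,
// x = 0.33 gives round(84.15) / 255 = 84 / 255 ~= 0.3294.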
Id FragmentModule::LookupLightingLUT(Id lut_index, Id index, Id delta) {
// Only load the texture buffer LUT once
if (!Sirit::ValidId(texture_buffer_lut_lf)) {
const Id sampled_image{TypeSampledImage(image_buffer_id)};
texture_buffer_lut_lf = OpLoad(sampled_image, texture_buffer_lut_lf_id);
}
const Id lut_index_x{OpShiftRightArithmetic(i32_id, lut_index, ConstS32(2))};
const Id lut_index_y{OpBitwiseAnd(i32_id, lut_index, ConstS32(3))};
const Id lut_offset{GetShaderDataMember(i32_id, ConstS32(19), lut_index_x, lut_index_y)};
const Id coord{OpIAdd(i32_id, lut_offset, index)};
const Id entry{OpImageFetch(vec_ids.Get(4), OpImage(image_buffer_id, texture_buffer_lut_lf), coord)};
const Id entry_r{OpCompositeExtract(f32_id, entry, 0)};
const Id entry_g{OpCompositeExtract(f32_id, entry, 1)};
return OpFma(f32_id, entry_g, delta, entry_r);
}
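// Why the shift/mask pair: the LUT base offsets live in an ivec4 array (std140 pads
// each element to 16 bytes), four offsets per vector, so lut_index >> 2 selects the
// vector and lut_index & 3 the component. Each fetched texel packs the sample in .r
// and the per-step slope in .g, hence the final value = entry_r + entry_g * delta
// via OpFma.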
Id FragmentModule::AppendSource(TevStageConfig::Source source, s32 index) {
using Source = TevStageConfig::Source;
switch (source) {
case Source::PrimaryColor:
return rounded_primary_color;
case Source::PrimaryFragmentColor:
return primary_fragment_color;
case Source::SecondaryFragmentColor:
return secondary_fragment_color;
case Source::Texture0:
return SampleTexture(0);
case Source::Texture1:
return SampleTexture(1);
case Source::Texture2:
return SampleTexture(2);
case Source::Texture3:
return SampleTexture(3);
case Source::PreviousBuffer:
return combiner_buffer;
case Source::Constant:
return GetShaderDataMember(vec_ids.Get(4), ConstS32(26), ConstS32(index));
case Source::Previous:
return last_tex_env_out;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown source op {}", source);
return ConstF32(0.f, 0.f, 0.f, 0.f);
}
}
Id FragmentModule::AppendColorModifier(TevStageConfig::ColorModifier modifier,
TevStageConfig::Source source, s32 index) {
using ColorModifier = TevStageConfig::ColorModifier;
const Id source_color{AppendSource(source, index)};
const Id one_vec{ConstF32(1.f, 1.f, 1.f)};
const auto Shuffle = [&](s32 r, s32 g, s32 b) -> Id {
return OpVectorShuffle(vec_ids.Get(3), source_color, source_color, r, g, b);
};
switch (modifier) {
case ColorModifier::SourceColor:
return Shuffle(0, 1, 2);
case ColorModifier::OneMinusSourceColor:
return OpFSub(vec_ids.Get(3), one_vec, Shuffle(0, 1, 2));
case ColorModifier::SourceRed:
return Shuffle(0, 0, 0);
case ColorModifier::OneMinusSourceRed:
return OpFSub(vec_ids.Get(3), one_vec, Shuffle(0, 0, 0));
case ColorModifier::SourceGreen:
return Shuffle(1, 1, 1);
case ColorModifier::OneMinusSourceGreen:
return OpFSub(vec_ids.Get(3), one_vec, Shuffle(1, 1, 1));
case ColorModifier::SourceBlue:
return Shuffle(2, 2, 2);
case ColorModifier::OneMinusSourceBlue:
return OpFSub(vec_ids.Get(3), one_vec, Shuffle(2, 2, 2));
case ColorModifier::SourceAlpha:
return Shuffle(3, 3, 3);
case ColorModifier::OneMinusSourceAlpha:
return OpFSub(vec_ids.Get(3), one_vec, Shuffle(3, 3, 3));
default:
LOG_CRITICAL(Render_Vulkan, "Unknown color modifier op {}", modifier);
return one_vec;
}
}
Id FragmentModule::AppendAlphaModifier(TevStageConfig::AlphaModifier modifier,
TevStageConfig::Source source, s32 index) {
using AlphaModifier = TevStageConfig::AlphaModifier;
const Id source_color{AppendSource(source, index)};
const Id one_f32{ConstF32(1.f)};
const auto Component = [&](s32 c) -> Id {
return OpCompositeExtract(f32_id, source_color, c);
};
switch (modifier) {
case AlphaModifier::SourceAlpha:
return Component(3);
case AlphaModifier::OneMinusSourceAlpha:
return OpFSub(f32_id, one_f32, Component(3));
case AlphaModifier::SourceRed:
return Component(0);
case AlphaModifier::OneMinusSourceRed:
return OpFSub(f32_id, one_f32, Component(0));
case AlphaModifier::SourceGreen:
return Component(1);
case AlphaModifier::OneMinusSourceGreen:
return OpFSub(f32_id, one_f32, Component(1));
case AlphaModifier::SourceBlue:
return Component(2);
case AlphaModifier::OneMinusSourceBlue:
return OpFSub(f32_id, one_f32, Component(2));
default:
LOG_CRITICAL(Render_Vulkan, "Unknown alpha modifier op {}", modifier);
return one_f32;
}
}
Id FragmentModule::AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation) {
using Operation = TevStageConfig::Operation;
const Id half_vec{ConstF32(0.5f, 0.5f, 0.5f)};
const Id one_vec{ConstF32(1.f, 1.f, 1.f)};
const Id zero_vec{ConstF32(0.f, 0.f, 0.f)};
Id color{};
switch (operation) {
case Operation::Replace:
color = color_results_1;
break;
case Operation::Modulate:
color = OpFMul(vec_ids.Get(3), color_results_1, color_results_2);
break;
case Operation::Add:
color = OpFAdd(vec_ids.Get(3), color_results_1, color_results_2);
break;
case Operation::AddSigned:
color = OpFSub(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), color_results_1, color_results_2), half_vec);
break;
case Operation::Lerp:
color = OpFMix(vec_ids.Get(3), color_results_2, color_results_1, color_results_3);
break;
case Operation::Subtract:
color = OpFSub(vec_ids.Get(3), color_results_1, color_results_2);
break;
case Operation::MultiplyThenAdd:
color = OpFma(vec_ids.Get(3), color_results_1, color_results_2, color_results_3);
break;
case Operation::AddThenMultiply:
color = OpFMin(vec_ids.Get(3), OpFAdd(vec_ids.Get(3), color_results_1, color_results_2), one_vec);
color = OpFMul(vec_ids.Get(3), color, color_results_3);
break;
case Operation::Dot3_RGB:
case Operation::Dot3_RGBA:
color = OpDot(f32_id, OpFSub(vec_ids.Get(3), color_results_1, half_vec),
OpFSub(vec_ids.Get(3), color_results_2, half_vec));
color = OpFMul(f32_id, color, ConstF32(4.f));
color = OpCompositeConstruct(vec_ids.Get(3), color, color, color);
break;
default:
color = zero_vec;
LOG_CRITICAL(Render_Vulkan, "Unknown color combiner operation: {}", operation);
break;
}
// Clamp result to 0.0, 1.0
return OpFClamp(vec_ids.Get(3), color, zero_vec, one_vec);
}
Id FragmentModule::AppendAlphaCombiner(TevStageConfig::Operation operation) {
using Operation = TevStageConfig::Operation;
Id color{};
switch (operation) {
case Operation::Replace:
color = alpha_results_1;
break;
case Operation::Modulate:
color = OpFMul(f32_id, alpha_results_1, alpha_results_2);
break;
case Operation::Add:
color = OpFAdd(f32_id, alpha_results_1, alpha_results_2);
break;
case Operation::AddSigned:
color = OpFSub(f32_id, OpFAdd(f32_id, alpha_results_1, alpha_results_2), ConstF32(0.5f));
break;
case Operation::Lerp:
color = OpFMix(f32_id, alpha_results_2, alpha_results_1, alpha_results_3);
break;
case Operation::Subtract:
color = OpFSub(f32_id, alpha_results_1, alpha_results_2);
break;
case Operation::MultiplyThenAdd:
color = OpFma(f32_id, alpha_results_1, alpha_results_2, alpha_results_3);
break;
case Operation::AddThenMultiply:
color = OpFMin(f32_id, OpFAdd(f32_id, alpha_results_1, alpha_results_2), ConstF32(1.f));
color = OpFMul(f32_id, color, alpha_results_3);
break;
default:
color = ConstF32(0.f);
LOG_CRITICAL(Render_Vulkan, "Unknown alpha combiner operation: {}", operation);
break;
}
return OpFClamp(f32_id, color, ConstF32(0.f), ConstF32(1.f));
}
void FragmentModule::DefineArithmeticTypes() {
void_id = Name(TypeVoid(), "void_id");
bool_id = Name(TypeBool(), "bool_id");
f32_id = Name(TypeFloat(32), "f32_id");
i32_id = Name(TypeSInt(32), "i32_id");
u32_id = Name(TypeUInt(32), "u32_id");
for (u32 size = 2; size <= 4; size++) {
const u32 i = size - 2;
vec_ids.ids[i] = Name(TypeVector(f32_id, size), fmt::format("vec{}_id", size));
ivec_ids.ids[i] = Name(TypeVector(i32_id, size), fmt::format("ivec{}_id", size));
uvec_ids.ids[i] = Name(TypeVector(u32_id, size), fmt::format("uvec{}_id", size));
}
}
void FragmentModule::DefineEntryPoint() {
AddCapability(spv::Capability::Shader);
AddCapability(spv::Capability::SampledBuffer);
AddCapability(spv::Capability::ImageQuery);
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
const Id main_type{TypeFunction(TypeVoid())};
const Id main_func{OpFunction(TypeVoid(), spv::FunctionControlMask::MaskNone, main_type)};
AddEntryPoint(spv::ExecutionModel::Fragment, main_func, "main", primary_color_id, texcoord0_id,
texcoord1_id, texcoord2_id, texcoord0_w_id, normquat_id, view_id, color_id,
gl_frag_coord_id, gl_frag_depth_id);
AddExecutionMode(main_func, spv::ExecutionMode::OriginUpperLeft);
AddExecutionMode(main_func, spv::ExecutionMode::DepthReplacing);
}
void FragmentModule::DefineUniformStructs() {
const Id light_src_struct_id{TypeStruct(vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3), vec_ids.Get(3),
vec_ids.Get(3), vec_ids.Get(3), f32_id, f32_id)};
const Id light_src_array_id{TypeArray(light_src_struct_id, ConstU32(NUM_LIGHTS))};
const Id lighting_lut_array_id{TypeArray(ivec_ids.Get(4), ConstU32(NUM_LIGHTING_SAMPLERS / 4))};
const Id const_color_array_id{TypeArray(vec_ids.Get(4), ConstU32(NUM_TEV_STAGES))};
const Id shader_data_struct_id{TypeStruct(i32_id, i32_id, f32_id, f32_id, f32_id, f32_id, i32_id,
i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id, i32_id,
i32_id, f32_id, i32_id, u32_id, lighting_lut_array_id, vec_ids.Get(3),
vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(2), vec_ids.Get(3),
light_src_array_id, const_color_array_id, vec_ids.Get(4), vec_ids.Get(4))};
constexpr std::array light_src_offsets{0u, 16u, 32u, 48u, 64u, 80u, 92u, 96u};
constexpr std::array shader_data_offsets{0u, 4u, 8u, 12u, 16u, 20u, 24u, 28u, 32u, 36u, 40u, 44u, 48u,
52u, 56u, 60u, 64u, 68u, 72u, 80u, 176u, 192u, 200u, 208u,
224u, 240u, 1136u, 1232u, 1248u};
Decorate(lighting_lut_array_id, spv::Decoration::ArrayStride, 16u);
Decorate(light_src_array_id, spv::Decoration::ArrayStride, 112u);
Decorate(const_color_array_id, spv::Decoration::ArrayStride, 16u);
for (u32 i = 0; i < static_cast<u32>(light_src_offsets.size()); i++) {
MemberDecorate(light_src_struct_id, i, spv::Decoration::Offset, light_src_offsets[i]);
}
for (u32 i = 0; i < static_cast<u32>(shader_data_offsets.size()); i++) {
MemberDecorate(shader_data_struct_id, i, spv::Decoration::Offset, shader_data_offsets[i]);
}
Decorate(shader_data_struct_id, spv::Decoration::Block);
shader_data_id = AddGlobalVariable(TypePointer(spv::StorageClass::Uniform, shader_data_struct_id),
spv::StorageClass::Uniform);
Decorate(shader_data_id, spv::Decoration::DescriptorSet, 0);
Decorate(shader_data_id, spv::Decoration::Binding, 1);
}
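// The hand-written offsets track std140 layout: members 0-18 are 4-byte scalars
// occupying offsets 0..72, the LUT-offset array starts at 80 and spans
// 6 ivec4s * 16 bytes = 96 bytes (next member at 176), and the light array at 240
// spans 8 lights * 112 bytes = 896 bytes (next member at 1136). Any field change in
// the matching C++ uniform struct would need these tables updated in lockstep.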
void FragmentModule::DefineInterface() {
// Define interface block
primary_color_id = DefineInput(vec_ids.Get(4), 1);
texcoord0_id = DefineInput(vec_ids.Get(2), 2);
texcoord1_id = DefineInput(vec_ids.Get(2), 3);
texcoord2_id = DefineInput(vec_ids.Get(2), 4);
texcoord0_w_id = DefineInput(f32_id, 5);
normquat_id = DefineInput(vec_ids.Get(4), 6);
view_id = DefineInput(vec_ids.Get(3), 7);
color_id = DefineOutput(vec_ids.Get(4), 0);
// Define the texture unit samplers/uniforms
image_buffer_id = TypeImage(f32_id, spv::Dim::Buffer, 0, 0, 0, 1, spv::ImageFormat::Unknown);
image2d_id = TypeImage(f32_id, spv::Dim::Dim2D, 0, 0, 0, 1, spv::ImageFormat::Unknown);
image_cube_id = TypeImage(f32_id, spv::Dim::Cube, 0, 0, 0, 1, spv::ImageFormat::Unknown);
sampler_id = TypeSampler();
texture_buffer_lut_lf_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 2);
texture_buffer_lut_rg_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 3);
texture_buffer_lut_rgba_id = DefineUniformConst(TypeSampledImage(image_buffer_id), 0, 4);
tex0_id = DefineUniformConst(image2d_id, 1, 0);
tex1_id = DefineUniformConst(image2d_id, 1, 1);
tex2_id = DefineUniformConst(image2d_id, 1, 2);
tex_cube_id = DefineUniformConst(image_cube_id, 1, 3);
tex0_sampler_id = DefineUniformConst(sampler_id, 2, 0);
tex1_sampler_id = DefineUniformConst(sampler_id, 2, 1);
tex2_sampler_id = DefineUniformConst(sampler_id, 2, 2);
tex_cube_sampler_id = DefineUniformConst(sampler_id, 2, 3);
// Define built-ins
gl_frag_coord_id = DefineVar(vec_ids.Get(4), spv::StorageClass::Input);
gl_frag_depth_id = DefineVar(f32_id, spv::StorageClass::Output);
Decorate(gl_frag_coord_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragCoord);
Decorate(gl_frag_depth_id, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
}
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config) {
FragmentModule module{config};
module.Generate();
return module.Assemble();
}
} // namespace Vulkan

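For reference, a minimal sketch of how the generator above might be driven end to end, assuming a populated PicaFSConfig and a valid vk::Device are available; the wrapper function name is illustrative, while GenerateFragmentShaderSPV and CompileSPV are the entry points declared in this diff:

// Hypothetical helper: emit SPIR-V for the current Pica state and wrap it
// in a VkShaderModule via the CompileSPV helper from vk_shader_util.
vk::ShaderModule BuildPicaFragmentShader(const Vulkan::PicaFSConfig& config, vk::Device device) {
    const std::vector<u32> code = Vulkan::GenerateFragmentShaderSPV(config);
    return Vulkan::CompileSPV(code, device);
}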

@@ -1,227 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <sirit/sirit.h>
#include "video_core/renderer_vulkan/vk_shader_gen.h"
namespace Vulkan {
using Sirit::Id;
struct VectorIds {
/// Returns the type id of the vector with the provided size
[[nodiscard]] constexpr Id Get(u32 size) const {
return ids[size - 2];
}
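/// Type ids for the 2-, 3- and 4-component vectors; Get(size) reads ids[size - 2]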
std::array<Id, 3> ids;
};
class FragmentModule : public Sirit::Module {
static constexpr u32 NUM_TEV_STAGES = 6;
static constexpr u32 NUM_LIGHTS = 8;
static constexpr u32 NUM_LIGHTING_SAMPLERS = 24;
public:
FragmentModule(const PicaFSConfig& config);
~FragmentModule();
/// Emits SPIR-V bytecode corresponding to the provided pica fragment configuration
void Generate();
/// Undoes the Vulkan perspective transformation and applies the Pica one
void WriteDepth();
/// Writes the code to emulate fragment lighting
void WriteLighting();
/// Writes the code to emulate the specified TEV stage
void WriteTevStage(s32 index);
/// Writes the if-statement condition used to evaluate alpha testing.
/// Returns true if the fragment was discarded
[[nodiscard]] bool WriteAlphaTestCondition(Pica::FramebufferRegs::CompareFunc func);
/// Samples the current fragment texel from the provided texture unit
[[nodiscard]] Id SampleTexture(u32 texture_unit);
/// Rounds the provided variable to the nearest 1/255th
[[nodiscard]] Id Byteround(Id variable_id, u32 size = 1);
/// Looks up the lighting LUT at the provided lut_index
[[nodiscard]] Id LookupLightingLUT(Id lut_index, Id index, Id delta);
/// Writes the specified TEV stage source component(s)
[[nodiscard]] Id AppendSource(Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the color components to use for the specified TEV stage color modifier
[[nodiscard]] Id AppendColorModifier(Pica::TexturingRegs::TevStageConfig::ColorModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the alpha component to use for the specified TEV stage alpha modifier
[[nodiscard]] Id AppendAlphaModifier(Pica::TexturingRegs::TevStageConfig::AlphaModifier modifier,
Pica::TexturingRegs::TevStageConfig::Source source, s32 index);
/// Writes the combiner function for the color components for the specified TEV stage operation
[[nodiscard]] Id AppendColorCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
/// Writes the combiner function for the alpha component for the specified TEV stage operation
[[nodiscard]] Id AppendAlphaCombiner(Pica::TexturingRegs::TevStageConfig::Operation operation);
/// Loads the specified member from the shader_data uniform struct
template <typename... Ids>
[[nodiscard]] Id GetShaderDataMember(Id type, Ids... ids) {
const Id uniform_ptr{TypePointer(spv::StorageClass::Uniform, type)};
return OpLoad(type, OpAccessChain(uniform_ptr, shader_data_id, ids...));
}
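// Illustrative call (member index hypothetical):
//   GetShaderDataMember(i32_id, ConstS32(0))
// emits an OpAccessChain into shader_data followed by an OpLoad of member 0.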
/// Pads the provided vector by appending the given float constants at the end
template <typename... Args>
[[nodiscard]] Id PadVectorF32(Id vector, Id pad_type_id, Args&&... args) {
return OpCompositeConstruct(pad_type_id, vector, ConstF32(args...));
}
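// e.g. PadVectorF32(some_vec3, vec_ids.Get(4), 1.0f) would extend a vec3 to a
// vec4 with w = 1.0 (illustrative; OpCompositeConstruct concatenates operands).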
/// Defines an input variable
[[nodiscard]] Id DefineInput(Id type, u32 location) {
const Id input_id{DefineVar(type, spv::StorageClass::Input)};
Decorate(input_id, spv::Decoration::Location, location);
return input_id;
}
/// Defines an output variable
[[nodiscard]] Id DefineOutput(Id type, u32 location) {
const Id output_id{DefineVar(type, spv::StorageClass::Output)};
Decorate(output_id, spv::Decoration::Location, location);
return output_id;
}
/// Defines a uniform constant variable
[[nodiscard]] Id DefineUniformConst(Id type, u32 set, u32 binding) {
const Id uniform_id{DefineVar(type, spv::StorageClass::UniformConstant)};
Decorate(uniform_id, spv::Decoration::DescriptorSet, set);
Decorate(uniform_id, spv::Decoration::Binding, binding);
return uniform_id;
}
[[nodiscard]] Id DefineVar(Id type, spv::StorageClass storage_class) {
const Id pointer_type_id{TypePointer(storage_class, type)};
return AddGlobalVariable(pointer_type_id, storage_class);
}
/// Returns the id of an unsigned integer constant with the provided value
[[nodiscard]] Id ConstU32(u32 value) {
return Constant(u32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstU32(Args&&... values) {
constexpr auto size = sizeof...(values);
static_assert(size >= 2 && size <= 4);
const std::array constituents{Constant(u32_id, values)...};
return ConstantComposite(uvec_ids.Get(size), constituents);
}
/// Returns the id of a signed integer constant with the provided value
[[nodiscard]] Id ConstS32(s32 value) {
return Constant(i32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstS32(Args&&... values) {
constexpr auto size = sizeof...(values);
static_assert(size >= 2 && size <= 4);
const std::array constituents{Constant(i32_id, values)...};
return ConstantComposite(ivec_ids.Get(size), constituents);
}
/// Returns the id of a float constant with the provided value
[[nodiscard]] Id ConstF32(float value) {
return Constant(f32_id, value);
}
template <typename... Args>
[[nodiscard]] Id ConstF32(Args... values) {
constexpr auto size = sizeof...(values);
static_assert(size >= 2 && size <= 4);
const std::array constituents{Constant(f32_id, values)...};
return ConstantComposite(vec_ids.Get(size), constituents);
}
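// Illustrative: ConstF32(0.5f, 0.5f) yields a vec2 constant, ConstS32(1, 2, 3)
// an ivec3, and ConstU32(0u) a plain scalar u32 constant.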
private:
void DefineArithmeticTypes();
void DefineEntryPoint();
void DefineUniformStructs();
void DefineInterface();
private:
PicaFSConfig config;
Id void_id{};
Id bool_id{};
Id f32_id{};
Id i32_id{};
Id u32_id{};
VectorIds vec_ids{};
VectorIds ivec_ids{};
VectorIds uvec_ids{};
Id image2d_id{};
Id image_cube_id{};
Id image_buffer_id{};
Id sampler_id{};
Id shader_data_id{};
Id primary_color_id{};
Id texcoord0_id{};
Id texcoord1_id{};
Id texcoord2_id{};
Id texcoord0_w_id{};
Id normquat_id{};
Id view_id{};
Id color_id{};
Id gl_frag_coord_id{};
Id gl_frag_depth_id{};
Id tex0_id{};
Id tex1_id{};
Id tex2_id{};
Id tex_cube_id{};
Id tex0_sampler_id{};
Id tex1_sampler_id{};
Id tex2_sampler_id{};
Id tex_cube_sampler_id{};
Id texture_buffer_lut_lf_id{};
Id texture_buffer_lut_rg_id{};
Id texture_buffer_lut_rgba_id{};
Id texture_buffer_lut_lf{};
Id rounded_primary_color{};
Id primary_fragment_color{};
Id secondary_fragment_color{};
Id combiner_buffer{};
Id next_combiner_buffer{};
Id last_tex_env_out{};
Id color_results_1{};
Id color_results_2{};
Id color_results_3{};
Id alpha_results_1{};
Id alpha_results_2{};
Id alpha_results_3{};
};
/**
* Generates the SPIR-V fragment shader program for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @returns SPIR-V bytecode of the generated fragment shader
*/
std::vector<u32> GenerateFragmentShaderSPV(const PicaFSConfig& config);
} // namespace Vulkan


@@ -6,7 +6,6 @@
#include <glslang/Include/ResourceLimits.h>
#include <glslang/Public/ShaderLang.h>
#include "common/assert.h"
#include "common/microprofile.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
@@ -179,8 +178,6 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(),
shader->getInfoDebugLog());
LOG_CRITICAL(Render_Vulkan, "{}", code);
ASSERT(false);
return VK_NULL_HANDLE;
}
@@ -218,22 +215,10 @@ vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, v
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
return CompileSPV(out_code, device);
}
const vk::ShaderModuleCreateInfo shader_info = {.codeSize = out_code.size() * sizeof(u32),
.pCode = out_code.data()};
MICROPROFILE_DEFINE(Vulkan_SPVCompilation, "Vulkan", "SPIR-V Shader Compilation", MP_RGB(100, 255, 52));
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device) {
MICROPROFILE_SCOPE(Vulkan_SPVCompilation);
const vk::ShaderModuleCreateInfo shader_info = {.codeSize = code.size() * sizeof(u32),
.pCode = code.data()};
try {
return device.createShaderModule(shader_info);
} catch (vk::SystemError& err) {
LOG_CRITICAL(Render_Vulkan, "{}", err.what());
UNREACHABLE();
}
return VK_NULL_HANDLE;
return device.createShaderModule(shader_info);
}
} // namespace Vulkan


@@ -13,6 +13,4 @@ enum class ShaderOptimization { High = 0, Debug = 1 };
vk::ShaderModule Compile(std::string_view code, vk::ShaderStageFlagBits stage, vk::Device device,
ShaderOptimization level);
vk::ShaderModule CompileSPV(std::vector<u32> code, vk::Device device);
} // namespace Vulkan


@@ -5,7 +5,6 @@
#include <algorithm>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
@@ -15,40 +14,30 @@
namespace Vulkan {
[[nodiscard]] vk::AccessFlags MakeAccessFlags(vk::BufferUsageFlagBits usage) {
inline auto ToVkAccessStageFlags(vk::BufferUsageFlagBits usage) {
std::pair<vk::AccessFlags, vk::PipelineStageFlags> result{};
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
return vk::AccessFlagBits::eVertexAttributeRead;
result = std::make_pair(vk::AccessFlagBits::eVertexAttributeRead,
vk::PipelineStageFlagBits::eVertexInput);
break;
case vk::BufferUsageFlagBits::eIndexBuffer:
return vk::AccessFlagBits::eIndexRead;
result =
std::make_pair(vk::AccessFlagBits::eIndexRead, vk::PipelineStageFlagBits::eVertexInput);
case vk::BufferUsageFlagBits::eUniformBuffer:
return vk::AccessFlagBits::eUniformRead;
result = std::make_pair(vk::AccessFlagBits::eUniformRead,
vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eGeometryShader |
vk::PipelineStageFlagBits::eFragmentShader);
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
return vk::AccessFlagBits::eShaderRead;
result = std::make_pair(vk::AccessFlagBits::eShaderRead,
vk::PipelineStageFlagBits::eFragmentShader);
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
UNREACHABLE();
}
return vk::AccessFlagBits::eNone;
}
[[nodiscard]] vk::PipelineStageFlags MakePipelineStage(vk::BufferUsageFlagBits usage) {
switch (usage) {
case vk::BufferUsageFlagBits::eVertexBuffer:
return vk::PipelineStageFlagBits::eVertexInput;
case vk::BufferUsageFlagBits::eIndexBuffer:
return vk::PipelineStageFlagBits::eVertexInput;
case vk::BufferUsageFlagBits::eUniformBuffer:
return vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eGeometryShader |
vk::PipelineStageFlagBits::eFragmentShader;
case vk::BufferUsageFlagBits::eUniformTexelBuffer:
return vk::PipelineStageFlagBits::eFragmentShader;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown usage flag {}", usage);
UNREACHABLE();
}
return vk::PipelineStageFlagBits::eNone;
return result;
}
StagingBuffer::StagingBuffer(const Instance& instance, u32 size, bool readback)
@@ -131,90 +120,98 @@ StreamBuffer::~StreamBuffer() {
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= total_size && alignment <= total_size);
Bucket& bucket = buckets[bucket_index];
if (alignment > 0) {
buffer_offset = Common::AlignUp(buffer_offset, alignment);
bucket.cursor = Common::AlignUp(bucket.cursor, alignment);
}
bool invalidate = false;
const u32 new_offset = buffer_offset + size;
if (u32 new_index = new_offset / bucket_size; new_index != bucket_index) {
if (new_index >= BUCKET_COUNT) {
if (readback) {
Invalidate();
} else {
Flush();
}
buffer_offset = 0;
flush_offset = 0;
new_index = 0;
invalidate = true;
}
ticks[bucket_index] = scheduler.CurrentTick();
scheduler.Wait(ticks[new_index]);
bucket_index = new_index;
// If we would cross the bucket boundary, move over to the next one
if (bucket.cursor + size > bucket_size) {
bucket.gpu_tick = scheduler.CurrentTick();
Flush();
MoveNextBucket();
return Map(size, alignment);
}
const bool invalidate = std::exchange(bucket.invalid, false);
const u32 buffer_offset = bucket_index * bucket_size + bucket.cursor;
u8* mapped = reinterpret_cast<u8*>(staging.mapped.data() + buffer_offset);
return std::make_tuple(mapped, buffer_offset, invalidate);
}
void StreamBuffer::Commit(u32 size) {
buffer_offset += size;
buckets[bucket_index].cursor += size;
}
void StreamBuffer::Flush() {
if (readback) {
LOG_WARNING(Render_Vulkan, "Cannot flush read only buffer");
return;
}
const u32 flush_size = buffer_offset - flush_offset;
ASSERT(flush_size <= total_size);
ASSERT(flush_offset + flush_size <= total_size);
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) [[likely]] {
// Ensure all staging writes are visible to the host memory domain
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, staging.allocation, flush_offset, flush_size);
vmaFlushAllocation(allocator, staging.allocation, flush_start, flush_size);
if (gpu_buffer) {
scheduler.Record([this, flush_offset = flush_offset, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
scheduler.Record([this, flush_start, flush_size](vk::CommandBuffer, vk::CommandBuffer upload_cmdbuf) {
const vk::BufferCopy copy_region = {
.srcOffset = flush_offset, .dstOffset = flush_offset, .size = flush_size};
.srcOffset = flush_start, .dstOffset = flush_start, .size = flush_size};
upload_cmdbuf.copyBuffer(staging.buffer, gpu_buffer, copy_region);
auto [access_mask, stage_mask] = ToVkAccessStageFlags(usage);
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = MakeAccessFlags(usage),
.dstAccessMask = access_mask,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = gpu_buffer,
.offset = flush_offset,
.offset = flush_start,
.size = flush_size};
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
MakePipelineStage(usage),
upload_cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier,
{});
});
}
flush_offset = buffer_offset;
bucket.flush_cursor += flush_size;
}
}
void StreamBuffer::Invalidate() {
if (!readback) {
LOG_WARNING(Render_Vulkan, "Cannot invalidate write only buffer");
return;
}
const u32 flush_size = buffer_offset - flush_offset;
ASSERT(flush_size <= total_size);
ASSERT(flush_offset + flush_size <= total_size);
Bucket& bucket = buckets[bucket_index];
const u32 flush_start = bucket_index * bucket_size + bucket.flush_cursor;
const u32 flush_size = bucket.cursor - bucket.flush_cursor;
ASSERT(flush_size <= bucket_size);
if (flush_size > 0) [[likely]] {
// Ensure the staging memory can be read by the host
VmaAllocator allocator = instance.GetAllocator();
vmaInvalidateAllocation(allocator, staging.allocation, flush_offset, flush_size);
flush_offset = buffer_offset;
vmaInvalidateAllocation(allocator, staging.allocation, flush_start, flush_size);
bucket.flush_cursor += flush_size;
}
}
void StreamBuffer::MoveNextBucket() {
bucket_index = (bucket_index + 1) % BUCKET_COUNT;
Bucket& next_bucket = buckets[bucket_index];
scheduler.Wait(next_bucket.gpu_tick);
next_bucket.cursor = 0;
next_bucket.flush_cursor = 0;
next_bucket.invalid = true;
}
} // namespace Vulkan

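The bucket rotation above yields a simple client-side pattern: map, write, commit, then flush before submission. A hedged sketch of an upload path, assuming a constructed StreamBuffer and using only the Map/Commit/Flush signatures shown in this diff (the function name, span parameter and alignment of 4 are illustrative):

#include <cstring>
#include <span>

// Hypothetical helper: stage `data` into the stream buffer and record the flush.
u32 UploadToStreamBuffer(Vulkan::StreamBuffer& stream, std::span<const u8> data) {
    const u32 size = static_cast<u32>(data.size());
    // Map hands back a host pointer, the offset inside the buffer, and whether
    // a bucket wrap invalidated previously written contents.
    const auto [ptr, offset, invalidated] = stream.Map(size, 4);
    std::memcpy(ptr, data.data(), size);
    // Advance the bucket cursor past the bytes just written.
    stream.Commit(size);
    // For device-local buffers this also records the staging copy and the
    // matching pipeline barrier on the scheduler's upload command buffer.
    stream.Flush();
    return offset;
}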

@@ -71,6 +71,17 @@ public:
return views[index];
}
private:
/// Moves to the next bucket
void MoveNextBucket();
struct Bucket {
bool invalid = false; ///< Set when the bucket is recycled; the next Map reports it so stale contents are discarded
u32 gpu_tick = 0; ///< Scheduler tick of the last GPU use of this bucket
u32 cursor = 0; ///< Current write offset inside the bucket
u32 flush_cursor = 0; ///< Offset up to which the bucket has been flushed or invalidated
};
private:
const Instance& instance;
Scheduler& scheduler;
@@ -79,14 +90,12 @@ private:
VmaAllocation allocation{};
vk::BufferUsageFlagBits usage;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
std::size_t view_count = 0;
std::array<Bucket, BUCKET_COUNT> buckets;
u32 view_count = 0;
u32 total_size = 0;
u32 bucket_size = 0;
u32 buffer_offset = 0;
u32 flush_offset = 0;
u32 bucket_index = 0;
bool readback = false;
std::array<u64, BUCKET_COUNT> ticks{};
};
} // namespace Vulkan

Some files were not shown because too many files have changed in this diff.