video_core: Rewrite to backend system

* Still doesn't build; this is just a massive code dump from all the recent progress.
emufan4568
2022-08-08 00:00:52 +03:00
parent ff9b0dfe2f
commit 810df95b81
81 changed files with 6111 additions and 2832 deletions

View File

@ -157,6 +157,7 @@ set(REQUIRED_LIBRARIES
inih
lodepng
glslang
robin-hood-hashing
zstd
)
@ -297,6 +298,7 @@ set(REQUIRED_PACKAGES
zstd
unofficial-enet
lodepng
robin_hood
)
foreach(PACKAGE ${REQUIRED_PACKAGES})

View File

@ -9,7 +9,7 @@ endif()
# Configure vcpkg
set(VCPKG_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/externals/vcpkg")
if (WIN32)
execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
#execute_process(COMMAND cmd /C "${VCPKG_DIRECTORY}/bootstrap-vcpkg.bat")
set(VCPKG_EXECUTABLE "${VCPKG_DIRECTORY}/vcpkg.exe")
else()
execute_process(COMMAND bash "${VCPKG_DIRECTORY}/bootstrap-vcpkg.sh")

View File

@ -450,7 +450,7 @@ void DspLle::SetServiceToInterrupt(std::weak_ptr<Service::DSP::DSP_DSP> dsp) {
return;
if (pipe == 0) {
// pipe 0 is for debug. 3DS automatically drains this pipe and discards the data
impl->ReadPipe(pipe, impl->GetPipeReadableSize(pipe));
impl->ReadPipe(static_cast<u8>(pipe), impl->GetPipeReadableSize(pipe));
} else {
std::lock_guard lock(HLE::g_hle_lock);
if (auto locked = dsp.lock()) {

View File

@ -944,16 +944,14 @@ void Config::SaveMultiplayerValues() {
// Write ban list
qt_config->beginWriteArray(QStringLiteral("username_ban_list"));
for (std::size_t i = 0; i < UISettings::values.ban_list.first.size(); ++i) {
int index = static_cast<int>(i);
qt_config->setArrayIndex(index);
qt_config->setArrayIndex(static_cast<int>(i));
WriteSetting(QStringLiteral("username"),
QString::fromStdString(UISettings::values.ban_list.first[i]));
}
qt_config->endArray();
qt_config->beginWriteArray(QStringLiteral("ip_ban_list"));
for (std::size_t i = 0; i < UISettings::values.ban_list.second.size(); ++i) {
int index = static_cast<int>(i);
qt_config->setArrayIndex(index);
qt_config->setArrayIndex(static_cast<int>(i));
WriteSetting(QStringLiteral("ip"),
QString::fromStdString(UISettings::values.ban_list.second[i]));
}

View File

@ -256,8 +256,7 @@ void ConfigureCamera::SetConfiguration() {
int index = GetSelectedCameraIndex();
for (std::size_t i = 0; i < Implementations.size(); i++) {
if (Implementations[i] == camera_name[index]) {
int current_index = static_cast<int>(i);
ui->image_source->setCurrentIndex(current_index);
ui->image_source->setCurrentIndex(static_cast<int>(i));
}
}
if (camera_name[index] == "image") {

View File

@ -76,7 +76,7 @@ void IPCRecorderWidget::OnEntryUpdated(IPCDebugger::RequestRecord record) {
QTreeWidgetItem entry{
{QString::number(record.id), GetStatusStr(record), service, GetFunctionName(record)}};
const int row_id = record.id - id_offset;
const std::size_t row_id = record.id - id_offset;
if (ui->main->invisibleRootItem()->childCount() > row_id) {
records[row_id] = record;
(*ui->main->invisibleRootItem()->child(row_id)) = entry;

View File

@ -45,7 +45,7 @@ private:
// The offset between record id and row id. Assuming record ids are assigned
// consecutively and only the 'Clear' action can be performed, this is enough.
// The initial value is 1, which means record 1 = row 0.
int id_offset = 1;
std::size_t id_offset = 1;
std::vector<IPCDebugger::RequestRecord> records;
};

View File

@ -60,6 +60,7 @@ add_library(common STATIC
detached_tasks.cpp
detached_tasks.h
bit_field.h
bit_field_array.h
bit_set.h
cityhash.cpp
cityhash.h
@ -72,6 +73,7 @@ add_library(common STATIC
file_util.h
flag.h
hash.h
intrusive_ptr.h
linear_disk_cache.h
logging/backend.cpp
logging/backend.h
@ -87,6 +89,8 @@ add_library(common STATIC
microprofile.h
microprofileui.h
misc.cpp
object_pool.cpp
object_pool.h
param_package.cpp
param_package.h
quaternion.h

View File

@ -36,6 +36,18 @@
#include "common/common_funcs.h"
#include "common/swap.h"
// User-defined types need to specialize this
template <typename T>
struct MakeUnsigned {
using type = std::make_unsigned_t<T>;
};
// Ensure that user-defined types are sane
template <class T>
concept ValidType = requires(T t) {
static_cast<typename MakeUnsigned<T>::type>(t);
};
/*
* Abstract bitfield class
*
@ -110,6 +122,7 @@
*/
#pragma pack(1)
template <std::size_t Position, std::size_t Bits, typename T, typename EndianTag = LETag>
requires ValidType<T>
struct BitField {
private:
// UnderlyingType is T for non-enum types and the underlying type of T if
@ -120,7 +133,7 @@ private:
std::enable_if<true, T>>::type;
// We store the value as the unsigned type to avoid undefined behaviour on value shifting
using StorageType = std::make_unsigned_t<UnderlyingType>;
using StorageType = typename MakeUnsigned<UnderlyingType>::type;
using StorageTypeWithEndian = typename AddEndian<StorageType, EndianTag>::type;
@ -199,3 +212,38 @@ private:
template <std::size_t Position, std::size_t Bits, typename T>
using BitFieldBE = BitField<Position, Bits, T, BETag>;
/**
* Abstract bit flag class. This is basically a specialization of BitField for single-bit fields.
* Instead of being cast to the underlying type, it acts like a boolean.
*/
#pragma pack(1)
template <std::size_t Position, typename T, typename EndianTag = LETag>
struct BitFlag : protected BitField<Position, 1, T, EndianTag> {
private:
BitFlag(T val) = delete;
using ParentType = BitField<Position, 1, T>;
public:
BitFlag() = default;
BitFlag& operator=(const BitFlag&) = delete;
constexpr BitFlag& operator=(bool val) {
Assign(val);
return *this;
}
constexpr void Assign(bool value) {
ParentType::Assign(value);
}
[[nodiscard]] constexpr operator bool() const {
return Value();
}
[[nodiscard]] constexpr bool Value() const {
return ParentType::Value() != 0;
}
};
#pragma pack()
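// A minimal usage sketch (the register layout below is hypothetical): BitFlag
// slots into the same union pattern as BitField but behaves like a bool.
//
// union ControlReg {
//     u32 raw;
//     BitField<0, 4, u32> mode;
//     BitFlag<4, u32> enable;
// };
//
// ControlReg ctrl{};
// ctrl.enable = true;           // sets bit 4
// const bool on = ctrl.enable;  // reads back as a bool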

View File

@ -0,0 +1,287 @@
#pragma once
#include <cstddef>
#include <limits>
#include <type_traits>
#include "common/swap.h"
// Language limitations require the following to make these formattable
// (formatter<BitFieldArray<position, bits, size, T>::Ref> is not legal)
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstRef;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayRef;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstIterator;
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayIterator;
#pragma pack(1)
template <std::size_t position, std::size_t bits, std::size_t size, typename T,
// StorageType is T for non-enum types and the underlying type of T if
// T is an enumeration. Note that T is wrapped within an enable_if in the
// former case to workaround compile errors which arise when using
// std::underlying_type<T>::type directly.
typename StorageType = typename std::conditional_t<
std::is_enum<T>::value, std::underlying_type<T>, std::enable_if<true, T>>::type>
struct BitFieldArray
{
using Ref = BitFieldArrayRef<position, bits, size, T, StorageType>;
using ConstRef = BitFieldArrayConstRef<position, bits, size, T, StorageType>;
using Iterator = BitFieldArrayIterator<position, bits, size, T, StorageType>;
using ConstIterator = BitFieldArrayConstIterator<position, bits, size, T, StorageType>;
private:
// This constructor might be considered ambiguous:
// Would it initialize the storage or just the bitfield?
// Hence, delete it. Use the assignment operator to set bitfield values!
BitFieldArray(T val) = delete;
public:
// Force default constructor to be created
// so that we can use this within unions
constexpr BitFieldArray() = default;
// Initializer list constructor
constexpr BitFieldArray(std::initializer_list<T> items) : storage(StorageType{}) {
u32 index = 0;
for (auto& item : items) {
SetValue(index++, item);
}
}
// We explicitly delete the copy assignment operator here, because the
// default copy assignment would copy the full storage value, rather than
// just the bits relevant to this particular bit field.
// Ideally, we would just implement the copy assignment to copy only the
// relevant bits, but we're prevented from doing that because the savestate
// code expects that this class is trivially copyable.
BitFieldArray& operator=(const BitFieldArray&) = delete;
public:
constexpr bool IsSigned() const { return std::is_signed<T>(); }
constexpr std::size_t StartBit() const { return position; }
constexpr std::size_t NumBits() const { return bits; }
constexpr std::size_t Size() const { return size; }
constexpr std::size_t TotalNumBits() const { return bits * size; }
constexpr T Value(size_t index) const { return Value(std::is_signed<T>(), index); }
constexpr void SetValue(size_t index, T value) {
const size_t pos = position + bits * index;
storage = (storage & ~GetElementMask(index)) |
((static_cast<StorageType>(value) << pos) & GetElementMask(index));
}
Ref operator[](size_t index) { return Ref(this, index); }
constexpr const ConstRef operator[](size_t index) const { return ConstRef(this, index); }
constexpr Iterator begin() { return Iterator(this, 0); }
constexpr Iterator end() { return Iterator(this, size); }
constexpr ConstIterator begin() const { return ConstIterator(this, 0); }
constexpr ConstIterator end() const { return ConstIterator(this, size); }
constexpr ConstIterator cbegin() const { return begin(); }
constexpr ConstIterator cend() const { return end(); }
private:
// Unsigned version of StorageType
using StorageTypeU = std::make_unsigned_t<StorageType>;
constexpr T Value(std::true_type, size_t index) const
{
const size_t pos = position + bits * index;
const size_t shift_amount = 8 * sizeof(StorageType) - bits;
return static_cast<T>((storage << (shift_amount - pos)) >> shift_amount);
}
constexpr T Value(std::false_type, size_t index) const
{
const size_t pos = position + bits * index;
return static_cast<T>((storage & GetElementMask(index)) >> pos);
}
static constexpr StorageType GetElementMask(size_t index)
{
const size_t pos = position + bits * index;
return (std::numeric_limits<StorageTypeU>::max() >> (8 * sizeof(StorageType) - bits)) << pos;
}
StorageType storage;
static_assert(bits * size + position <= 8 * sizeof(StorageType), "Bitfield array out of range");
static_assert(sizeof(T) <= sizeof(StorageType), "T must fit in StorageType");
// And, you know, just in case people specify something stupid like bits=position=0x80000000
static_assert(position < 8 * sizeof(StorageType), "Invalid position");
static_assert(bits <= 8 * sizeof(T), "Invalid number of bits");
static_assert(bits > 0, "Invalid number of bits");
static_assert(size <= 8 * sizeof(StorageType), "Invalid size");
static_assert(size > 0, "Invalid size");
};
#pragma pack()
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstRef
{
friend struct BitFieldArray<position, bits, size, T, S>;
friend class BitFieldArrayConstIterator<position, bits, size, T, S>;
public:
constexpr T Value() const { return m_array->Value(m_index); };
constexpr operator T() const { return Value(); }
private:
constexpr BitFieldArrayConstRef(const BitFieldArray<position, bits, size, T, S>* array,
size_t index)
: m_array(array), m_index(index)
{
}
const BitFieldArray<position, bits, size, T, S>* const m_array;
const size_t m_index;
};
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayRef
{
friend struct BitFieldArray<position, bits, size, T, S>;
friend class BitFieldArrayIterator<position, bits, size, T, S>;
public:
constexpr T Value() const { return m_array->Value(m_index); };
constexpr operator T() const { return Value(); }
T operator=(const BitFieldArrayRef<position, bits, size, T, S>& value) const
{
m_array->SetValue(m_index, value);
return value;
}
T operator=(T value) const
{
m_array->SetValue(m_index, value);
return value;
}
private:
constexpr BitFieldArrayRef(BitFieldArray<position, bits, size, T, S>* array, size_t index)
: m_array(array), m_index(index)
{
}
BitFieldArray<position, bits, size, T, S>* const m_array;
const size_t m_index;
};
// Satisfies LegacyOutputIterator / std::output_iterator.
// Does not satisfy LegacyInputIterator / std::input_iterator as std::output_iterator_tag does not
// extend std::input_iterator_tag.
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
// references instead of proxy objects.
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayIterator
{
friend struct BitFieldArray<position, bits, size, T, S>;
public:
using iterator_category = std::output_iterator_tag;
using value_type = T;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = BitFieldArrayRef<position, bits, size, T, S>;
private:
constexpr BitFieldArrayIterator(BitFieldArray<position, bits, size, T, S>* array, size_t index)
: m_array(array), m_index(index)
{
}
public:
// Required by std::input_or_output_iterator
constexpr BitFieldArrayIterator() = default;
// Required by LegacyIterator
constexpr BitFieldArrayIterator(const BitFieldArrayIterator& other) = default;
// Required by LegacyIterator
BitFieldArrayIterator& operator=(const BitFieldArrayIterator& other) = default;
// Move constructor and assignment operators, explicitly defined for completeness
constexpr BitFieldArrayIterator(BitFieldArrayIterator&& other) = default;
BitFieldArrayIterator& operator=(BitFieldArrayIterator&& other) = default;
public:
BitFieldArrayIterator& operator++()
{
m_index++;
return *this;
}
BitFieldArrayIterator operator++(int)
{
BitFieldArrayIterator other(*this);
++*this;
return other;
}
constexpr reference operator*() const { return reference(m_array, m_index); }
constexpr bool operator==(BitFieldArrayIterator other) const { return m_index == other.m_index; }
constexpr bool operator!=(BitFieldArrayIterator other) const { return m_index != other.m_index; }
private:
BitFieldArray<position, bits, size, T, S>* m_array;
size_t m_index;
};
// Satisfies LegacyInputIterator / std::input_iterator.
// Does not satisfy LegacyForwardIterator / std::forward_iterator, as that requires use of real
// references instead of proxy objects.
// This iterator allows use of BitFieldArray in range-based for loops, and with fmt::join.
template <std::size_t position, std::size_t bits, std::size_t size, typename T, typename S>
class BitFieldArrayConstIterator
{
friend struct BitFieldArray<position, bits, size, T, S>;
public:
using iterator_category = std::input_iterator_tag;
using value_type = T;
using difference_type = ptrdiff_t;
using pointer = void;
using reference = BitFieldArrayConstRef<position, bits, size, T, S>;
private:
constexpr BitFieldArrayConstIterator(const BitFieldArray<position, bits, size, T, S>* array,
size_t index)
: m_array(array), m_index(index)
{
}
public:
// Required by std::input_or_output_iterator
constexpr BitFieldArrayConstIterator() = default;
// Required by LegacyIterator
constexpr BitFieldArrayConstIterator(const BitFieldArrayConstIterator& other) = default;
// Required by LegacyIterator
BitFieldArrayConstIterator& operator=(const BitFieldArrayConstIterator& other) = default;
// Move constructor and assignment operators, explicitly defined for completeness
constexpr BitFieldArrayConstIterator(BitFieldArrayConstIterator&& other) = default;
BitFieldArrayConstIterator& operator=(BitFieldArrayConstIterator&& other) = default;
public:
BitFieldArrayConstIterator& operator++()
{
m_index++;
return *this;
}
BitFieldArrayConstIterator operator++(int)
{
BitFieldArrayConstIterator other(*this);
++*this;
return other;
}
constexpr reference operator*() const { return reference(m_array, m_index); }
constexpr bool operator==(BitFieldArrayConstIterator other) const
{
return m_index == other.m_index;
}
constexpr bool operator!=(BitFieldArrayConstIterator other) const
{
return m_index != other.m_index;
}
private:
const BitFieldArray<position, bits, size, T, S>* m_array;
size_t m_index;
};
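// A usage sketch for a hypothetical layout: six 3-bit values packed into one
// u32, accessed through the proxy references and iterable with range-for.
//
// BitFieldArray<0, 3, 6, u32> fields{};
// fields[0] = 5;                // writes through BitFieldArrayRef
// const u32 first = fields[0];  // reads through the proxy's operator T
// for (const u32 value : fields) {
//     // iteration yields each packed element in order
// }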

View File

@ -15,11 +15,19 @@
#define CONCAT2(x, y) DO_CONCAT2(x, y)
#define DO_CONCAT2(x, y) x##y
// helper macro to properly align structure members.
// Calling INSERT_PADDING_BYTES will add a new member variable with a name like "pad121",
// depending on the current source line to make sure variable names are unique.
#define INSERT_PADDING_BYTES(num_bytes) u8 CONCAT2(pad, __LINE__)[(num_bytes)]
#define INSERT_PADDING_WORDS(num_words) u32 CONCAT2(pad, __LINE__)[(num_words)]
/// Helper macros to insert unused bytes or words to properly align structs. These values will be
/// zero-initialized.
#define INSERT_PADDING_BYTES(num_bytes) \
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__) {}
#define INSERT_PADDING_WORDS(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__) {}
/// These are similar to the INSERT_PADDING_* macros but do not zero-initialize the contents.
/// This keeps the structure trivial to construct.
#define INSERT_PADDING_BYTES_NOINIT(num_bytes) \
[[maybe_unused]] std::array<u8, num_bytes> CONCAT2(pad, __LINE__)
#define INSERT_PADDING_WORDS_NOINIT(num_words) \
[[maybe_unused]] std::array<u32, num_words> CONCAT2(pad, __LINE__)
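// For illustration, a hypothetical register block padded to its documented
// size with both macro flavors:
//
// struct ExampleRegs {
//     u32 control;
//     INSERT_PADDING_WORDS(3);         // zero-initialized filler
//     u32 status;
//     INSERT_PADDING_BYTES_NOINIT(8);  // uninitialized, keeps the struct trivial
// };
// static_assert(sizeof(ExampleRegs) == 0x1C, "ExampleRegs must match the documented layout");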
// Inlining
#ifdef _WIN32

View File

@ -11,6 +11,15 @@
namespace Common {
/**
 * Identity hash for keys that are already hashes; lets std::unordered_map
 * skip a redundant hashing pass
*/
struct IdentityHash {
u64 operator()(const u64 hash) const {
return hash;
}
};
/**
* Computes a 64-bit hash over the specified block of data
* @param data Block of data to compute hash over
@ -33,6 +42,14 @@ static inline u64 ComputeStructHash64(const T& data) noexcept {
return ComputeHash64(&data, sizeof(data));
}
/**
 * Combines hash lhs with hash rhs into a single digest (boost::hash_combine
 * style). The result is well-mixed, though not guaranteed to be unique.
*/
static inline std::size_t HashCombine(std::size_t lhs, std::size_t rhs) noexcept {
lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
return lhs;
}
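// A sketch of how these pair in a cache (CachedProgram and BuildProgram are
// hypothetical): the key is already a hash, so IdentityHash forwards it
// unchanged, and HashCombine folds two digests into one key.
//
// std::unordered_map<u64, CachedProgram, Common::IdentityHash> cache;
// const u64 key = Common::HashCombine(Common::ComputeHash64(code, code_size),
//                                     Common::ComputeStructHash64(config));
// cache.try_emplace(key, BuildProgram(code, config));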
/// A helper template that ensures the padding in a struct is initialized by memsetting to 0.
template <typename T>
struct HashableStruct {

src/common/intrusive_ptr.h (new file, 261 lines)
View File

@ -0,0 +1,261 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <cstddef>
#include <utility>
#include <memory>
#include <atomic>
#include <type_traits>
/// Simple reference counter for single threaded environments
class SingleThreadCounter {
public:
inline void AddRef() {
count++;
}
inline bool Release() {
return --count == 0;
}
private:
std::size_t count = 1;
};
/// Thread-safe reference counter with atomics
class MultiThreadCounter {
public:
MultiThreadCounter() {
count.store(1, std::memory_order_relaxed);
}
inline void AddRef() {
count.fetch_add(1, std::memory_order_relaxed);
}
inline bool Release() {
auto result = count.fetch_sub(1, std::memory_order_acq_rel);
return result == 1;
}
private:
std::atomic_size_t count;
};
template <typename T>
class IntrusivePtr;
template <typename T, typename Deleter = std::default_delete<T>,
typename ReferenceOps = SingleThreadCounter>
class IntrusivePtrEnabled {
public:
using IntrusivePtrType = IntrusivePtr<T>;
using EnabledBase = T;
using EnabledDeleter = Deleter;
using EnabledReferenceOp = ReferenceOps;
IntrusivePtrEnabled() = default;
IntrusivePtrEnabled(const IntrusivePtrEnabled &) = delete;
void operator=(const IntrusivePtrEnabled &) = delete;
/// Decrement the reference counter and optionally free the memory
inline void ReleaseRef() {
if (ref_counter.Release()) {
Deleter()(static_cast<T*>(this));
}
}
/// Increment the reference counter
inline void AddRef() {
ref_counter.AddRef();
}
protected:
IntrusivePtr<T> RefFromThis();
private:
ReferenceOps ref_counter;
};
/**
* Lightweight alternative to std::shared_ptr for reference counting
* usecases
*/
template <typename T>
class IntrusivePtr {
using ReferenceBase = IntrusivePtrEnabled<
typename T::EnabledBase,
typename T::EnabledDeleter,
typename T::EnabledReferenceOp>;
template <typename U>
friend class IntrusivePtr;
public:
IntrusivePtr() = default;
explicit IntrusivePtr(T *handle) : data(handle) {}
template <typename U>
IntrusivePtr(const IntrusivePtr<U> &other) {
*this = other;
}
IntrusivePtr(const IntrusivePtr &other) {
*this = other;
}
template <typename U>
IntrusivePtr(IntrusivePtr<U> &&other) noexcept {
*this = std::move(other);
}
IntrusivePtr(IntrusivePtr &&other) noexcept {
*this = std::move(other);
}
~IntrusivePtr() {
Reset();
}
/// Returns a reference to the underlying data
T& operator*() {
return *data;
}
/// Returns an immutable reference to the underlying data
const T& operator*() const {
return *data;
}
/// Returns a pointer to the underlying data
T* operator->() {
return data;
}
/// Returns an immutable pointer to the underlying data
const T* operator->() const {
return data;
}
/// Returns true if the underlying pointer is valid
bool IsValid() const {
return data != nullptr;
}
/// Default comparison operators
auto operator<=>(const IntrusivePtr& other) const = default;
/// Returns the raw pointer to the data
T* Get() {
return data;
}
/// Returns an immutable raw pointer to the data
const T* Get() const {
return data;
}
void Reset() {
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
// Also makes sure that the pointer type actually inherits from this type.
if (data)
static_cast<ReferenceBase*>(data)->ReleaseRef();
data = nullptr;
}
template <typename U>
IntrusivePtr& operator=(const IntrusivePtr<U>& other) {
static_assert(std::is_base_of_v<T, U>, "Cannot safely assign downcasted intrusive pointers.");
Reset();
data = static_cast<T*>(other.data);
// Static up-cast here to avoid potential issues with multiple intrusive inheritance.
// Also makes sure that the pointer type actually inherits from this type.
if (data) {
static_cast<ReferenceBase*>(data)->AddRef();
}
return *this;
}
IntrusivePtr& operator=(const IntrusivePtr& other) {
if (this != &other) {
Reset();
data = other.data;
if (data)
static_cast<ReferenceBase*>(data)->AddRef();
}
return *this;
}
template <typename U>
IntrusivePtr &operator=(IntrusivePtr<U> &&other) noexcept {
Reset();
data = std::exchange(other.data, nullptr);
return *this;
}
IntrusivePtr &operator=(IntrusivePtr &&other) noexcept {
if (this != &other) {
Reset();
data = std::exchange(other.data, nullptr);
}
return *this;
}
T* Release() & {
return std::exchange(data, nullptr);
}
T* Release() && {
return std::exchange(data, nullptr);
}
private:
T* data = nullptr;
};
template <typename T, typename Deleter, typename ReferenceOps>
IntrusivePtr<T> IntrusivePtrEnabled<T, Deleter, ReferenceOps>::RefFromThis() {
AddRef();
return IntrusivePtr<T>(static_cast<T*>(this));
}
template <typename Derived>
using DerivedIntrusivePtrType = IntrusivePtr<Derived>;
template <typename T, typename... P>
DerivedIntrusivePtrType<T> MakeHandle(P &&... p) {
return DerivedIntrusivePtrType<T>(new T(std::forward<P>(p)...));
}
template <typename Base, typename Derived, typename... P>
typename Base::IntrusivePtrType MakeDerivedHandle(P &&... p) {
return typename Base::IntrusivePtrType(new Derived(std::forward<P>(p)...));
}
template <typename T>
using ThreadSafeIntrusivePtrEnabled = IntrusivePtrEnabled<T, std::default_delete<T>, MultiThreadCounter>;
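// A usage sketch with a hypothetical type: deriving from IntrusivePtrEnabled
// embeds the counter in the object, and MakeHandle starts it at refcount 1.
//
// class Widget : public IntrusivePtrEnabled<Widget> {};
//
// IntrusivePtr<Widget> a = MakeHandle<Widget>();
// IntrusivePtr<Widget> b = a;  // AddRef, count == 2
// a.Reset();                   // Release, count == 1
// // The Widget is freed once b also goes out of scope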

View File

@ -0,0 +1,70 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "common/object_pool.h"
#include <cstdlib>
#include <cstring>
#ifdef _WIN32
#include <malloc.h>
#endif
void* memalign_alloc(size_t boundary, size_t size) {
#if defined(_WIN32)
return _aligned_malloc(size, boundary);
#elif defined(_ISOC11_SOURCE)
return aligned_alloc(boundary, size);
#elif (_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600)
void *ptr = nullptr;
if (posix_memalign(&ptr, boundary, size) != 0) {
return nullptr;
}
return ptr;
#else
// Align stuff ourselves. Kinda ugly, but will work anywhere.
void **place;
uintptr_t addr = 0;
void *ptr = malloc(boundary + size + sizeof(uintptr_t));
if (ptr == nullptr) {
return nullptr;
}
addr = ((uintptr_t)ptr + sizeof(uintptr_t) + boundary) & ~(boundary - 1);
place = (void **) addr;
place[-1] = ptr;
return (void *) addr;
#endif
}
void memalign_free(void *ptr) {
#if defined(_WIN32)
_aligned_free(ptr);
#elif !defined(_ISOC11_SOURCE) && !((_POSIX_C_SOURCE >= 200112L) || (_XOPEN_SOURCE >= 600))
if (ptr != nullptr) {
void **p = (void **) ptr;
free(p[-1]);
}
#else
free(ptr);
#endif
}

src/common/object_pool.h (new file, 148 lines)
View File

@ -0,0 +1,148 @@
/* Copyright (c) 2017-2022 Hans-Kristian Arntzen
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#pragma once
#include <memory>
#include <mutex>
#include <vector>
#include <algorithm>
#include <cstdlib>
void *memalign_alloc(size_t boundary, size_t size);
void memalign_free(void *ptr);
template <typename T>
struct AlignedAllocation {
static void* operator new(size_t size) {
void* ret = memalign_alloc(alignof(T), size);
if (!ret) throw std::bad_alloc();
return ret;
}
static void* operator new[](size_t size) {
void* ret = memalign_alloc(alignof(T), size);
if (!ret) throw std::bad_alloc();
return ret;
}
static void operator delete(void *ptr) {
return memalign_free(ptr);
}
static void operator delete[](void *ptr) {
return memalign_free(ptr);
}
};
/**
* Allocates objects of type T in batches of 64 * n where
* n is the number of times the pool has grown. So the first
* time it will allocate 64, then 128 objects etc.
*/
template<typename T>
class ObjectPool {
public:
template<typename... P>
T* Allocate(P&&... p) {
#ifndef OBJECT_POOL_DEBUG
if (vacants.empty()) {
unsigned num_objects = 64u << memory.size();
T *ptr = static_cast<T*>(memalign_alloc(std::max<std::size_t>(64, alignof(T)),
                                        num_objects * sizeof(T)));
if (!ptr) {
return nullptr;
}
for (unsigned i = 0; i < num_objects; i++) {
vacants.push_back(&ptr[i]);
}
memory.emplace_back(ptr);
}
T *ptr = vacants.back();
vacants.pop_back();
new(ptr) T(std::forward<P>(p)...);
return ptr;
#else
return new T(std::forward<P>(p)...);
#endif
}
void Free(T *ptr) {
#ifndef OBJECT_POOL_DEBUG
ptr->~T();
vacants.push_back(ptr);
#else
delete ptr;
#endif
}
void Clear() {
#ifndef OBJECT_POOL_DEBUG
vacants.clear();
memory.clear();
#endif
}
protected:
#ifndef OBJECT_POOL_DEBUG
std::vector<T*> vacants;
struct MallocDeleter {
void operator()(T *ptr) {
memalign_free(ptr);
}
};
std::vector<std::unique_ptr<T, MallocDeleter>> memory;
#endif
};
template<typename T>
class ThreadSafeObjectPool : private ObjectPool<T> {
public:
template<typename... P>
T* Allocate(P &&... p) {
std::lock_guard<std::mutex> holder{lock};
return ObjectPool<T>::Allocate(std::forward<P>(p)...);
}
void Free(T *ptr) {
#ifndef OBJECT_POOL_DEBUG
ptr->~T();
std::lock_guard<std::mutex> holder{lock};
this->vacants.push_back(ptr);
#else
delete ptr;
#endif
}
void Clear() {
std::lock_guard<std::mutex> holder{lock};
ObjectPool<T>::Clear();
}
private:
std::mutex lock;
};
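// A usage sketch with a hypothetical element type: objects are placement-new'd
// into recycled slab slots, so steady-state Allocate/Free skip the heap.
//
// struct Particle {
//     Particle(float x, float y) : x(x), y(y) {}
//     float x, y;
// };
//
// ObjectPool<Particle> pool;
// Particle* p = pool.Allocate(1.0f, 2.0f);  // constructs in-place in a slab slot
// pool.Free(p);                             // runs the destructor, recycles the slot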

View File

@ -158,10 +158,10 @@ struct ABIFrameInfo {
inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment,
std::size_t needed_frame_size) {
int count = (regs & ABI_ALL_GPRS).count();
std::size_t count = (regs & ABI_ALL_GPRS).count();
rsp_alignment -= count * 8;
std::size_t subtraction = 0;
int xmm_count = (regs & ABI_ALL_XMMS).count();
std::size_t xmm_count = (regs & ABI_ALL_XMMS).count();
if (xmm_count) {
// If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xF;

View File

@ -29,11 +29,9 @@ enum class LayoutOption {
SingleScreen,
LargeScreen,
SideScreen,
// Similar to default, but better for mobile devices in portrait mode. The top screen is
// clamped to the top of the frame, and the bottom screen is enlarged to match the top screen.
MobilePortrait,
// Similar to LargeScreen, but better for mobile devices in landscape mode. The screens are
// clamped to the top of the frame, and the bottom screen is a bit bigger.
MobileLandscape,
@ -116,7 +114,6 @@ namespace NativeAnalog {
enum Values {
CirclePad,
CStick,
NumAnalogs,
};

View File

@ -9,12 +9,12 @@ add_library(video_core STATIC
pica.cpp
pica.h
pica_state.h
pica_types.h
primitive_assembly.cpp
primitive_assembly.h
rasterizer_interface.h
regs.cpp
regs.h
pica_regs.inc
pica.cpp
pica.h
regs_framebuffer.h
regs_lighting.h
regs_pipeline.h
@ -23,6 +23,15 @@ add_library(video_core STATIC
regs_texturing.h
renderer_base.cpp
renderer_base.h
common/backend.h
common/buffer.h
common/framebuffer.h
common/pica_types.h
common/shader_gen.cpp
common/shader_gen.h
common/shader.h
common/texture.h
common/pipeline.h
renderer_opengl/frame_dumper_opengl.cpp
renderer_opengl/frame_dumper_opengl.h
renderer_opengl/gl_rasterizer.cpp
@ -73,16 +82,21 @@ add_library(video_core STATIC
renderer_vulkan/pica_to_vulkan.h
renderer_vulkan/renderer_vulkan.cpp
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_backend.cpp
renderer_vulkan/vk_backend.h
renderer_vulkan/vk_buffer.cpp
renderer_vulkan/vk_buffer.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_format_util.cpp
renderer_vulkan/vk_format_util.h
renderer_vulkan/vk_instance.cpp
renderer_vulkan/vk_instance.h
renderer_vulkan/vk_pipeline_builder.cpp
renderer_vulkan/vk_pipeline_builder.h
renderer_vulkan/vk_pipeline.cpp
renderer_vulkan/vk_pipeline.h
renderer_vulkan/vk_platform.h
renderer_vulkan/vk_rasterizer_cache.cpp
renderer_vulkan/vk_rasterizer_cache.h
renderer_vulkan/vk_rasterizer.cpp
@ -90,6 +104,8 @@ add_library(video_core STATIC
renderer_vulkan/vk_shader_state.h
renderer_vulkan/vk_shader_gen.cpp
renderer_vulkan/vk_shader_gen.h
renderer_vulkan/vk_shader.cpp
renderer_vulkan/vk_shader.h
renderer_vulkan/vk_state.cpp
renderer_vulkan/vk_state.h
renderer_vulkan/vk_surface_params.cpp
@ -180,7 +196,7 @@ target_link_libraries(video_core PRIVATE glad::glad glm::glm nihstro-headers Boo
# Include Vulkan headers
target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include)
target_include_directories(video_core PRIVATE ../../externals/vma/include)
target_link_libraries(video_core PRIVATE glslang SPIRV glslang-default-resource-limits OGLCompiler)
target_link_libraries(video_core PRIVATE glslang SPIRV robin_hood::robin_hood)
if (ARCHITECTURE_x86_64)
target_link_libraries(video_core PUBLIC xbyak::xbyak)

View File

@ -0,0 +1,60 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/object_pool.h"
#include "common/vector_math.h"
#include "video_core/common/pipeline.h"
#include "video_core/common/framebuffer.h"
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
/// Common interface of a video backend
class BackendBase {
public:
BackendBase(Frontend::EmuWindow& window) : window(window) {}
virtual ~BackendBase() = default;
// Triggers a swapchain buffer swap
virtual void SwapBuffers() = 0;
// Creates a backend specific texture handle
virtual TextureHandle CreateTexture(TextureInfo info) = 0;
// Creates a backend specific buffer handle
virtual BufferHandle CreateBuffer(BufferInfo info) = 0;
// Creates a backend specific framebuffer handle
virtual FramebufferHandle CreateFramebuffer(FramebufferInfo info) = 0;
// Creates a backend specific pipeline handle
virtual PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) = 0;
// Creates a backend specific sampler object
virtual SamplerHandle CreateSampler(SamplerInfo info) = 0;
// Start a draw operation
virtual void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) = 0;
// Start an indexed draw operation
virtual void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) = 0;
// Executes a compute shader
virtual void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
Common::Vec3<u32> groups) = 0;
private:
Frontend::EmuWindow& window;
};
} // namespace VideoCore
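// A usage sketch of the interface above (the backend instance, handles, and
// counts are hypothetical):
//
// using namespace VideoCore;
// BufferHandle vbo = backend.CreateBuffer({.capacity = vertex_bytes,
//                                          .usage = BufferUsage::Vertex});
// PipelineHandle pipeline = backend.CreatePipeline(PipelineType::Graphics, pipeline_info);
// backend.Draw(pipeline, framebuffer, vbo, 0, num_vertices);
// backend.SwapBuffers();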

View File

@ -0,0 +1,102 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include "common/hash.h"
#include "common/intrusive_ptr.h"
namespace VideoCore {
enum class BufferUsage : u8 {
Vertex = 0,
Index = 1,
Uniform = 2,
Texel = 3,
Staging = 4,
Undefined = 255
};
enum class ViewFormat : u8 {
R32Float = 0,
R32G32Float = 1,
R32G32B32Float = 2,
R32G32B32A32Float = 3,
Undefined = 255
};
constexpr u32 MAX_BUFFER_VIEWS = 3;
struct BufferInfo {
u32 capacity = 0;
BufferUsage usage = BufferUsage::Undefined;
std::array<ViewFormat, MAX_BUFFER_VIEWS> views{ViewFormat::Undefined, ViewFormat::Undefined,
                                               ViewFormat::Undefined};
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
static_assert(sizeof(BufferInfo) == 8, "BufferInfo not packed!");
static_assert(std::is_standard_layout_v<BufferInfo>, "BufferInfo is not a standard layout!");
class BufferBase : public IntrusivePtrEnabled<BufferBase> {
public:
BufferBase() = default;
BufferBase(const BufferInfo& info) : info(info) {}
virtual ~BufferBase() = default;
/// Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
/// and the optional alignment requirement.
/// The actual used size must be specified on unmapping the chunk.
virtual std::span<u8> Map(u32 size, u32 alignment = 0) { return {}; }
/// Flushes write to buffer memory
virtual void Commit(u32 size = 0) {}
/// Returns the size of the buffer in bytes
u32 GetCapacity() const {
return info.capacity;
}
/// Returns the usage of the buffer
BufferUsage GetUsage() const {
return info.usage;
}
/// Returns the starting offset of the currently mapped buffer slice
u64 GetCurrentOffset() const {
return buffer_offset;
}
/// Returns whether the buffer was invalidated by the most recent Map call
bool IsInvalid() const {
return invalid;
}
/// Invalidates the buffer
void Invalidate() {
buffer_offset = 0;
invalid = true;
}
protected:
BufferInfo info{};
u32 buffer_offset = 0;
bool invalid = false;
};
using BufferHandle = IntrusivePtr<BufferBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::BufferInfo> {
std::size_t operator()(const VideoCore::BufferInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
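// A sketch of the Map/Commit contract documented above (buffer, src_data and
// upload_size are hypothetical):
//
// const std::span<u8> staging = buffer->Map(upload_size, 4);
// if (buffer->IsInvalid()) {
//     // The backing storage was reset by this Map; any previously
//     // uploaded offsets are stale and must be re-uploaded
// }
// std::memcpy(staging.data(), src_data, upload_size);
// buffer->Commit(upload_size);  // commit only the bytes actually written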

View File

@ -0,0 +1,69 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/common/texture.h"
namespace VideoCore {
enum class MSAASamples : u32 {
x1,
x2,
x4,
x8
};
/**
* Information about a framebuffer
*/
struct FramebufferInfo {
TextureHandle color;
TextureHandle depth_stencil;
MSAASamples samples = MSAASamples::x1;
Rect2D draw_rect{};
/// Hashes the framebuffer object and returns a unique identifier
u64 Hash() const {
    // The only member an IntrusivePtr has is the raw pointer to the
    // handle, so it's fine to hash it directly
return Common::ComputeStructHash64(*this);
}
};
/**
* A framebuffer is a collection of render targets and their configuration
*/
class FramebufferBase : public IntrusivePtrEnabled<FramebufferBase> {
public:
FramebufferBase(const FramebufferInfo& info) : info(info) {}
virtual ~FramebufferBase() = default;
/// Returns an immutable reference to the color attachment
const TextureHandle& GetColorAttachment() const {
return info.color;
}
/// Returns an immutable reference to the depth/stencil attachment
const TextureHandle& GetDepthStencilAttachment() const {
return info.depth_stencil;
}
/// Returns how many samples the framebuffer takes
MSAASamples GetMSAASamples() const {
return info.samples;
}
/// Returns the rendering area
Rect2D GetDrawRectangle() const {
return info.draw_rect;
}
protected:
FramebufferInfo info;
};
using FramebufferHandle = IntrusivePtr<FramebufferBase>;
} // namespace VideoCore

View File

@ -0,0 +1,157 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <cmath>
#include <cstring>
#include <boost/serialization/access.hpp>
#include "common/common_types.h"
namespace Pica {
/**
* Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
* floating point.
*
* When decoding, format is as follows:
* - The first `M` bits are the mantissa
* - The next `E` bits are the exponent
* - The last bit is the sign bit
*
* @todo Verify on HW if this conversion is sufficiently accurate.
*/
template <u32 M, u32 E>
struct Float {
static constexpr u32 width = M + E + 1;
static constexpr u32 bias = 128 - (1 << (E - 1));
static constexpr u32 exponent_mask = (1 << E) - 1;
static constexpr u32 mantissa_mask = (1 << M) - 1;
static constexpr u32 sign_mask = 1 << (E + M);
public:
static Float FromFloat32(float val) {
Float ret;
ret.value = val;
return ret;
}
static Float FromRaw(u32 hex) {
Float res;
u32 exponent = (hex >> M) & exponent_mask;
const u32 mantissa = hex & mantissa_mask;
const u32 sign = (hex & sign_mask) << (31 - M - E);
if (hex & (mantissa_mask | (exponent_mask << M))) {
if (exponent == exponent_mask) {
exponent = 255;
} else {
exponent += bias;
}
hex = sign | (mantissa << (23 - M)) | (exponent << 23);
} else {
hex = sign;
}
std::memcpy(&res.value, &hex, sizeof(float));
return res;
}
static Float Zero() {
return FromFloat32(0.f);
}
// Not recommended for anything but logging
float ToFloat32() const {
return value;
}
Float operator*(const Float& flt) const {
float result = value * flt.ToFloat32();
// PICA gives 0 instead of NaN when multiplying by inf
if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
result = 0.f;
}
return Float::FromFloat32(result);
}
Float operator/(const Float& flt) const {
return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
}
Float operator+(const Float& flt) const {
return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
}
Float operator-(const Float& flt) const {
return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
}
Float& operator*=(const Float& flt) {
value = operator*(flt).value;
return *this;
}
Float& operator/=(const Float& flt) {
value /= flt.ToFloat32();
return *this;
}
Float& operator+=(const Float& flt) {
value += flt.ToFloat32();
return *this;
}
Float& operator-=(const Float& flt) {
value -= flt.ToFloat32();
return *this;
}
Float operator-() const {
return Float::FromFloat32(-ToFloat32());
}
bool operator<(const Float& flt) const {
return ToFloat32() < flt.ToFloat32();
}
bool operator>(const Float& flt) const {
return ToFloat32() > flt.ToFloat32();
}
bool operator>=(const Float& flt) const {
return ToFloat32() >= flt.ToFloat32();
}
bool operator<=(const Float& flt) const {
return ToFloat32() <= flt.ToFloat32();
}
bool operator==(const Float& flt) const {
return ToFloat32() == flt.ToFloat32();
}
bool operator!=(const Float& flt) const {
return ToFloat32() != flt.ToFloat32();
}
private:
// Stored as a regular float, merely for convenience
// TODO: Perform proper arithmetic on this!
float value;
friend class boost::serialization::access;
template <class Archive>
void serialize(Archive& ar, const unsigned int file_version) {
ar& value;
}
};
using Float24 = Float<16, 7>;
using Float20 = Float<12, 7>;
using Float16 = Float<10, 5>;
} // namespace Pica
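// Two worked examples of the format described above, assuming Float24
// (M = 16, E = 7, so bias = 128 - (1 << 6) = 64):
//
// // 1.0f encodes as mantissa 0, sign 0, biased exponent 127 - 64 = 63
// const auto one = Pica::Float24::FromRaw(63u << 16);
// // one.ToFloat32() == 1.0f
//
// // PICA quirk handled by operator*: inf * 0 yields 0 instead of NaN
// const auto prod = Pica::Float24::FromFloat32(INFINITY) * Pica::Float24::Zero();
// // prod.ToFloat32() == 0.0f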

View File

@ -0,0 +1,223 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/bit_field.h"
#include "common/bit_field_array.h"
#include "common/hash.h"
#include "video_core/common/buffer.h"
#include "video_core/common/texture.h"
#include "video_core/common/shader.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_pipeline.h"
namespace VideoCore {
constexpr u32 MAX_SHADER_STAGES = 3;
constexpr u32 MAX_VERTEX_ATTRIBUTES = 8;
constexpr u32 MAX_BINDINGS_IN_GROUP = 7;
constexpr u32 MAX_BINDING_GROUPS = 6;
enum class PipelineType : u8 {
Compute = 0,
Graphics = 1
};
enum class BindingType : u32 {
None = 0,
Uniform = 1,
UniformDynamic = 2,
TexelBuffer = 3,
Texture = 4,
Sampler = 5,
StorageImage = 6
};
using BindingGroup = BitFieldArray<0, 3, MAX_BINDINGS_IN_GROUP, BindingType>;
/**
* Describes all the resources used in the pipeline
*/
struct PipelineLayoutInfo {
u8 group_count = 0;
std::array<BindingGroup, MAX_BINDING_GROUPS> binding_groups{};
u8 push_constant_block_size = 0;
};
/**
* The pipeline state is tightly packed with bitfields to reduce
* the overhead of hashing as much as possible
*/
union RasterizationState {
u8 value = 0;
BitField<0, 2, Pica::TriangleTopology> topology;
BitField<4, 2, Pica::CullMode> cull_mode;
};
union DepthStencilState {
u64 value = 0;
BitField<0, 1, u64> depth_test_enable;
BitField<1, 1, u64> depth_write_enable;
BitField<2, 1, u64> stencil_test_enable;
BitField<3, 3, Pica::CompareFunc> depth_compare_op;
BitField<6, 3, Pica::StencilAction> stencil_fail_op;
BitField<9, 3, Pica::StencilAction> stencil_pass_op;
BitField<12, 3, Pica::StencilAction> stencil_depth_fail_op;
BitField<15, 3, Pica::CompareFunc> stencil_compare_op;
BitField<18, 8, u64> stencil_reference;
BitField<26, 8, u64> stencil_compare_mask;
BitField<34, 8, u64> stencil_write_mask;
};
union BlendState {
u32 value = 0;
BitField<0, 4, Pica::BlendFactor> src_color_blend_factor;
BitField<4, 4, Pica::BlendFactor> dst_color_blend_factor;
BitField<8, 3, Pica::BlendEquation> color_blend_eq;
BitField<11, 4, Pica::BlendFactor> src_alpha_blend_factor;
BitField<15, 4, Pica::BlendFactor> dst_alpha_blend_factor;
BitField<19, 3, Pica::BlendEquation> alpha_blend_eq;
BitField<22, 4, u32> color_write_mask;
};
enum class AttribType : u8 {
Float = 0,
Int = 1,
Short = 2
};
union VertexAttribute {
u8 value = 0;
BitField<0, 2, AttribType> type;
BitField<2, 3, u8> components;
};
#pragma pack(1)
struct VertexLayout {
u8 stride = 0;
std::array<VertexAttribute, MAX_VERTEX_ATTRIBUTES> attributes;
};
#pragma pack()
/**
* Information about a graphics/compute pipeline
*/
#pragma pack(1)
struct PipelineInfo {
std::array<ShaderHandle, MAX_SHADER_STAGES> shaders{};
VertexLayout vertex_layout{};
PipelineLayoutInfo layout{};
BlendState blending{};
DepthStencilState depth_stencil{};
RasterizationState rasterization{};
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
#pragma pack()
class PipelineBase : public IntrusivePtrEnabled<PipelineBase> {
public:
PipelineBase(PipelineType type, PipelineInfo info) :
type(type), info(info) {}
virtual ~PipelineBase() = default;
// Disable copy constructor
PipelineBase(const PipelineBase&) = delete;
PipelineBase& operator=(const PipelineBase&) = delete;
// Binds the texture in the specified slot
virtual void BindTexture(u32 group, u32 slot, TextureHandle handle) = 0;
// Binds the texture in the specified slot
virtual void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) = 0;
// Binds the sampler in the specified slot
virtual void BindSampler(u32 group, u32 slot, SamplerHandle handle) = 0;
/// Sets the primitive topology
void SetTopology(Pica::TriangleTopology topology) {
info.rasterization.topology.Assign(topology);
}
/// Sets the culling mode
void SetCullMode(Pica::CullMode mode) {
info.rasterization.cull_mode.Assign(mode);
}
/// Configures the color blending function
void SetColorBlendFunc(Pica::BlendFactor src_color_factor,
Pica::BlendFactor dst_color_factor,
Pica::BlendEquation color_eq) {
info.blending.src_color_blend_factor.Assign(src_color_factor);
info.blending.dst_color_blend_factor.Assign(dst_color_factor);
info.blending.color_blend_eq.Assign(color_eq);
}
/// Configures the alpha blending function
void SetAlphaBlendFunc(Pica::BlendFactor src_alpha_factor,
Pica::BlendFactor dst_alpha_factor,
Pica::BlendEquation alpha_eq) {
info.blending.src_alpha_blend_factor.Assign(src_alpha_factor);
info.blending.dst_alpha_blend_factor.Assign(dst_alpha_factor);
info.blending.alpha_blend_eq.Assign(alpha_eq);
}
/// Sets the color write mask
void SetColorWriteMask(u32 mask) {
info.blending.color_write_mask.Assign(mask);
}
/// Configures the depth test
void SetDepthTest(bool enable, Pica::CompareFunc compare_op) {
info.depth_stencil.depth_test_enable.Assign(enable);
info.depth_stencil.depth_compare_op.Assign(compare_op);
}
/// Enables or disables depth writes
void SetDepthWrites(bool enable) {
info.depth_stencil.depth_write_enable.Assign(enable);
}
/// Configures the stencil test
void SetStencilTest(bool enable, Pica::StencilAction fail, Pica::StencilAction pass,
Pica::StencilAction depth_fail, Pica::CompareFunc compare, u32 ref) {
info.depth_stencil.stencil_test_enable.Assign(enable);
info.depth_stencil.stencil_fail_op.Assign(fail);
info.depth_stencil.stencil_pass_op.Assign(pass);
info.depth_stencil.stencil_depth_fail_op.Assign(depth_fail);
info.depth_stencil.stencil_compare_op.Assign(compare);
info.depth_stencil.stencil_reference.Assign(ref);
}
/// Selects the bits of the stencil values participating in the stencil test
void SetStencilCompareMask(u32 mask) {
info.depth_stencil.stencil_compare_mask.Assign(mask);
}
/// Selects the bits of the stencil values updated by the stencil test
void SetStencilWriteMask(u32 mask) {
info.depth_stencil.stencil_write_mask.Assign(mask);
}
protected:
PipelineType type = PipelineType::Graphics;
PipelineInfo info{};
};
using PipelineHandle = IntrusivePtr<PipelineBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::PipelineInfo> {
std::size_t operator()(const VideoCore::PipelineInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
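// A sketch of the caching pattern this tight packing enables (the cache and
// backend are hypothetical): the struct hash doubles as the pipeline-cache key.
//
// std::unordered_map<u64, PipelineHandle, Common::IdentityHash> pipeline_cache;
//
// PipelineInfo info{};
// info.rasterization.cull_mode.Assign(Pica::CullMode::KeepAll);
// const u64 key = info.Hash();
// if (!pipeline_cache.contains(key)) {
//     pipeline_cache.emplace(key, backend.CreatePipeline(PipelineType::Graphics, info));
// }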

View File

@ -0,0 +1,62 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <string_view>
#include <vector>
#include "common/common_types.h"
#include "common/intrusive_ptr.h"
namespace VideoCore {
enum class ShaderStage : u32 {
Vertex = 0,
Geometry = 1,
Fragment = 2,
Compute = 3,
Undefined = 4
};
// Tells the module how much to optimize the bytecode
enum class ShaderOptimization : u32 {
High = 0,
Debug = 1
};
/// Compiles shader source to backend representation
class ShaderBase : public IntrusivePtrEnabled<ShaderBase> {
public:
ShaderBase(ShaderStage stage, std::string_view name, std::string&& source) :
    name(name), stage(stage), source(std::move(source)) {}
virtual ~ShaderBase() = default;
/// Compiles the shader source code
virtual bool Compile(ShaderOptimization level) = 0;
/// Returns the shader source code the module was created with
std::string_view GetSource() const {
return source;
}
/// Returns the name given to the shader module
std::string_view GetName() const {
return name;
}
/// Returns the pipeline stage the shader is assigned to
ShaderStage GetStage() const {
return stage;
}
protected:
std::string_view name = "None";
ShaderStage stage = ShaderStage::Undefined;
std::string source;
};
using ShaderHandle = IntrusivePtr<ShaderBase>;
} // namespace VideoCore
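// A usage sketch (MakeShader is a hypothetical backend factory; only the base
// class is defined here):
//
// ShaderHandle shader = MakeShader(ShaderStage::Fragment, "trivial_frag", std::move(source));
// if (!shader->Compile(ShaderOptimization::High)) {
//     LOG_ERROR(Render, "Shader {} failed to compile", shader->GetName());
// }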

View File

@ -0,0 +1,179 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_set.h"
#include "video_core/video_core.h"
#include "video_core/common/shader_gen.h"
namespace VideoCore {
PicaFSConfig::PicaFSConfig(const Pica::Regs& regs) {
scissor_test_mode = regs.rasterizer.scissor_test.mode;
depthmap_enable = regs.rasterizer.depthmap_enable;
alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: Pica::CompareFunc::Always;
texture0_type = regs.texturing.texture0.type;
texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
// We don't need these otherwise, so reset them to avoid unnecessary shader generation
alphablend_enable = {};
logic_op = {};
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(stages.size() == tev_stages.size());
for (std::size_t i = 0; i < stages.size(); i++) {
const auto& tev_stage = stages[i];
tev_stages[i].sources_raw = tev_stage.sources_raw;
tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
tev_stages[i].ops_raw = tev_stage.ops_raw;
tev_stages[i].scales_raw = tev_stage.scales_raw;
}
fog_mode = regs.texturing.fog_mode;
fog_flip = regs.texturing.fog_flip != 0;
combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
<< 4;
// Fragment lighting
lighting.enable = !regs.lighting.disable;
lighting.src_num = regs.lighting.max_light_index + 1;
for (u32 light_index = 0; light_index < lighting.src_num; ++light_index) {
u32 num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
auto& dst_light = lighting.light[light_index];
dst_light.num = num;
dst_light.directional = light.config.directional != 0;
dst_light.two_sided_diffuse = light.config.two_sided_diffuse != 0;
dst_light.geometric_factor_0 = light.config.geometric_factor_0 != 0;
dst_light.geometric_factor_1 = light.config.geometric_factor_1 != 0;
dst_light.dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
dst_light.spot_atten_enable = !regs.lighting.IsSpotAttenDisabled(num);
dst_light.shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
// This is a dummy field due to lack of the corresponding register
lighting.lut_sp.enable = true;
lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
lighting.config = regs.lighting.config0.config;
lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
lighting.bump_mode = regs.lighting.config0.bump_mode;
lighting.bump_selector = regs.lighting.config0.bump_selector;
lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
lighting.shadow_selector = regs.lighting.config0.shadow_selector;
proctex.enable = regs.texturing.main_config.texture3_enable;
if (proctex.enable) {
proctex.coord = regs.texturing.main_config.texture3_coordinates;
proctex.u_clamp = regs.texturing.proctex.u_clamp;
proctex.v_clamp = regs.texturing.proctex.v_clamp;
proctex.color_combiner = regs.texturing.proctex.color_combiner;
proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
proctex.noise_enable = regs.texturing.proctex.noise_enable;
proctex.u_shift = regs.texturing.proctex.u_shift;
proctex.v_shift = regs.texturing.proctex.v_shift;
proctex.lut_width = regs.texturing.proctex_lut.width;
proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
proctex.lod_min = regs.texturing.proctex_lut.lod_min;
proctex.lod_max = regs.texturing.proctex_lut.lod_max;
proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
Pica::FragmentOperationMode::Shadow;
shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
}
PicaVSConfig::PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
num_outputs = 0;
output_map.fill(16);
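// Example: output_mask = 0b101 selects output registers 0 and 2, so the loop
// below yields output_map[0] = 0, output_map[2] = 1 and num_outputs = 2;
// unselected registers keep the sentinel value 16.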
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
}
PicaFixedGSConfig::PicaFixedGSConfig(const Pica::Regs& regs) {
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
const std::array semantics = {
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const std::size_t semantic = static_cast<std::size_t>(semantics[comp]);
if (semantic < 24) {
semantic_maps[semantic] = {attrib, comp};
} else if (semantic != Pica::RasterizerRegs::VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
} // namespace VideoCore

View File

@ -0,0 +1,227 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <string>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
namespace VideoCore {
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
programs. The shader generator should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig {
explicit PicaFSConfig(const Pica::Regs& regs);
/// Returns the hash of the FS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index));
}
Pica::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
std::array<TevStageConfigRaw, 6> tev_stages;
bool texture2_use_coord1;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
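// Usage sketch (illustrative, not part of this commit; generator is a backend
// ShaderGeneratorBase*): backends are expected to key their shader caches on
// Hash(), e.g.
//
//   std::unordered_map<u64, std::string> fs_cache;
//   const PicaFSConfig config{regs};
//   if (auto [it, inserted] = fs_cache.try_emplace(config.Hash()); inserted) {
//       it->second = generator->GenerateFragmentShader(config, true);
//   }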
/**
* This struct contains information to identify a host vertex shader generated from PICA vertex
* shader.
*/
struct PicaVSConfig {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
/// Returns the hash of the VS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
u64 program_hash = 0;
u64 swizzle_hash = 0;
u32 main_offset = 0;
bool sanitize_mul = false;
// output_map[output register index] -> output attribute index
u32 num_outputs = 0;
std::array<u32, 16> output_map{};
};
/**
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
* shader pipeline
*/
struct PicaFixedGSConfig {
explicit PicaFixedGSConfig(const Pica::Regs& regs);
/// Returns the hash of the GS config
u64 Hash() const noexcept {
return Common::ComputeStructHash64(*this);
}
u32 vs_output_attributes = 0;
u32 gs_output_attributes = 0;
struct SemanticMap {
u32 attribute_index = 0;
u32 component_index = 0;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps{};
};
/**
* Generates backend-specific shader modules from the Pica state configuration
* @todo To be replaced with a unified shader compiler
*/
class ShaderGeneratorBase {
public:
ShaderGeneratorBase() = default;
virtual ~ShaderGeneratorBase() = default;
/**
* Generates the GLSL vertex shader program source code that accepts vertices from the software shader
* and directly passes them to the fragment shader.
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateTrivialVertexShader(bool separable_shader) = 0;
/**
* Generates the GLSL vertex shader program source code for the given VS program
* @returns String of the shader source code
*/
virtual std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) = 0;
/**
* Generates the GLSL fixed geometry shader program source code for non-GS PICA pipeline
* @returns String of the shader source code
*/
virtual std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) = 0;
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
virtual std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) = 0;
};
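// Usage sketch (the GLShaderGenerator type name is hypothetical): the renderer
// owns one generator per backend and requests shader sources on cache misses, e.g.
//
//   std::unique_ptr<ShaderGeneratorBase> generator = std::make_unique<GLShaderGenerator>();
//   const std::string vs_source = generator->GenerateVertexShader(setup, vs_config, true);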
} // namespace VideoCore

View File

@ -0,0 +1,171 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include "common/hash.h"
#include "common/intrusive_ptr.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
constexpr u32 MAX_COLOR_FORMATS = 5;
constexpr u32 MAX_DEPTH_FORMATS = 3;
enum class TextureFormat : u8 {
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
D16 = 5,
D24 = 6,
D24S8 = 7,
Undefined = 255
};
enum class TextureType : u8 {
Texture1D = 0,
Texture2D = 1,
Texture3D = 2,
Undefined = 255
};
enum class TextureViewType : u8 {
View1D = 0,
View2D = 1,
View3D = 2,
ViewCube = 3,
View1DArray = 4,
View2DArray = 5,
ViewCubeArray = 6,
Undefined = 255
};
/**
* A rectangle describing part of a texture
* @param x, y are the offset from the bottom left corner
* @param width, height are the extent of the rectangle
*/
struct Rect2D {
s32 x = 0;
s32 y = 0;
u32 width = 0;
u32 height = 0;
};
/**
* Information about a texture packed to 8 bytes
*/
struct TextureInfo {
u16 width = 0;
u16 height = 0;
u8 levels = 0;
TextureType type = TextureType::Undefined;
TextureViewType view_type = TextureViewType::Undefined;
TextureFormat format = TextureFormat::Undefined;
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
static_assert(sizeof(TextureInfo) == 8, "TextureInfo not packed!");
static_assert(std::is_standard_layout_v<TextureInfo>, "TextureInfo is not a standard layout!");
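// Field layout: width (2) + height (2) + levels (1) + type (1) + view_type (1) +
// format (1) = 8 bytes, which is what the sizeof static_assert above verifies.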
class TextureBase;
using TextureHandle = IntrusivePtr<TextureBase>;
class TextureBase : public IntrusivePtrEnabled<TextureBase> {
public:
TextureBase() = default;
TextureBase(const TextureInfo& info) : info(info) {}
virtual ~TextureBase() = default;
/// Uploads pixel data to GPU memory
virtual void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
u32 level = 0) {}
/// Downloads pixel data from GPU memory
virtual void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
u32 level = 0) {}
/// Copies the specified rectangle area to the destination texture
virtual void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
u32 src_level = 0, u32 dest_level = 0) {}
/// Returns the unique texture identifier
u64 GetHash() const {
return info.Hash();
}
/// Returns the width of the texture
u16 GetWidth() const {
return info.width;
}
/// Returns the height of the texture
u16 GetHeight() const {
return info.height;
}
/// Returns the number of mipmap levels allocated
u16 GetMipLevels() const {
return info.levels;
}
/// Returns the pixel format
TextureFormat GetFormat() const {
return info.format;
}
protected:
TextureInfo info;
};
struct SamplerInfo {
Pica::TextureFilter mag_filter;
Pica::TextureFilter min_filter;
Pica::TextureFilter mip_filter;
Pica::WrapMode wrap_s;
Pica::WrapMode wrap_t;
u32 border_color = 0;
u32 lod_min = 0;
u32 lod_max = 0;
s32 lod_bias = 0;
u64 Hash() const {
return Common::ComputeStructHash64(*this);
}
};
class SamplerBase : public IntrusivePtrEnabled<SamplerBase> {
public:
SamplerBase(SamplerInfo info) : info(info) {}
virtual ~SamplerBase() = default;
protected:
SamplerInfo info{};
};
using SamplerHandle = IntrusivePtr<SamplerBase>;
} // namespace VideoCore
namespace std {
template <>
struct hash<VideoCore::TextureInfo> {
std::size_t operator()(const VideoCore::TextureInfo& info) const noexcept {
return info.Hash();
}
};
template <>
struct hash<VideoCore::SamplerInfo> {
std::size_t operator()(const VideoCore::SamplerInfo& info) const noexcept {
return info.Hash();
}
};
} // namespace std
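// These specializations let the info structs key standard containers directly;
// a backend cache could be declared as (illustrative)
//   std::unordered_map<VideoCore::SamplerInfo, VideoCore::SamplerHandle> sampler_cache;
// Note that unordered containers additionally require an operator== for the key type.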

src/video_core/gpu.cpp Normal file
View File

@ -0,0 +1,60 @@
// Copyright 2015 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include <type_traits>
#include "core/core.h"
#include "video_core/pica.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::System& system,
Frontend::EmuWindow& emu_window) {
auto& telemetry_session = system.TelemetrySession();
auto& cpu_memory = system.Memory();
switch (Settings::values.renderer_backend) {
case Settings::RendererBackend::OpenGL:
return std::make_unique<OpenGL::RendererOpenGL>(emu_window);
case Settings::RendererBackend::Vulkan:
return std::make_unique<Vulkan::RendererVulkan>(emu_window);
default:
return nullptr;
}
}
namespace Pica {
GPU::GPU(Core::System& system, Memory::MemorySystem& memory) :
system(system), memory(memory) {
//renderer = CreateRenderer(system, )
rasterizer = renderer->Rasterizer();
}
void GPU::SwapBuffers() {
renderer->SwapBuffers();
}
void GPU::FlushAll() {
rasterizer->FlushAll();
}
void GPU::FlushRegion(PAddr addr, u32 size) {
rasterizer->FlushRegion(addr, size);
}
void GPU::InvalidateRegion(PAddr addr, u32 size) {
rasterizer->InvalidateRegion(addr, size);
}
void GPU::FlushAndInvalidateRegion(PAddr addr, u32 size) {
rasterizer->FlushAndInvalidateRegion(addr, size);
}
void GPU::ClearAll(bool flush) {
rasterizer->ClearAll(flush);
}
} // namespace Pica

src/video_core/gpu.h Normal file
View File

@ -0,0 +1,81 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <functional>
#include "core/frontend/framebuffer_layout.h"
#include "video_core/maestro.h"
namespace Core {
class System;
}
namespace Memory {
class MemorySystem;
}
namespace Frontend {
class EmuWindow;
}
namespace VideoCore {
class RendererBase;
class RasterizerInterface;
}
namespace Pica {
class Maestro;
enum class ResultStatus {
Success,
ErrorGenericDrivers,
ErrorUnsupportedGL,
};
/**
* Interface for the PICA GPU
*/
class GPU {
public:
GPU(Core::System& system, Memory::MemorySystem& memory);
~GPU() = default;
/// Swap buffers (render frame)
void SwapBuffers();
/// Notify rasterizer that all caches should be flushed to 3DS memory
void FlushAll();
/// Notify rasterizer that any caches of the specified region should be flushed to 3DS memory
void FlushRegion(PAddr addr, u32 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
void InvalidateRegion(PAddr addr, u32 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(PAddr addr, u32 size);
/// Removes as much state as possible from the rasterizer in preparation for a save/load state
void ClearAll(bool flush);
/// Request a screenshot of the next frame
void RequestScreenshot(u8* data, std::function<void()> callback,
const Layout::FramebufferLayout& layout);
/// Returns the resolution scale factor
u16 GetResolutionScaleFactor();
private:
Core::System& system;
Memory::MemorySystem& memory;
// Renderer
VideoCore::RasterizerInterface* rasterizer = nullptr;
std::unique_ptr<VideoCore::RendererBase> renderer = nullptr;
std::unique_ptr<Maestro> maestro = nullptr;
};
} // namespace Pica

View File

@ -34,13 +34,13 @@ template <typename T>
void Zero(T& o) {
static_assert(std::is_trivially_copyable_v<T>,
"It's undefined behavior to memset a non-trivially copyable type");
std::memset(&o, 0, sizeof(o));
memset(&o, 0, sizeof(o));
}
State::State() : geometry_pipeline(*this) {
auto SubmitVertex = [this](const Shader::AttributeBuffer& vertex) {
using Pica::Shader::OutputVertex;
auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1,
auto AddTriangle = [this](const OutputVertex& v0, const OutputVertex& v1,
const OutputVertex& v2) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};

View File

@ -4,7 +4,6 @@
#pragma once
#include "video_core/regs_texturing.h"
namespace Pica {
/// Initialize Pica state

View File

@ -0,0 +1,400 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
//#define PICA_REG(name, address)
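// Consumption sketch (X-macro pattern; the enum name and include path are
// illustrative):
//
//   enum class RegName : u32 {
//   #define PICA_REG(name, address) name = address,
//   #include "video_core/pica_reg_list.inc"
//   #undef PICA_REG
//   };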
PICA_REG(FINALIZE, 0x010)
PICA_REG(FACECULLING_CONFIG, 0x040)
PICA_REG(VIEWPORT_WIDTH, 0x041)
PICA_REG(VIEWPORT_INVW, 0x042)
PICA_REG(VIEWPORT_HEIGHT, 0x043)
PICA_REG(VIEWPORT_INVH, 0x044)
PICA_REG(FRAGOP_CLIP, 0x047)
PICA_REG(FRAGOP_CLIP_DATA0, 0x048)
PICA_REG(FRAGOP_CLIP_DATA1, 0x049)
PICA_REG(FRAGOP_CLIP_DATA2, 0x04A)
PICA_REG(FRAGOP_CLIP_DATA3, 0x04B)
PICA_REG(DEPTHMAP_SCALE, 0x04D)
PICA_REG(DEPTHMAP_OFFSET, 0x04E)
PICA_REG(SH_OUTMAP_TOTAL, 0x04F)
PICA_REG(SH_OUTMAP_O0, 0x050)
PICA_REG(SH_OUTMAP_O1, 0x051)
PICA_REG(SH_OUTMAP_O2, 0x052)
PICA_REG(SH_OUTMAP_O3, 0x053)
PICA_REG(SH_OUTMAP_O4, 0x054)
PICA_REG(SH_OUTMAP_O5, 0x055)
PICA_REG(SH_OUTMAP_O6, 0x056)
PICA_REG(EARLYDEPTH_FUNC, 0x061)
PICA_REG(EARLYDEPTH_TEST1, 0x062)
PICA_REG(EARLYDEPTH_CLEAR, 0x063)
PICA_REG(SH_OUTATTR_MODE, 0x064)
PICA_REG(SCISSORTEST_MODE, 0x065)
PICA_REG(SCISSORTEST_POS, 0x066)
PICA_REG(SCISSORTEST_DIM, 0x067)
PICA_REG(VIEWPORT_XY, 0x068)
PICA_REG(EARLYDEPTH_DATA, 0x06A)
PICA_REG(DEPTHMAP_ENABLE, 0x06D)
PICA_REG(RENDERBUF_DIM, 0x06E)
PICA_REG(SH_OUTATTR_CLOCK, 0x06F)
PICA_REG(TEXUNIT_CONFIG, 0x080)
PICA_REG(TEXUNIT0_BORDER_COLOR, 0x081)
PICA_REG(TEXUNIT0_DIM, 0x082)
PICA_REG(TEXUNIT0_PARAM, 0x083)
PICA_REG(TEXUNIT0_LOD, 0x084)
PICA_REG(TEXUNIT0_ADDR1, 0x085)
PICA_REG(TEXUNIT0_ADDR2, 0x086)
PICA_REG(TEXUNIT0_ADDR3, 0x087)
PICA_REG(TEXUNIT0_ADDR4, 0x088)
PICA_REG(TEXUNIT0_ADDR5, 0x089)
PICA_REG(TEXUNIT0_ADDR6, 0x08A)
PICA_REG(TEXUNIT0_SHADOW, 0x08B)
PICA_REG(TEXUNIT0_TYPE, 0x08E)
PICA_REG(LIGHTING_ENABLE0, 0x08F)
PICA_REG(TEXUNIT1_BORDER_COLOR, 0x091)
PICA_REG(TEXUNIT1_DIM, 0x092)
PICA_REG(TEXUNIT1_PARAM, 0x093)
PICA_REG(TEXUNIT1_LOD, 0x094)
PICA_REG(TEXUNIT1_ADDR, 0x095)
PICA_REG(TEXUNIT1_TYPE, 0x096)
PICA_REG(TEXUNIT2_BORDER_COLOR, 0x099)
PICA_REG(TEXUNIT2_DIM, 0x09A)
PICA_REG(TEXUNIT2_PARAM, 0x09B)
PICA_REG(TEXUNIT2_LOD, 0x09C)
PICA_REG(TEXUNIT2_ADDR, 0x09D)
PICA_REG(TEXUNIT2_TYPE, 0x09E)
PICA_REG(TEXUNIT3_PROCTEX0, 0x0A8)
PICA_REG(TEXUNIT3_PROCTEX1, 0x0A9)
PICA_REG(TEXUNIT3_PROCTEX2, 0x0AA)
PICA_REG(TEXUNIT3_PROCTEX3, 0x0AB)
PICA_REG(TEXUNIT3_PROCTEX4, 0x0AC)
PICA_REG(TEXUNIT3_PROCTEX5, 0x0AD)
PICA_REG(PROCTEX_LUT, 0x0AF)
PICA_REG(PROCTEX_LUT_DATA0, 0x0B0)
PICA_REG(PROCTEX_LUT_DATA1, 0x0B1)
PICA_REG(PROCTEX_LUT_DATA2, 0x0B2)
PICA_REG(PROCTEX_LUT_DATA3, 0x0B3)
PICA_REG(PROCTEX_LUT_DATA4, 0x0B4)
PICA_REG(PROCTEX_LUT_DATA5, 0x0B5)
PICA_REG(PROCTEX_LUT_DATA6, 0x0B6)
PICA_REG(PROCTEX_LUT_DATA7, 0x0B7)
PICA_REG(TEXENV0_SOURCE, 0x0C0)
PICA_REG(TEXENV0_OPERAND, 0x0C1)
PICA_REG(TEXENV0_COMBINER, 0x0C2)
PICA_REG(TEXENV0_COLOR, 0x0C3)
PICA_REG(TEXENV0_SCALE, 0x0C4)
PICA_REG(TEXENV1_SOURCE, 0x0C8)
PICA_REG(TEXENV1_OPERAND, 0x0C9)
PICA_REG(TEXENV1_COMBINER, 0x0CA)
PICA_REG(TEXENV1_COLOR, 0x0CB)
PICA_REG(TEXENV1_SCALE, 0x0CC)
PICA_REG(TEXENV2_SOURCE, 0x0D0)
PICA_REG(TEXENV2_OPERAND, 0x0D1)
PICA_REG(TEXENV2_COMBINER, 0x0D2)
PICA_REG(TEXENV2_COLOR, 0x0D3)
PICA_REG(TEXENV2_SCALE, 0x0D4)
PICA_REG(TEXENV3_SOURCE, 0x0D8)
PICA_REG(TEXENV3_OPERAND, 0x0D9)
PICA_REG(TEXENV3_COMBINER, 0x0DA)
PICA_REG(TEXENV3_COLOR, 0x0DB)
PICA_REG(TEXENV3_SCALE, 0x0DC)
PICA_REG(TEXENV_UPDATE_BUFFER, 0x0E0)
PICA_REG(FOG_COLOR, 0x0E1)
PICA_REG(GAS_ATTENUATION, 0x0E4)
PICA_REG(GAS_ACCMAX, 0x0E5)
PICA_REG(FOG_LUT_INDEX, 0x0E6)
PICA_REG(FOG_LUT_DATA0, 0x0E8)
PICA_REG(FOG_LUT_DATA1, 0x0E9)
PICA_REG(FOG_LUT_DATA2, 0x0EA)
PICA_REG(FOG_LUT_DATA3, 0x0EB)
PICA_REG(FOG_LUT_DATA4, 0x0EC)
PICA_REG(FOG_LUT_DATA5, 0x0ED)
PICA_REG(FOG_LUT_DATA6, 0x0EE)
PICA_REG(FOG_LUT_DATA7, 0x0EF)
PICA_REG(TEXENV4_SOURCE, 0x0F0)
PICA_REG(TEXENV4_OPERAND, 0x0F1)
PICA_REG(TEXENV4_COMBINER, 0x0F2)
PICA_REG(TEXENV4_COLOR, 0x0F3)
PICA_REG(TEXENV4_SCALE, 0x0F4)
PICA_REG(TEXENV5_SOURCE, 0x0F8)
PICA_REG(TEXENV5_OPERAND, 0x0F9)
PICA_REG(TEXENV5_COMBINER, 0x0FA)
PICA_REG(TEXENV5_COLOR, 0x0FB)
PICA_REG(TEXENV5_SCALE, 0x0FC)
PICA_REG(TEXENV_BUFFER_COLOR, 0x0FD)
PICA_REG(COLOR_OPERATION, 0x100)
PICA_REG(BLEND_FUNC, 0x101)
PICA_REG(LOGIC_OP, 0x102)
PICA_REG(BLEND_COLOR, 0x103)
PICA_REG(FRAGOP_ALPHA_TEST, 0x104)
PICA_REG(STENCIL_TEST, 0x105)
PICA_REG(STENCIL_OP, 0x106)
PICA_REG(DEPTH_COLOR_MASK, 0x107)
PICA_REG(FRAMEBUFFER_INVALIDATE, 0x110)
PICA_REG(FRAMEBUFFER_FLUSH, 0x111)
PICA_REG(COLORBUFFER_READ, 0x112)
PICA_REG(COLORBUFFER_WRITE, 0x113)
PICA_REG(DEPTHBUFFER_READ, 0x114)
PICA_REG(DEPTHBUFFER_WRITE, 0x115)
PICA_REG(DEPTHBUFFER_FORMAT, 0x116)
PICA_REG(COLORBUFFER_FORMAT, 0x117)
PICA_REG(EARLYDEPTH_TEST2, 0x118)
PICA_REG(FRAMEBUFFER_BLOCK32, 0x11B)
PICA_REG(DEPTHBUFFER_LOC, 0x11C)
PICA_REG(COLORBUFFER_LOC, 0x11D)
PICA_REG(FRAMEBUFFER_DIM, 0x11E)
PICA_REG(GAS_LIGHT_XY, 0x120)
PICA_REG(GAS_LIGHT_Z, 0x121)
PICA_REG(GAS_LIGHT_Z_COLOR, 0x122)
PICA_REG(GAS_LUT_INDEX, 0x123)
PICA_REG(GAS_LUT_DATA, 0x124)
PICA_REG(GAS_DELTAZ_DEPTH, 0x126)
PICA_REG(FRAGOP_SHADOW, 0x130)
PICA_REG(LIGHT0_SPECULAR0, 0x140)
PICA_REG(LIGHT0_SPECULAR1, 0x141)
PICA_REG(LIGHT0_DIFFUSE, 0x142)
PICA_REG(LIGHT0_AMBIENT, 0x143)
PICA_REG(LIGHT0_XY, 0x144)
PICA_REG(LIGHT0_Z, 0x145)
PICA_REG(LIGHT0_SPOTDIR_XY, 0x146)
PICA_REG(LIGHT0_SPOTDIR_Z, 0x147)
PICA_REG(LIGHT0_CONFIG, 0x149)
PICA_REG(LIGHT0_ATTENUATION_BIAS, 0x14A)
PICA_REG(LIGHT0_ATTENUATION_SCALE, 0x14B)
PICA_REG(LIGHT1_SPECULAR0, 0x150)
PICA_REG(LIGHT1_SPECULAR1, 0x151)
PICA_REG(LIGHT1_DIFFUSE, 0x152)
PICA_REG(LIGHT1_AMBIENT, 0x153)
PICA_REG(LIGHT1_XY, 0x154)
PICA_REG(LIGHT1_Z, 0x155)
PICA_REG(LIGHT1_SPOTDIR_XY, 0x156)
PICA_REG(LIGHT1_SPOTDIR_Z, 0x157)
PICA_REG(LIGHT1_CONFIG, 0x159)
PICA_REG(LIGHT1_ATTENUATION_BIAS, 0x15A)
PICA_REG(LIGHT1_ATTENUATION_SCALE, 0x15B)
PICA_REG(LIGHT2_SPECULAR0, 0x160)
PICA_REG(LIGHT2_SPECULAR1, 0x161)
PICA_REG(LIGHT2_DIFFUSE, 0x162)
PICA_REG(LIGHT2_AMBIENT, 0x163)
PICA_REG(LIGHT2_XY, 0x164)
PICA_REG(LIGHT2_Z, 0x165)
PICA_REG(LIGHT2_SPOTDIR_XY, 0x166)
PICA_REG(LIGHT2_SPOTDIR_Z, 0x167)
PICA_REG(LIGHT2_CONFIG, 0x169)
PICA_REG(LIGHT2_ATTENUATION_BIAS, 0x16A)
PICA_REG(LIGHT2_ATTENUATION_SCALE, 0x16B)
PICA_REG(LIGHT3_SPECULAR0, 0x170)
PICA_REG(LIGHT3_SPECULAR1, 0x171)
PICA_REG(LIGHT3_DIFFUSE, 0x172)
PICA_REG(LIGHT3_AMBIENT, 0x173)
PICA_REG(LIGHT3_XY, 0x174)
PICA_REG(LIGHT3_Z, 0x175)
PICA_REG(LIGHT3_SPOTDIR_XY, 0x176)
PICA_REG(LIGHT3_SPOTDIR_Z, 0x177)
PICA_REG(LIGHT3_CONFIG, 0x179)
PICA_REG(LIGHT3_ATTENUATION_BIAS, 0x17A)
PICA_REG(LIGHT3_ATTENUATION_SCALE, 0x17B)
PICA_REG(LIGHT4_SPECULAR0, 0x180)
PICA_REG(LIGHT4_SPECULAR1, 0x181)
PICA_REG(LIGHT4_DIFFUSE, 0x182)
PICA_REG(LIGHT4_AMBIENT, 0x183)
PICA_REG(LIGHT4_XY, 0x184)
PICA_REG(LIGHT4_Z, 0x185)
PICA_REG(LIGHT4_SPOTDIR_XY, 0x186)
PICA_REG(LIGHT4_SPOTDIR_Z, 0x187)
PICA_REG(LIGHT4_CONFIG, 0x189)
PICA_REG(LIGHT4_ATTENUATION_BIAS, 0x18A)
PICA_REG(LIGHT4_ATTENUATION_SCALE, 0x18B)
PICA_REG(LIGHT5_SPECULAR0, 0x190)
PICA_REG(LIGHT5_SPECULAR1, 0x191)
PICA_REG(LIGHT5_DIFFUSE, 0x192)
PICA_REG(LIGHT5_AMBIENT, 0x193)
PICA_REG(LIGHT5_XY, 0x194)
PICA_REG(LIGHT5_Z, 0x195)
PICA_REG(LIGHT5_SPOTDIR_XY, 0x196)
PICA_REG(LIGHT5_SPOTDIR_Z, 0x197)
PICA_REG(LIGHT5_CONFIG, 0x199)
PICA_REG(LIGHT5_ATTENUATION_BIAS, 0x19A)
PICA_REG(LIGHT5_ATTENUATION_SCALE, 0x19B)
PICA_REG(LIGHT6_SPECULAR0, 0x1A0)
PICA_REG(LIGHT6_SPECULAR1, 0x1A1)
PICA_REG(LIGHT6_DIFFUSE, 0x1A2)
PICA_REG(LIGHT6_AMBIENT, 0x1A3)
PICA_REG(LIGHT6_XY, 0x1A4)
PICA_REG(LIGHT6_Z, 0x1A5)
PICA_REG(LIGHT6_SPOTDIR_XY, 0x1A6)
PICA_REG(LIGHT6_SPOTDIR_Z, 0x1A7)
PICA_REG(LIGHT6_CONFIG, 0x1A9)
PICA_REG(LIGHT6_ATTENUATION_BIAS, 0x1AA)
PICA_REG(LIGHT6_ATTENUATION_SCALE, 0x1AB)
PICA_REG(LIGHT7_SPECULAR0, 0x1B0)
PICA_REG(LIGHT7_SPECULAR1, 0x1B1)
PICA_REG(LIGHT7_DIFFUSE, 0x1B2)
PICA_REG(LIGHT7_AMBIENT, 0x1B3)
PICA_REG(LIGHT7_XY, 0x1B4)
PICA_REG(LIGHT7_Z, 0x1B5)
PICA_REG(LIGHT7_SPOTDIR_XY, 0x1B6)
PICA_REG(LIGHT7_SPOTDIR_Z, 0x1B7)
PICA_REG(LIGHT7_CONFIG, 0x1B9)
PICA_REG(LIGHT7_ATTENUATION_BIAS, 0x1BA)
PICA_REG(LIGHT7_ATTENUATION_SCALE, 0x1BB)
PICA_REG(LIGHTING_AMBIENT, 0x1C0)
PICA_REG(LIGHTING_NUM_LIGHTS, 0x1C2)
PICA_REG(LIGHTING_CONFIG0, 0x1C3)
PICA_REG(LIGHTING_CONFIG1, 0x1C4)
PICA_REG(LIGHTING_LUT_INDEX, 0x1C5)
PICA_REG(LIGHTING_ENABLE1, 0x1C6)
PICA_REG(LIGHTING_LUT_DATA0, 0x1C8)
PICA_REG(LIGHTING_LUT_DATA1, 0x1C9)
PICA_REG(LIGHTING_LUT_DATA2, 0x1CA)
PICA_REG(LIGHTING_LUT_DATA3, 0x1CB)
PICA_REG(LIGHTING_LUT_DATA4, 0x1CC)
PICA_REG(LIGHTING_LUT_DATA5, 0x1CD)
PICA_REG(LIGHTING_LUT_DATA6, 0x1CE)
PICA_REG(LIGHTING_LUT_DATA7, 0x1CF)
PICA_REG(LIGHTING_LUTINPUT_ABS, 0x1D0)
PICA_REG(LIGHTING_LUTINPUT_SELECT, 0x1D1)
PICA_REG(LIGHTING_LUTINPUT_SCALE, 0x1D2)
PICA_REG(LIGHTING_LIGHT_PERMUTATION, 0x1D9)
PICA_REG(ATTRIBBUFFERS_LOC, 0x200)
PICA_REG(ATTRIBBUFFERS_FORMAT_LOW, 0x201)
PICA_REG(ATTRIBBUFFERS_FORMAT_HIGH, 0x202)
PICA_REG(ATTRIBBUFFER0_OFFSET, 0x203)
PICA_REG(ATTRIBBUFFER0_CONFIG1, 0x204)
PICA_REG(ATTRIBBUFFER0_CONFIG2, 0x205)
PICA_REG(ATTRIBBUFFER1_OFFSET, 0x206)
PICA_REG(ATTRIBBUFFER1_CONFIG1, 0x207)
PICA_REG(ATTRIBBUFFER1_CONFIG2, 0x208)
PICA_REG(ATTRIBBUFFER2_OFFSET, 0x209)
PICA_REG(ATTRIBBUFFER2_CONFIG1, 0x20A)
PICA_REG(ATTRIBBUFFER2_CONFIG2, 0x20B)
PICA_REG(ATTRIBBUFFER3_OFFSET, 0x20C)
PICA_REG(ATTRIBBUFFER3_CONFIG1, 0x20D)
PICA_REG(ATTRIBBUFFER3_CONFIG2, 0x20E)
PICA_REG(ATTRIBBUFFER4_OFFSET, 0x20F)
PICA_REG(ATTRIBBUFFER4_CONFIG1, 0x210)
PICA_REG(ATTRIBBUFFER4_CONFIG2, 0x211)
PICA_REG(ATTRIBBUFFER5_OFFSET, 0x212)
PICA_REG(ATTRIBBUFFER5_CONFIG1, 0x213)
PICA_REG(ATTRIBBUFFER5_CONFIG2, 0x214)
PICA_REG(ATTRIBBUFFER6_OFFSET, 0x215)
PICA_REG(ATTRIBBUFFER6_CONFIG1, 0x216)
PICA_REG(ATTRIBBUFFER6_CONFIG2, 0x217)
PICA_REG(ATTRIBBUFFER7_OFFSET, 0x218)
PICA_REG(ATTRIBBUFFER7_CONFIG1, 0x219)
PICA_REG(ATTRIBBUFFER7_CONFIG2, 0x21A)
PICA_REG(ATTRIBBUFFER8_OFFSET, 0x21B)
PICA_REG(ATTRIBBUFFER8_CONFIG1, 0x21C)
PICA_REG(ATTRIBBUFFER8_CONFIG2, 0x21D)
PICA_REG(ATTRIBBUFFER9_OFFSET, 0x21E)
PICA_REG(ATTRIBBUFFER9_CONFIG1, 0x21F)
PICA_REG(ATTRIBBUFFER9_CONFIG2, 0x220)
PICA_REG(ATTRIBBUFFER10_OFFSET, 0x221)
PICA_REG(ATTRIBBUFFER10_CONFIG1, 0x222)
PICA_REG(ATTRIBBUFFER10_CONFIG2, 0x223)
PICA_REG(ATTRIBBUFFER11_OFFSET, 0x224)
PICA_REG(ATTRIBBUFFER11_CONFIG1, 0x225)
PICA_REG(ATTRIBBUFFER11_CONFIG2, 0x226)
PICA_REG(INDEXBUFFER_CONFIG, 0x227)
PICA_REG(NUMVERTICES, 0x228)
PICA_REG(GEOSTAGE_CONFIG, 0x229)
PICA_REG(VERTEX_OFFSET, 0x22A)
PICA_REG(POST_VERTEX_CACHE_NUM, 0x22D)
PICA_REG(DRAWARRAYS, 0x22E)
PICA_REG(DRAWELEMENTS, 0x22F)
PICA_REG(VTX_FUNC, 0x231)
PICA_REG(FIXEDATTRIB_INDEX, 0x232)
PICA_REG(FIXEDATTRIB_DATA0, 0x233)
PICA_REG(FIXEDATTRIB_DATA1, 0x234)
PICA_REG(FIXEDATTRIB_DATA2, 0x235)
PICA_REG(CMDBUF_SIZE0, 0x238)
PICA_REG(CMDBUF_SIZE1, 0x239)
PICA_REG(CMDBUF_ADDR0, 0x23A)
PICA_REG(CMDBUF_ADDR1, 0x23B)
PICA_REG(CMDBUF_JUMP0, 0x23C)
PICA_REG(CMDBUF_JUMP1, 0x23D)
PICA_REG(VSH_NUM_ATTR, 0x242)
PICA_REG(VSH_COM_MODE, 0x244)
PICA_REG(START_DRAW_FUNC0, 0x245)
PICA_REG(VSH_OUTMAP_TOTAL1, 0x24A)
PICA_REG(VSH_OUTMAP_TOTAL2, 0x251)
PICA_REG(GSH_MISC0, 0x252)
PICA_REG(GEOSTAGE_CONFIG2, 0x253)
PICA_REG(GSH_MISC1, 0x254)
PICA_REG(PRIMITIVE_CONFIG, 0x25E)
PICA_REG(RESTART_PRIMITIVE, 0x25F)
PICA_REG(GSH_BOOLUNIFORM, 0x280)
PICA_REG(GSH_INTUNIFORM_I0, 0x281)
PICA_REG(GSH_INTUNIFORM_I1, 0x282)
PICA_REG(GSH_INTUNIFORM_I2, 0x283)
PICA_REG(GSH_INTUNIFORM_I3, 0x284)
PICA_REG(GSH_INPUTBUFFER_CONFIG, 0x289)
PICA_REG(GSH_ENTRYPOINT, 0x28A)
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_LOW, 0x28B)
PICA_REG(GSH_ATTRIBUTES_PERMUTATION_HIGH, 0x28C)
PICA_REG(GSH_OUTMAP_MASK, 0x28D)
PICA_REG(GSH_CODETRANSFER_END, 0x28F)
PICA_REG(GSH_FLOATUNIFORM_INDEX, 0x290)
PICA_REG(GSH_FLOATUNIFORM_DATA0, 0x291)
PICA_REG(GSH_FLOATUNIFORM_DATA1, 0x292)
PICA_REG(GSH_FLOATUNIFORM_DATA2, 0x293)
PICA_REG(GSH_FLOATUNIFORM_DATA3, 0x294)
PICA_REG(GSH_FLOATUNIFORM_DATA4, 0x295)
PICA_REG(GSH_FLOATUNIFORM_DATA5, 0x296)
PICA_REG(GSH_FLOATUNIFORM_DATA6, 0x297)
PICA_REG(GSH_FLOATUNIFORM_DATA7, 0x298)
PICA_REG(GSH_CODETRANSFER_INDEX, 0x29B)
PICA_REG(GSH_CODETRANSFER_DATA0, 0x29C)
PICA_REG(GSH_CODETRANSFER_DATA1, 0x29D)
PICA_REG(GSH_CODETRANSFER_DATA2, 0x29E)
PICA_REG(GSH_CODETRANSFER_DATA3, 0x29F)
PICA_REG(GSH_CODETRANSFER_DATA4, 0x2A0)
PICA_REG(GSH_CODETRANSFER_DATA5, 0x2A1)
PICA_REG(GSH_CODETRANSFER_DATA6, 0x2A2)
PICA_REG(GSH_CODETRANSFER_DATA7, 0x2A3)
PICA_REG(GSH_OPDESCS_INDEX, 0x2A5)
PICA_REG(GSH_OPDESCS_DATA0, 0x2A6)
PICA_REG(GSH_OPDESCS_DATA1, 0x2A7)
PICA_REG(GSH_OPDESCS_DATA2, 0x2A8)
PICA_REG(GSH_OPDESCS_DATA3, 0x2A9)
PICA_REG(GSH_OPDESCS_DATA4, 0x2AA)
PICA_REG(GSH_OPDESCS_DATA5, 0x2AB)
PICA_REG(GSH_OPDESCS_DATA6, 0x2AC)
PICA_REG(GSH_OPDESCS_DATA7, 0x2AD)
PICA_REG(VSH_BOOLUNIFORM, 0x2B0)
PICA_REG(VSH_INTUNIFORM_I0, 0x2B1)
PICA_REG(VSH_INTUNIFORM_I1, 0x2B2)
PICA_REG(VSH_INTUNIFORM_I2, 0x2B3)
PICA_REG(VSH_INTUNIFORM_I3, 0x2B4)
PICA_REG(VSH_INPUTBUFFER_CONFIG, 0x2B9)
PICA_REG(VSH_ENTRYPOINT, 0x2BA)
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_LOW, 0x2BB)
PICA_REG(VSH_ATTRIBUTES_PERMUTATION_HIGH, 0x2BC)
PICA_REG(VSH_OUTMAP_MASK, 0x2BD)
PICA_REG(VSH_CODETRANSFER_END, 0x2BF)
PICA_REG(VSH_FLOATUNIFORM_INDEX, 0x2C0)
PICA_REG(VSH_FLOATUNIFORM_DATA0, 0x2C1)
PICA_REG(VSH_FLOATUNIFORM_DATA1, 0x2C2)
PICA_REG(VSH_FLOATUNIFORM_DATA2, 0x2C3)
PICA_REG(VSH_FLOATUNIFORM_DATA3, 0x2C4)
PICA_REG(VSH_FLOATUNIFORM_DATA4, 0x2C5)
PICA_REG(VSH_FLOATUNIFORM_DATA5, 0x2C6)
PICA_REG(VSH_FLOATUNIFORM_DATA6, 0x2C7)
PICA_REG(VSH_FLOATUNIFORM_DATA7, 0x2C8)
PICA_REG(VSH_CODETRANSFER_INDEX, 0x2CB)
PICA_REG(VSH_CODETRANSFER_DATA0, 0x2CC)
PICA_REG(VSH_CODETRANSFER_DATA1, 0x2CD)
PICA_REG(VSH_CODETRANSFER_DATA2, 0x2CE)
PICA_REG(VSH_CODETRANSFER_DATA3, 0x2CF)
PICA_REG(VSH_CODETRANSFER_DATA4, 0x2D0)
PICA_REG(VSH_CODETRANSFER_DATA5, 0x2D1)
PICA_REG(VSH_CODETRANSFER_DATA6, 0x2D2)
PICA_REG(VSH_CODETRANSFER_DATA7, 0x2D3)
PICA_REG(VSH_OPDESCS_INDEX, 0x2D5)
PICA_REG(VSH_OPDESCS_DATA0, 0x2D6)
PICA_REG(VSH_OPDESCS_DATA1, 0x2D7)
PICA_REG(VSH_OPDESCS_DATA2, 0x2D8)
PICA_REG(VSH_OPDESCS_DATA3, 0x2D9)
PICA_REG(VSH_OPDESCS_DATA4, 0x2DA)
PICA_REG(VSH_OPDESCS_DATA5, 0x2DB)
PICA_REG(VSH_OPDESCS_DATA6, 0x2DC)
PICA_REG(VSH_OPDESCS_DATA7, 0x2DD)

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -22,40 +22,44 @@ namespace Pica {
*
* @todo Verify on HW if this conversion is sufficiently accurate.
*/
template <unsigned M, unsigned E>
template <u32 M, u32 E>
struct Float {
static constexpr u32 width = M + E + 1;
static constexpr u32 bias = 128 - (1 << (E - 1));
static constexpr u32 exponent_mask = (1 << E) - 1;
static constexpr u32 mantissa_mask = (1 << M) - 1;
static constexpr u32 sign_mask = 1 << (E + M);
public:
static Float<M, E> FromFloat32(float val) {
Float<M, E> ret;
static Float FromFloat32(float val) {
Float ret;
ret.value = val;
return ret;
}
static Float<M, E> FromRaw(u32 hex) {
Float<M, E> res;
static Float FromRaw(u32 hex) {
Float res;
const int width = M + E + 1;
const int bias = 128 - (1 << (E - 1));
int exponent = (hex >> M) & ((1 << E) - 1);
const unsigned mantissa = hex & ((1 << M) - 1);
const unsigned sign = (hex >> (E + M)) << 31;
u32 exponent = (hex >> M) & exponent_mask;
const u32 mantissa = hex & mantissa_mask;
const u32 sign = (hex & sign_mask) << (31 - M - E);
if (hex & ((1 << (width - 1)) - 1)) {
if (exponent == (1 << E) - 1)
if (hex & (mantissa_mask | (exponent_mask << M))) {
if (exponent == exponent_mask) {
exponent = 255;
else
} else {
exponent += bias;
}
hex = sign | (mantissa << (23 - M)) | (exponent << 23);
} else {
hex = sign;
}
std::memcpy(&res.value, &hex, sizeof(float));
return res;
}
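// Worked example (PICA float24: M = 16, E = 7): bias = 128 - (1 << 6) = 64, so
// the stored exponent 63 that float24 uses for 1.0 maps to 63 + 64 = 127, the
// IEEE-754 single-precision exponent of 1.0.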
static Float<M, E> Zero() {
static Float Zero() {
return FromFloat32(0.f);
}
@ -64,80 +68,77 @@ public:
return value;
}
Float<M, E> operator*(const Float<M, E>& flt) const {
Float operator*(const Float& flt) const {
float result = value * flt.ToFloat32();
// PICA gives 0 instead of NaN when multiplying by inf
if (std::isnan(result))
if (!std::isnan(value) && !std::isnan(flt.ToFloat32()))
if (std::isnan(result) && !std::isnan(value) && !std::isnan(flt.ToFloat32())) {
result = 0.f;
return Float<M, E>::FromFloat32(result);
}
Float<M, E> operator/(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
return Float::FromFloat32(result);
}
Float<M, E> operator+(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
Float operator/(const Float& flt) const {
return Float::FromFloat32(ToFloat32() / flt.ToFloat32());
}
Float<M, E> operator-(const Float<M, E>& flt) const {
return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
Float operator+(const Float& flt) const {
return Float::FromFloat32(ToFloat32() + flt.ToFloat32());
}
Float<M, E>& operator*=(const Float<M, E>& flt) {
Float operator-(const Float& flt) const {
return Float::FromFloat32(ToFloat32() - flt.ToFloat32());
}
Float& operator*=(const Float& flt) {
value = operator*(flt).value;
return *this;
}
Float<M, E>& operator/=(const Float<M, E>& flt) {
Float& operator/=(const Float& flt) {
value /= flt.ToFloat32();
return *this;
}
Float<M, E>& operator+=(const Float<M, E>& flt) {
Float& operator+=(const Float& flt) {
value += flt.ToFloat32();
return *this;
}
Float<M, E>& operator-=(const Float<M, E>& flt) {
Float& operator-=(const Float& flt) {
value -= flt.ToFloat32();
return *this;
}
Float<M, E> operator-() const {
return Float<M, E>::FromFloat32(-ToFloat32());
Float operator-() const {
return Float::FromFloat32(-ToFloat32());
}
bool operator<(const Float<M, E>& flt) const {
bool operator<(const Float& flt) const {
return ToFloat32() < flt.ToFloat32();
}
bool operator>(const Float<M, E>& flt) const {
bool operator>(const Float& flt) const {
return ToFloat32() > flt.ToFloat32();
}
bool operator>=(const Float<M, E>& flt) const {
bool operator>=(const Float& flt) const {
return ToFloat32() >= flt.ToFloat32();
}
bool operator<=(const Float<M, E>& flt) const {
bool operator<=(const Float& flt) const {
return ToFloat32() <= flt.ToFloat32();
}
bool operator==(const Float<M, E>& flt) const {
bool operator==(const Float& flt) const {
return ToFloat32() == flt.ToFloat32();
}
bool operator!=(const Float<M, E>& flt) const {
bool operator!=(const Float& flt) const {
return ToFloat32() != flt.ToFloat32();
}
private:
static const unsigned MASK = (1 << (M + E + 1)) - 1;
static const unsigned MANTISSA_MASK = (1 << M) - 1;
static const unsigned EXPONENT_MASK = (1 << E) - 1;
// Stored as a regular float, merely for convenience
// TODO: Perform proper arithmetic on this!
float value;

View File

@ -13,10 +13,6 @@ namespace OpenGL {
struct ScreenInfo;
}
namespace Vulkan {
struct ScreenInfo;
}
namespace Pica::Shader {
struct OutputVertex;
} // namespace Pica::Shader
@ -84,13 +80,6 @@ public:
return false;
}
/// Attempt to use a faster method to display the framebuffer to screen
virtual bool AccelerateDisplay(const GPU::Regs::FramebufferConfig& config,
PAddr framebuffer_addr, u32 pixel_stride,
Vulkan::ScreenInfo& screen_info) {
return false;
}
/// Attempt to draw using hardware shaders
virtual bool AccelerateDrawBatch(bool is_indexed) {
return false;

View File

@ -14,14 +14,13 @@
namespace Pica {
struct FramebufferRegs {
enum class FragmentOperationMode : u32 {
enum class FragmentOperationMode : u32 {
Default = 0,
Gas = 1,
Shadow = 3,
};
};
enum class LogicOp : u32 {
enum class LogicOp : u32 {
Clear = 0,
And = 1,
AndReverse = 2,
@ -38,17 +37,17 @@ struct FramebufferRegs {
AndInverted = 13,
OrReverse = 14,
OrInverted = 15,
};
};
enum class BlendEquation : u32 {
enum class BlendEquation : u32 {
Add = 0,
Subtract = 1,
ReverseSubtract = 2,
Min = 3,
Max = 4,
};
};
enum class BlendFactor : u32 {
enum class BlendFactor : u32 {
Zero = 0,
One = 1,
SourceColor = 2,
@ -64,9 +63,9 @@ struct FramebufferRegs {
ConstantAlpha = 12,
OneMinusConstantAlpha = 13,
SourceAlphaSaturate = 14,
};
};
enum class CompareFunc : u32 {
enum class CompareFunc : u32 {
Never = 0,
Always = 1,
Equal = 2,
@ -75,9 +74,9 @@ struct FramebufferRegs {
LessThanOrEqual = 5,
GreaterThan = 6,
GreaterThanOrEqual = 7,
};
};
enum class StencilAction : u32 {
enum class StencilAction : u32 {
Keep = 0,
Zero = 1,
Replace = 2,
@ -86,8 +85,9 @@ struct FramebufferRegs {
Invert = 5,
IncrementWrap = 6,
DecrementWrap = 7,
};
};
struct FramebufferRegs {
struct {
union {
BitField<0, 2, FragmentOperationMode> fragment_operation_mode;

View File

@ -12,6 +12,13 @@
namespace Pica {
enum class TriangleTopology : u32 {
List = 0,
Strip = 1,
Fan = 2,
Shader = 3, // Programmable setup unit implemented in a geometry shader
};
struct PipelineRegs {
enum class VertexAttributeFormat : u32 {
BYTE = 0,
@ -250,13 +257,6 @@ struct PipelineRegs {
INSERT_PADDING_WORDS(0x9);
enum class TriangleTopology : u32 {
List = 0,
Strip = 1,
Fan = 2,
Shader = 3, // Programmable setup unit implemented in a geometry shader
};
BitField<8, 2, TriangleTopology> triangle_topology;
u32 restart_primitive;

View File

@ -6,21 +6,20 @@
#include <array>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {
struct RasterizerRegs {
enum class CullMode : u32 {
// Select which polygons are considered to be "frontfacing".
// Select which polygons are considered to be "frontfacing".
enum class CullMode : u32 {
KeepAll = 0,
KeepClockWise = 1,
KeepCounterClockWise = 2,
// TODO: What does the third value imply?
};
KeepAll2 = 3 // Same as KeepAll
};
struct RasterizerRegs {
union {
BitField<0, 2, CullMode> cull_mode;
};

View File

@ -13,18 +13,7 @@
namespace Pica {
struct TexturingRegs {
struct TextureConfig {
enum TextureType : u32 {
Texture2D = 0,
TextureCube = 1,
Shadow2D = 2,
Projection2D = 3,
ShadowCube = 4,
Disabled = 5,
};
enum WrapMode : u32 {
enum WrapMode : u32 {
ClampToEdge = 0,
ClampToBorder = 1,
Repeat = 2,
@ -34,11 +23,22 @@ struct TexturingRegs {
ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
Repeat2 = 6, // Same as Repeat
Repeat3 = 7, // Same as Repeat
};
};
enum TextureFilter : u32 {
enum TextureFilter : u32 {
Nearest = 0,
Linear = 1,
};
struct TexturingRegs {
struct TextureConfig {
enum TextureType : u32 {
Texture2D = 0,
TextureCube = 1,
Shadow2D = 2,
Projection2D = 3,
ShadowCube = 4,
Disabled = 5,
};
union {

View File

@ -5,8 +5,8 @@
#include <memory>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/swrasterizer/swrasterizer.h"
#include "video_core/video_core.h"

View File

@ -414,7 +414,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
MICROPROFILE_SCOPE(OpenGL_GS);
const auto& regs = Pica::g_state.regs;
if (regs.pipeline.use_gs != Pica::PipelineRegs::UseGS::No) {
if (regs.pipeline.use_gs != Pica::UseGS::No) {
LOG_ERROR(Render_OpenGL, "Accelerate draw doesn't support geometry shader");
return false;
}

View File

@ -9,18 +9,17 @@
#include <tuple>
#include <utility>
#include <fmt/format.h>
#include <nihstro/shader_bytecode.h>
#include "common/assert.h"
#include "common/common_types.h"
#include "video_core/shader_compiler/frontend/opcode.h"
#include "video_core/shader_compiler/frontned/instruction.h"
#include "video_core/shader_compiler/frontend/register.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
namespace OpenGL::ShaderDecompiler {
using Pica::Shader::OpCode;
using Pica::Shader::DestRegister;
using nihstro::Instruction;
using nihstro::OpCode;
using nihstro::RegisterType;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
namespace OpenGL::ShaderDecompiler {
constexpr u32 PROGRAM_END = Pica::Shader::MAX_PROGRAM_CODE_LENGTH;

View File

@ -9,7 +9,7 @@
#include <optional>
#include <string>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
#include "video_core/shader_compiler/shader.h"
namespace OpenGL::ShaderDecompiler {

View File

@ -175,11 +175,11 @@ public:
void Create(const char* source, GLenum type) {
if (shader_or_program.which() == 0) {
boost::get<OGLShader>(shader_or_program).Create(source, type);
std::get<OGLShader>(shader_or_program).Create(source, type);
} else {
OGLShader shader;
shader.Create(source, type);
OGLProgram& program = boost::get<OGLProgram>(shader_or_program);
OGLProgram& program = std::get<OGLProgram>(shader_or_program);
program.Create(true, {shader.handle});
SetShaderUniformBlockBindings(program.handle);
@ -191,9 +191,9 @@ public:
GLuint GetHandle() const {
if (shader_or_program.which() == 0) {
return boost::get<OGLShader>(shader_or_program).handle;
return std::get<OGLShader>(shader_or_program).handle;
} else {
return boost::get<OGLProgram>(shader_or_program).handle;
return std::get<OGLProgram>(shader_or_program).handle;
}
}
@ -204,7 +204,7 @@ public:
}
private:
boost::variant<OGLShader, OGLProgram> shader_or_program;
std::variant<OGLShader, OGLProgram> shader_or_program;
};
class TrivialVertexShader {

View File

@ -1,4 +1,4 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
@ -8,28 +8,32 @@
#include <glm/glm.hpp>
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_texturing.h"
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace PicaToVK {
using TextureFilter = Pica::TexturingRegs::TextureConfig::TextureFilter;
struct FilterInfo {
vk::Filter mag_filter, min_filter;
vk::SamplerMipmapMode mip_mode;
};
inline FilterInfo TextureFilterMode(TextureFilter mag, TextureFilter min, TextureFilter mip) {
std::array<vk::Filter, 2> filter_table = { vk::Filter::eNearest, vk::Filter::eLinear };
std::array<vk::SamplerMipmapMode, 2> mipmap_table = { vk::SamplerMipmapMode::eNearest, vk::SamplerMipmapMode::eLinear };
inline FilterInfo TextureFilterMode(Pica::TextureFilter mag, Pica::TextureFilter min,
Pica::TextureFilter mip) {
constexpr std::array filter_table = {
vk::Filter::eNearest,
vk::Filter::eLinear
};
return FilterInfo{filter_table[mag], filter_table[min], mipmap_table[mip]};
constexpr std::array mipmap_table = {
vk::SamplerMipmapMode::eNearest,
vk::SamplerMipmapMode::eLinear
};
return FilterInfo{filter_table.at(mag), filter_table.at(min), mipmap_table.at(mip)};
}
inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
inline vk::SamplerAddressMode WrapMode(Pica::WrapMode mode) {
static constexpr std::array<vk::SamplerAddressMode, 8> wrap_mode_table{{
vk::SamplerAddressMode::eClampToEdge,
vk::SamplerAddressMode::eClampToBorder,
@ -63,7 +67,7 @@ inline vk::SamplerAddressMode WrapMode(Pica::TexturingRegs::TextureConfig::WrapM
return wrap_mode_table[index];
}
inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation) {
inline vk::BlendOp BlendEquation(Pica::BlendEquation equation) {
static constexpr std::array<vk::BlendOp, 5> blend_equation_table{{
vk::BlendOp::eAdd,
vk::BlendOp::eSubtract,
@ -85,7 +89,7 @@ inline vk::BlendOp BlendEquation(Pica::FramebufferRegs::BlendEquation equation)
return blend_equation_table[index];
}
inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
inline vk::BlendFactor BlendFunc(Pica::BlendFactor factor) {
static constexpr std::array<vk::BlendFactor, 15> blend_func_table{{
vk::BlendFactor::eZero, // BlendFactor::Zero
vk::BlendFactor::eOne, // BlendFactor::One
@ -117,7 +121,7 @@ inline vk::BlendFactor BlendFunc(Pica::FramebufferRegs::BlendFactor factor) {
return blend_func_table[index];
}
inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
inline vk::LogicOp LogicOp(Pica::LogicOp op) {
static constexpr std::array<vk::LogicOp, 16> logic_op_table{{
vk::LogicOp::eClear, // Clear
vk::LogicOp::eAnd, // And
@ -150,7 +154,7 @@ inline vk::LogicOp LogicOp(Pica::FramebufferRegs::LogicOp op) {
return logic_op_table[index];
}
inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
inline vk::CompareOp CompareFunc(Pica::CompareFunc func) {
static constexpr std::array<vk::CompareOp, 8> compare_func_table{{
vk::CompareOp::eNever, // CompareFunc::Never
vk::CompareOp::eAlways, // CompareFunc::Always
@ -175,7 +179,7 @@ inline vk::CompareOp CompareFunc(Pica::FramebufferRegs::CompareFunc func) {
return compare_func_table[index];
}
inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
inline vk::StencilOp StencilOp(Pica::StencilAction action) {
static constexpr std::array<vk::StencilOp, 8> stencil_op_table{{
vk::StencilOp::eKeep, // StencilAction::Keep
vk::StencilOp::eZero, // StencilAction::Zero
@ -200,6 +204,30 @@ inline vk::StencilOp StencilOp(Pica::FramebufferRegs::StencilAction action) {
return stencil_op_table[index];
}
inline vk::PrimitiveTopology PrimitiveTopology(Pica::TriangleTopology topology) {
switch (topology) {
case Pica::TriangleTopology::Fan:
return vk::PrimitiveTopology::eTriangleFan;
case Pica::TriangleTopology::List:
case Pica::TriangleTopology::Shader:
return vk::PrimitiveTopology::eTriangleList;
case Pica::TriangleTopology::Strip:
return vk::PrimitiveTopology::eTriangleStrip;
}
// All topology values are handled above; fall back to triangle lists defensively
return vk::PrimitiveTopology::eTriangleList;
}
inline vk::CullModeFlags CullMode(Pica::CullMode mode) {
switch (mode) {
case Pica::CullMode::KeepAll:
case Pica::CullMode::KeepAll2:
return vk::CullModeFlagBits::eNone;
case Pica::CullMode::KeepClockWise:
return vk::CullModeFlagBits::eBack;
case Pica::CullMode::KeepCounterClockWise:
return vk::CullModeFlagBits::eFront;
}
// All cull modes are handled above; fall back to no culling defensively
return vk::CullModeFlagBits::eNone;
}
inline glm::vec4 ColorRGBA8(const u32 color) {
return glm::vec4{
(color >> 0 & 0xFF) / 255.0f,

View File

@ -2,22 +2,6 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
// Enable vulkan platforms
#if defined(ANDROID) || defined (__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR 1
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR 1
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK 1
#define VK_USE_PLATFORM_METAL_EXT 1
#else
#ifdef WAYLAND_DISPLAY
#define VK_USE_PLATFORM_WAYLAND_KHR 1
#else // wayland
#define VK_USE_PLATFORM_XLIB_KHR 1
#endif
#endif
#include <glm/gtc/matrix_transform.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
@ -56,83 +40,6 @@
namespace Vulkan {
vk::SurfaceKHR CreateSurface(const vk::Instance& instance,
const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface;
#if VK_USE_PLATFORM_WIN32_KHR
if (window_info.type == Frontend::WindowSystemType::Windows) {
const HWND hWnd = static_cast<HWND>(window_info.render_surface);
const vk::Win32SurfaceCreateInfoKHR win32_ci{{}, nullptr, hWnd};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Win32 surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_XLIB_KHR
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_WAYLAND_KHR
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
UNREACHABLE();
}
return surface;
}
std::vector<const char*> RequiredExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#ifdef _WIN32
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#endif
#if !defined(_WIN32) && !defined(__APPLE__)
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
return extensions;
}
RendererVulkan::RendererVulkan(Frontend::EmuWindow& window)
: RendererBase{window} {

View File

@ -0,0 +1,178 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "core/core.h"
#include "common/object_pool.h"
#include "video_core/renderer_vulkan/vk_backend.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace VideoCore::Vulkan {
Backend::Backend(Frontend::EmuWindow& window) : BackendBase(window),
instance(window), swapchain(instance, instance.GetSurface()),
scheduler(instance) {
// TODO: Properly report GPU hardware
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", "NVIDIA");
telemetry_session.AddField(user_system, "GPU_Model", "GTX 1650");
telemetry_session.AddField(user_system, "GPU_Vulkan_Version", "Vulkan 1.3");
// Pre-create all needed renderpasses by the renderer
constexpr std::array color_formats = {
vk::Format::eR8G8B8A8Unorm,
vk::Format::eR8G8B8Unorm,
vk::Format::eR5G5B5A1UnormPack16,
vk::Format::eR5G6B5UnormPack16,
vk::Format::eR4G4B4A4UnormPack16
};
constexpr std::array depth_stencil_formats = {
vk::Format::eD16Unorm,
vk::Format::eX8D24UnormPack32,
vk::Format::eD24UnormS8Uint,
};
// Create all required renderpasses
for (u32 color = 0; color < MAX_COLOR_FORMATS; color++) {
for (u32 depth = 0; depth < MAX_DEPTH_FORMATS; depth++) {
const u32 index = color * MAX_DEPTH_FORMATS + depth;
renderpass_cache[index] = CreateRenderPass(color_formats[color], depth_stencil_formats[depth]);
}
}
}
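// Retrieval sketch (assumed helper, not part of this commit): looking a pass
// back up uses the same index layout as the loop above.
//
//   vk::RenderPass Backend::GetRenderPass(u32 color, u32 depth) const {
//       return renderpass_cache[color * MAX_DEPTH_FORMATS + depth];
//   }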
Backend::~Backend() {
vk::Device device = instance.GetDevice();
for (auto& renderpass : renderpass_cache) {
device.destroyRenderPass(renderpass);
}
}
/**
* To avoid many small heap allocations during handle creation, each resource type
* has a dedicated pool associated with it that batch-allocates memory.
*/
BufferHandle Backend::CreateBuffer(BufferInfo info) {
static ObjectPool<Buffer> buffer_pool;
return IntrusivePtr<Buffer>{buffer_pool.Allocate(info)};
}
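// Contract assumed for ObjectPool (defined in common/object_pool.h): Allocate()
// constructs the object inside a chunked arena so that creating a handle avoids
// an individual heap allocation. Rough shape:
//
//   template <typename T>
//   class ObjectPool {
//   public:
//       template <typename... Args>
//       T* Allocate(Args&&... args);  // placement-new into the current chunk
//   };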
FramebufferHandle Backend::CreateFramebuffer(FramebufferInfo info) {
// TODO: Not implemented in this commit yet; return a null handle instead of
// falling off the end of a value-returning function (undefined behavior)
return FramebufferHandle{};
}
TextureHandle Backend::CreateTexture(TextureInfo info) {
static ObjectPool<Texture> texture_pool;
return IntrusivePtr<Texture>{texture_pool.Allocate(info)};
}
PipelineHandle Backend::CreatePipeline(PipelineType type, PipelineInfo info) {
static ObjectPool<Pipeline> pipeline_pool;
// Find a pipeline layout first
if (auto iter = pipeline_layouts.find(info.layout); iter != pipeline_layouts.end()) {
PipelineLayout& layout = iter->second;
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, layout, type, info, cache)};
}
// Create the layout
auto result = pipeline_layouts.emplace(info.layout, PipelineLayout{instance, info.layout});
return IntrusivePtr<Pipeline>{pipeline_pool.Allocate(instance, result.first->second, type, info, cache)};
}
SamplerHandle Backend::CreateSampler(SamplerInfo info) {
static ObjectPool<Sampler> sampler_pool;
return IntrusivePtr<Sampler>{sampler_pool.Allocate(info)};
}
void Backend::Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Buffer* vertex = static_cast<Buffer*>(vertex_buffer.Get());
command_buffer.bindVertexBuffers(0, vertex->GetHandle(), {0});
// Submit draw
command_buffer.draw(num_vertices, 1, base_vertex, 0);
}
void Backend::DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) {
}
vk::RenderPass Backend::CreateRenderPass(vk::Format color, vk::Format depth) const {
// Define attachments
const std::array attachments = {
vk::AttachmentDescription{
.format = color,
.stencilLoadOp = vk::AttachmentLoadOp::eDontCare,
.stencilStoreOp = vk::AttachmentStoreOp::eDontCare,
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
.finalLayout = vk::ImageLayout::eColorAttachmentOptimal
},
vk::AttachmentDescription{
.format = depth,
.initialLayout = vk::ImageLayout::eShaderReadOnlyOptimal,
.finalLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal
}
};
// Our renderpasses define only one color attachment and one depth attachment
const vk::AttachmentReference color_attachment_ref = {
.attachment = 0,
.layout = vk::ImageLayout::eColorAttachmentOptimal
};
const vk::AttachmentReference depth_attachment_ref = {
.attachment = 1,
.layout = vk::ImageLayout::eDepthStencilAttachmentOptimal
};
const vk::SubpassDependency subpass_dependency = {
.srcSubpass = VK_SUBPASS_EXTERNAL,
.dstSubpass = 0,
.srcStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests,
.dstStageMask = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eEarlyFragmentTests,
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eColorAttachmentWrite |
vk::AccessFlagBits::eDepthStencilAttachmentWrite,
.dependencyFlags = vk::DependencyFlagBits::eByRegion
};
// We also require only one subpass
const vk::SubpassDescription subpass = {
.pipelineBindPoint = vk::PipelineBindPoint::eGraphics,
.inputAttachmentCount = 0,
.pInputAttachments = nullptr,
.colorAttachmentCount = 1,
.pColorAttachments = &color_attachment_ref,
.pResolveAttachments = nullptr,
.pDepthStencilAttachment = &depth_attachment_ref
};
const vk::RenderPassCreateInfo renderpass_info = {
.attachmentCount = 2,
.pAttachments = attachments.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &subpass_dependency
};
// Create the renderpass
vk::Device device = instance.GetDevice();
return device.createRenderPass(renderpass_info);
}
} // namespace VideoCore::Vulkan

View File

@ -0,0 +1,75 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <unordered_map>
#include "video_core/common/backend.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
namespace VideoCore::Vulkan {
class Texture;
constexpr u32 RENDERPASS_COUNT = MAX_COLOR_FORMATS * MAX_DEPTH_FORMATS;
class Backend : public VideoCore::BackendBase {
public:
Backend(Frontend::EmuWindow& window);
~Backend();
void SwapBuffers() override;
BufferHandle CreateBuffer(BufferInfo info) override;
FramebufferHandle CreateFramebuffer(FramebufferInfo info) override;
TextureHandle CreateTexture(TextureInfo info) override;
PipelineHandle CreatePipeline(PipelineType type, PipelineInfo info) override;
SamplerHandle CreateSampler(SamplerInfo info) override;
void Draw(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer,
u32 base_vertex, u32 num_vertices) override;
void DrawIndexed(PipelineHandle pipeline, FramebufferHandle draw_framebuffer,
BufferHandle vertex_buffer, BufferHandle index_buffer,
u32 base_index, u32 num_indices, u32 base_vertex) override;
void DispatchCompute(PipelineHandle pipeline, Common::Vec3<u32> groupsize,
Common::Vec3<u32> groups) override;
// Returns the vulkan instance
inline const Instance& GetInstance() const {
return instance;
}
// Returns the vulkan command buffer scheduler
inline CommandScheduler& GetScheduler() {
return scheduler;
}
private:
vk::RenderPass CreateRenderPass(vk::Format color, vk::Format depth) const;
private:
Instance instance;
Swapchain swapchain;
CommandScheduler scheduler;
// The formats Citra uses are limited so we can pre-create
// all the renderpasses we will need
std::array<vk::RenderPass, RENDERPASS_COUNT> renderpass_cache;
vk::PipelineCache cache;
// Pipeline layout cache
std::unordered_map<PipelineLayoutInfo, PipelineLayout> pipeline_layouts;
};
} // namespace VideoCore::Vulkan

View File

@ -2,165 +2,181 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <cstring>
namespace Vulkan {
namespace VideoCore::Vulkan {
inline vk::BufferUsageFlags ToVkBufferUsage(BufferUsage usage) {
constexpr std::array vk_buffer_usages = {
vk::BufferUsageFlagBits::eVertexBuffer,
vk::BufferUsageFlagBits::eIndexBuffer,
vk::BufferUsageFlagBits::eUniformBuffer,
vk::BufferUsageFlagBits::eUniformTexelBuffer,
vk::BufferUsageFlagBits::eTransferSrc
};
return vk::BufferUsageFlagBits::eTransferDst |
vk_buffer_usages.at(static_cast<u32>(usage));
}
inline vk::Format ToVkViewFormat(ViewFormat format) {
constexpr std::array vk_view_formats = {
vk::Format::eR32Sfloat,
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32Sfloat,
vk::Format::eR32G32B32A32Sfloat
};
return vk_view_formats.at(static_cast<u32>(format));
}
Buffer::Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info) :
BufferBase(info), instance(instance), scheduler(scheduler) {
vk::BufferCreateInfo buffer_info = {
.size = info.capacity,
.usage = ToVkBufferUsage(info.usage)
};
VmaAllocationCreateInfo alloc_create_info = {
.flags = info.usage == BufferUsage::Staging ?
(VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT) :
VmaAllocationCreateFlags{},
.usage = VMA_MEMORY_USAGE_AUTO
};
VkBuffer unsafe_buffer = VK_NULL_HANDLE;
VkBufferCreateInfo unsafe_buffer_info = static_cast<VkBufferCreateInfo>(buffer_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateBuffer(allocator, &unsafe_buffer_info, &alloc_create_info,
&unsafe_buffer, &allocation, &alloc_info);
buffer = vk::Buffer{unsafe_buffer};
u32 view = 0;
vk::Device device = instance.GetDevice();
while (info.views[view] != ViewFormat::Undefined) {
const vk::BufferViewCreateInfo view_info = {
.buffer = buffer,
.format = ToVkViewFormat(info.views[view]),
.range = info.capacity
};
views[view++] = device.createBufferView(view_info);
}
// Map memory
if (info.usage == BufferUsage::Staging) {
mapped_ptr = alloc_info.pMappedData;
}
}
Buffer::~Buffer() {
Destroy();
}
void Buffer::Create(const Buffer::Info& info) {
auto device = g_vk_instace->GetDevice();
buffer_info = info;
vk::BufferCreateInfo bufferInfo({}, info.size, info.usage);
buffer = device.createBuffer(bufferInfo);
auto mem_requirements = device.getBufferMemoryRequirements(buffer);
auto memory_type_index = FindMemoryType(mem_requirements.memoryTypeBits, info.properties);
vk::MemoryAllocateInfo alloc_info(mem_requirements.size, memory_type_index);
memory = device.allocateMemory(alloc_info);
device.bindBufferMemory(buffer, memory, 0);
// Optionally map the buffer to CPU memory
if (info.properties & vk::MemoryPropertyFlagBits::eHostVisible) {
host_ptr = device.mapMemory(memory, 0, info.size);
}
for (auto& format : info.view_formats) {
if (format != vk::Format::eUndefined) {
views[view_count++] = device.createBufferView({{}, buffer, format, 0, info.size});
}
}
}
void Buffer::Recreate() {
Destroy();
Create(buffer_info);
}
void Buffer::Destroy() {
if (buffer) {
if (host_ptr != nullptr) {
g_vk_instace->GetDevice().unmapMemory(memory);
}
auto deleter = [allocation = allocation,
buffer = buffer,
views = views](vk::Device device, VmaAllocator allocator) {
vmaDestroyBuffer(allocator, static_cast<VkBuffer>(buffer), allocation);
auto deleter = [buffer = buffer,
memory = memory,
view_count = view_count,
views = views]() {
auto device = g_vk_instace->GetDevice();
device.destroyBuffer(buffer);
device.freeMemory(memory);
for (u32 i = 0; i < view_count; i++) {
device.destroyBufferView(views[i]);
u32 view_index = 0;
while (views[view_index]) {
device.destroyBufferView(views[view_index++]);
}
};
g_vk_task_scheduler->Schedule(deleter);
// Delete the buffer immediately if it's allocated in host memory
if (info.usage == BufferUsage::Staging) {
vk::Device device = instance.GetDevice();
VmaAllocator allocator = instance.GetAllocator();
deleter(device, allocator);
} else {
scheduler.Schedule(deleter);
}
}
}
u32 Buffer::FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties) {
vk::PhysicalDeviceMemoryProperties mem_properties = g_vk_instace->GetPhysicalDevice().getMemoryProperties();
for (uint32_t i = 0; i < mem_properties.memoryTypeCount; i++)
{
auto flags = mem_properties.memoryTypes[i].propertyFlags;
if ((type_filter & (1 << i)) && (flags & properties) == properties)
return i;
}
LOG_CRITICAL(Render_Vulkan, "Failed to find suitable memory type.");
UNREACHABLE();
}
void Buffer::Upload(std::span<const std::byte> data, u32 offset,
vk::AccessFlags access_to_block,
vk::PipelineStageFlags stage_to_block) {
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
// For small data uploads use vkCmdUpdateBuffer
if (data.size_bytes() < 1024) {
cmdbuffer.updateBuffer(buffer, 0, data.size_bytes(), data.data());
}
else {
auto [ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(data.size());
if (!ptr) {
LOG_ERROR(Render_Vulkan, "Cannot upload data without staging buffer!");
}
// Copy pixels to staging buffer
std::memcpy(ptr, data.data(), data.size_bytes());
auto region = vk::BufferCopy{staging_offset, offset, data.size_bytes()};
auto& staging = g_vk_task_scheduler->GetStaging();
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, region);
}
vk::BufferMemoryBarrier barrier{
vk::AccessFlagBits::eTransferWrite, access_to_block,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
buffer, offset, data.size_bytes()
};
// Add a pipeline barrier for the region modified
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
vk::DependencyFlagBits::eByRegion,
0, nullptr, 1, &barrier, 0, nullptr);
}
std::tuple<u8*, u32, bool> StreamBuffer::Map(u32 size, u32 alignment) {
ASSERT(size <= buffer_info.size);
ASSERT(alignment <= buffer_info.size);
std::span<u8> Buffer::Map(u32 size, u32 alignment) {
ASSERT(size <= info.capacity && alignment <= info.capacity);
if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
buffer_offset = Common::AlignUp<std::size_t>(buffer_offset, alignment);
}
bool invalidate = false;
if (buffer_pos + size > buffer_info.size) {
buffer_pos = 0;
invalidate = true;
// If the buffer is full, invalidate it
if (buffer_offset + size > info.capacity) {
Invalidate();
}
auto [staging_ptr, staging_offset] = g_vk_task_scheduler->RequestStaging(size);
mapped_chunk = vk::BufferCopy{staging_offset, buffer_pos, size};
return std::make_tuple(staging_ptr, buffer_pos, invalidate);
if (info.usage == BufferUsage::Staging) {
return std::span<u8>{reinterpret_cast<u8*>(mapped_ptr) + buffer_offset, size};
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
return staging.Map(size, alignment);
}
}
void StreamBuffer::Commit(u32 size, vk::AccessFlags access_to_block,
vk::PipelineStageFlags stage_to_block) {
if (size > 0) {
mapped_chunk.size = size;
void Buffer::Commit(u32 size) {
VmaAllocator allocator = instance.GetAllocator();
if (info.usage == BufferUsage::Staging && size > 0) {
vmaFlushAllocation(allocator, allocation, buffer_offset, size);
} else {
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
Buffer& staging = scheduler.GetCommandUploadBuffer();
auto cmdbuffer = g_vk_task_scheduler->GetUploadCommandBuffer();
auto& staging = g_vk_task_scheduler->GetStaging();
cmdbuffer.copyBuffer(staging.GetBuffer(), buffer, mapped_chunk);
const vk::BufferCopy copy_region = {
.srcOffset = staging.GetCurrentOffset(),
.dstOffset = buffer_offset,
.size = size
};
vk::BufferMemoryBarrier barrier{
vk::AccessFlagBits::eTransferWrite, access_to_block,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
buffer, mapped_chunk.dstOffset, mapped_chunk.size
// Copy staging buffer to device local buffer
command_buffer.copyBuffer(staging.GetHandle(), buffer, copy_region);
vk::AccessFlags access_mask;
vk::PipelineStageFlags stage_mask;
switch (info.usage) {
case BufferUsage::Vertex:
access_mask = vk::AccessFlagBits::eVertexAttributeRead;
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
break;
case BufferUsage::Index:
access_mask = vk::AccessFlagBits::eIndexRead;
stage_mask = vk::PipelineStageFlagBits::eVertexInput;
break;
case BufferUsage::Uniform:
case BufferUsage::Texel:
access_mask = vk::AccessFlagBits::eUniformRead;
stage_mask = vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eFragmentShader;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown BufferUsage flag!");
}
const vk::BufferMemoryBarrier buffer_barrier = {
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
.dstAccessMask = access_mask,
.buffer = buffer,
.offset = buffer_offset,
.size = size
};
// Add a pipeline barrier for the region modified
cmdbuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_to_block,
vk::DependencyFlagBits::eByRegion,
0, nullptr, 1, &barrier, 0, nullptr);
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, stage_mask,
vk::DependencyFlagBits::eByRegion, {}, buffer_barrier, {});
buffer_pos += size;
}
buffer_offset += size;
}
}
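Taken together, Map and Commit implement a simple streaming protocol: Map hands back either the persistently mapped staging memory or a chunk of the scheduler's staging buffer, and Commit either flushes the allocation or records the staging-to-device copy plus the matching barrier. A minimal sketch of the intended calling pattern, assuming a device-local vertex buffer:

#include <cstring>
#include <span>

// Streams vertex data through the staging buffer managed by the scheduler
void UploadVertices(Buffer& buffer, std::span<const u8> data) {
    const u32 size = static_cast<u32>(data.size_bytes());
    std::span<u8> mapped = buffer.Map(size);
    std::memcpy(mapped.data(), data.data(), size);
    // Records the staging -> device copy and the matching pipeline barrier
    buffer.Commit(size);
}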

View File

@ -4,80 +4,47 @@
#pragma once
#include <memory>
#include <vector>
#include <deque>
#include <span>
#include "common/common_types.h"
#include <array>
#include "common/assert.h"
#include "video_core/common/buffer.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
constexpr u32 MAX_BUFFER_VIEWS = 5;
constexpr u32 MAX_COMMIT_CHUNKS = 6;
class Instance;
class CommandScheduler;
/// Generic Vulkan buffer object used by almost every resource
class Buffer : public NonCopyable {
class Buffer : public VideoCore::BufferBase {
public:
struct Info {
u32 size;
vk::MemoryPropertyFlags properties;
vk::BufferUsageFlags usage;
std::array<vk::Format, MAX_BUFFER_VIEWS> view_formats{};
};
Buffer(Instance& instance, CommandScheduler& scheduler, const BufferInfo& info);
~Buffer() override;
Buffer() = default;
~Buffer();
std::span<u8> Map(u32 size, u32 alignment = 0) override;
/// Enable move operations
Buffer(Buffer&&) = default;
Buffer& operator=(Buffer&&) = default;
/// Flushes write to buffer memory
void Commit(u32 size = 0) override;
/// Create a new Vulkan buffer object
void Create(const Info& info);
void Recreate();
void Destroy();
/// Returns the Vulkan buffer handle
vk::Buffer GetHandle() const {
return buffer;
}
/// Global utility functions used by other objects
static u32 FindMemoryType(u32 type_filter, vk::MemoryPropertyFlags properties);
/// Return a pointer to the mapped memory if the buffer is host mapped
u8* GetHostPointer() const { return reinterpret_cast<u8*>(host_ptr); }
const vk::BufferView& GetView(u32 i = 0) const { return views[i]; }
const vk::Buffer& GetBuffer() const { return buffer; }
u32 GetSize() const { return buffer_info.size; }
void Upload(std::span<const std::byte> data, u32 offset,
vk::AccessFlags access_to_block = vk::AccessFlagBits::eVertexAttributeRead,
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexInput);
/// Returns an immutable reference to the requested buffer view
const vk::BufferView& GetView(u32 index = 0) const {
ASSERT(index < view_count);
return views[index];
}
protected:
Info buffer_info;
vk::Buffer buffer;
vk::DeviceMemory memory;
void* host_ptr = nullptr;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views;
u32 view_count{};
};
Instance& instance;
CommandScheduler& scheduler;
class StreamBuffer : public Buffer {
public:
/*
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
* and the optional alignment requirement.
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
* The return values are the pointer to the new chunk, the offset within the buffer,
* and the invalidation flag for previous chunks.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, u32, bool> Map(u32 size, u32 alignment = 0);
void Commit(u32 size, vk::AccessFlags access_to_block = vk::AccessFlagBits::eUniformRead,
vk::PipelineStageFlags stage_to_block = vk::PipelineStageFlagBits::eVertexShader |
vk::PipelineStageFlagBits::eFragmentShader);
private:
u32 buffer_pos{};
vk::BufferCopy mapped_chunk;
// Vulkan buffer handle
void* mapped_ptr = nullptr;
vk::Buffer buffer = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
std::array<vk::BufferView, MAX_BUFFER_VIEWS> views{};
u32 view_count = 0;
};
}
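As a usage sketch for the constructor above: a texel buffer is created by listing its view formats in BufferInfo, terminated by ViewFormat::Undefined. The enumerator name used for the view below is an assumption:

// A minimal sketch, assuming BufferInfo's views array as consumed by the constructor
BufferInfo MakeTexelBufferInfo() {
    BufferInfo info{};
    info.capacity = 4096;
    info.usage = BufferUsage::Texel;
    info.views[0] = ViewFormat::R32Float; // hypothetical enumerator name
    // Remaining view slots stay ViewFormat::Undefined, ending the view list
    return info;
}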

View File

@ -14,3 +14,74 @@
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#define VMA_VULKAN_VERSION 1001000 // Vulkan 1.1
#include <vk_mem_alloc.h>
namespace VideoCore::Vulkan {
/// Returns the aligned byte size of each pixel in the specified format
constexpr u32 GetFormatSize(vk::Format format) {
switch (format) {
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eD24UnormS8Uint:
return 4;
case vk::Format::eR8G8B8Unorm:
return 3;
case vk::Format::eR5G5B5A1UnormPack16:
case vk::Format::eR5G6B5UnormPack16:
case vk::Format::eR4G4B4A4UnormPack16:
case vk::Format::eD16Unorm:
return 2;
default:
return 0;
}
}
/// Returns the image aspect associated with the provided format
constexpr vk::ImageAspectFlags GetImageAspect(vk::Format format) {
vk::ImageAspectFlags flags;
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eX8D24UnormPack32:
case vk::Format::eD32SfloatS8Uint:
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
flags = vk::ImageAspectFlagBits::eDepth;
break;
default:
flags = vk::ImageAspectFlagBits::eColor;
}
return flags;
}
/// Returns a bit mask with the required usage of a format with a particular aspect
constexpr vk::ImageUsageFlags GetImageUsage(vk::ImageAspectFlags aspect) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::ImageUsageFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::ImageUsageFlagBits::eColorAttachment;
}
}
/// Returns a bit mask with the required features of a format with a particular aspect
constexpr vk::FormatFeatureFlags GetFormatFeatures(vk::ImageAspectFlags aspect) {
auto usage = vk::FormatFeatureFlagBits::eSampledImage |
vk::FormatFeatureFlagBits::eTransferDst |
vk::FormatFeatureFlagBits::eTransferSrc |
vk::FormatFeatureFlagBits::eBlitSrc |
vk::FormatFeatureFlagBits::eBlitDst;
if (aspect & vk::ImageAspectFlagBits::eDepth) {
return usage | vk::FormatFeatureFlagBits::eDepthStencilAttachment;
} else {
return usage | vk::FormatFeatureFlagBits::eColorAttachment;
}
}
}
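These helpers compose when filling out an image create info. For example (relying on VULKAN_HPP_NO_CONSTRUCTORS for designated initializers, as the rest of this commit does):

// Builds a basic 2D image create info using only the helpers defined above
inline vk::ImageCreateInfo MakeImageInfo(vk::Format format, u32 width, u32 height) {
    const vk::ImageAspectFlags aspect = GetImageAspect(format);
    return vk::ImageCreateInfo{
        .imageType = vk::ImageType::e2D,
        .format = format,
        .extent = {width, height, 1},
        .mipLevels = 1,
        .arrayLayers = 1,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = GetImageUsage(aspect)
    };
}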

View File

@ -0,0 +1,6 @@
#include "vk_format_util.h"
vk_format_util::vk_format_util()
{
}

View File

@ -0,0 +1,436 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <span>
#include <bit>
#include <string_view>
#include <array>
#include <cstring>
#include <bitset>
#include <type_traits>
#include <vulkan/vulkan_format_traits.hpp>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
enum class SIMD : u8 {
None = 0,
SSE4 = 1,
AVX2 = 2,
NEON = 3
};
/**
* A Pixel holds a pixel value or a SIMD lane holding multiple "real" pixels
*/
#pragma pack(1)
template <u8 bytes, SIMD simd = SIMD::None>
struct Pixel {
using StorageType = std::conditional_t<bytes <= 1, u8,
std::conditional_t<bytes <= 2, u16,
std::conditional_t<bytes <= 4, u32, u64>>>;
Pixel() = default;
// Memory load/store
constexpr void Load(const u8* memory) {
std::memcpy(&storage, memory, bytes);
}
constexpr void Store(u8* memory) const {
std::memcpy(memory, &storage, bytes);
}
// Returns the number of bytes until the next pixel
constexpr u8 GetStride() const {
return bytes;
}
// Bitwise operators
constexpr Pixel RotateRight(int n) const {
Pixel result;
result.storage = std::rotr(storage, n);
return result;
}
constexpr StorageType operator & (const StorageType mask) const {
return storage & mask;
}
constexpr StorageType operator | (const StorageType mask) const {
return storage | mask;
}
constexpr StorageType operator >>(const int n) const {
return storage >> n;
}
constexpr StorageType operator <<(const int n) const {
return storage << n;
}
private:
StorageType storage;
};
#pragma pack()
/**
* Information about a pixel format
*/
template <u8 Components>
struct FormatInfo {
constexpr FormatInfo(vk::Format format) {
for (int i = 0; i < components; i++) {
name[i] = vk::componentName(format, i)[0];
is_float[i] = std::string_view{vk::componentNumericFormat(format, i)}
== "SFLOAT";
bits[i] = vk::componentBits(format, i);
bit_offset[i] = (i > 0 ? bit_offset[i - 1] + bits[i - 1] : 0);
}
bytes = (format == vk::Format::eD32SfloatS8Uint ? 8 :
vk::blockSize(format));
}
static constexpr u32 components = Components;
std::array<char, components> name;
std::array<bool, components> is_float;
std::array<u8, components> bit_offset;
std::array<u8, components> bits;
u8 bytes; // This includes the padding in D32S8
};
/**
* Represents a mapping of components from one format to another
*/
template <FormatInfo source, FormatInfo dest>
struct Mapping {
static constexpr u32 component_map_bits = 4;
static constexpr u32 component_map_mask = (1 << component_map_bits) - 1;
constexpr Mapping() {
for (u32 i = 0; i < source.name.size(); i++) {
const char source_name = source.name[i];
for (u32 j = 0; j < dest.name.size(); j++) {
const char dest_name = dest.name[j];
if (source_name == dest_name) {
// Clear the nibble (initialized to unmapped) before writing the index
storage = static_cast<u16>((storage & ~(component_map_mask << (component_map_bits * i))) |
((j & component_map_mask) << (component_map_bits * i)));
break;
}
}
}
}
constexpr u8 GetMapping(const int component) const {
return (storage >> (component * component_map_bits)) & component_map_mask;
}
// Returns the number of bits to rotate a pixel to the right
// to match the mapping of the destination format. If it's not
// possible returns -1
constexpr s32 TestMappingRotation() const {
constexpr u16 identity = 0x3210;
u32 total_bits_rotated = 0;
auto test_rotation = [&](s32 i) -> bool {
return (storage == std::rotr(identity, i * component_map_bits));
};
for (s32 rot = 0; rot < 4; rot++) {
if (test_rotation(rot)) {
return total_bits_rotated;
}
total_bits_rotated += source.bits[rot];
}
return -1;
}
// Returns true if each component of the source format has the
// same bit-width as the mapped destination format component
constexpr bool AreBitwiseEqual() const {
bool result = source.bytes == dest.bytes;
for (int i = 0; i < source.components; i++) {
result &= (source.bits[i] == dest.bits[GetMapping(i)]);
}
return result;
}
private:
// Since there are at most 4 components we can use 4 bits for each component
u16 storage = 0xFFFF;
};
// Allows for loop like iteration at compile time
template <auto Start, auto End, class F>
constexpr void ForEach(F&& f) {
if constexpr (Start < End) {
f(std::integral_constant<decltype(Start), Start>());
ForEach<Start + 1, End>(f);
}
}
// Copies pixel data from a source to a destination buffer, performing
// format conversion at the same time
template <vk::Format source_format, vk::Format dest_format, SIMD simd>
constexpr void Convert2(std::span<const u8> source, std::span<u8> dest) {
constexpr u32 source_components = vk::componentCount(source_format);
constexpr u32 dest_components = vk::componentCount(dest_format);
// Query vulkan hpp format traits for the info we need
constexpr FormatInfo<source_components> source_info{source_format};
constexpr FormatInfo<dest_components> dest_info{dest_format};
// Create a table with the required component mapping
constexpr Mapping<source_info, dest_info> mapping{};
// Begin conversion
u32 source_offset = 0;
u32 dest_offset = 0;
while (source_offset < source.size()) {
// Load source pixel
Pixel<source_info.bytes, simd> source_pixel;
Pixel<dest_info.bytes, simd> dest_pixel{};
// Load data into the pixel
source_pixel.Load(source.data() + source_offset);
// OPTIMIZATION: Some formats (RGB5A1, A1RGB5) are simply rotations
// of one another. We can use a faster path for these
if constexpr (constexpr s32 rot = mapping.TestMappingRotation();
rot > -1 && mapping.AreBitwiseEqual()) {
dest_pixel = source_pixel.RotateRight(rot);
// RGB8 <-> RGBA8 is extremely common on desktop GPUs
// so it deserves a special path
} else if constexpr (true) {
// TODO: Dedicated RGB8 <-> RGBA8 path
} else {
ForEach<0, source_components>([&](auto comp) {
// If the component is not mapped skip it
constexpr u8 dest_comp = mapping.GetMapping(comp);
if constexpr (dest_comp == 0xF) {
return;
}
// Retrieve component
u32 component = GetComponent<source_format, source_info.bytes, comp>(source_pixel);
constexpr bool is_source_float = IsFloat<source_format>(comp);
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
// Perform float <-> int conversion (normalization)
if constexpr (is_source_float && !is_dest_float) {
float temp;
std::memcpy(&temp, &component, sizeof(float));
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
component = static_cast<u32>(temp * mask);
} else if constexpr (!is_source_float && is_dest_float) {
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
float temp = static_cast<float>(component) / mask;
std::memcpy(&component, &temp, sizeof(float));
}
SetComponent<dest_format, dest_info.bytes, dest_comp>(dest_pixel, component);
});
}
// Write destination pixel (the pixel storage includes padding so copy only blockSize bytes)
std::memcpy(dest.data() + dest_offset, &dest_pixel,
vk::blockSize(dest_format));
// Copy next pixel
source_offset += source_pixel.GetStride();
dest_offset += dest_pixel.GetStride();
}
}
// Associate the byte count with an integral type
template <u8 bytes>
struct PackedInt { using type = std::array<u8, bytes>; };
template <>
struct PackedInt<1> { using type = u8; };
template <>
struct PackedInt<2> { using type = u16; };
template <>
struct PackedInt<4> { using type = u32; };
template <>
struct PackedInt<8> { using type = u64; };
template <u8 bytes>
using PackedType = typename PackedInt<bytes>::type;
// Returns the pointer to the raw bytes respecting the underlying type
template <u8 bytes>
constexpr u8* DataPtr(PackedType<bytes>& data) {
if constexpr (std::is_integral_v<PackedType<bytes>>) {
return reinterpret_cast<u8*>(&data);
} else {
return data.data();
}
}
// Returns true when the specified component is of float type
template <vk::Format format>
constexpr bool IsFloat(u8 component) {
return std::string_view{vk::componentNumericFormat(format, component)} == "SFLOAT";
}
// Returns the offset in bits of the component from the start of the pixel
template <vk::Format format, u8 component, u8 i = 0>
constexpr u32 GetComponentBitOffset() {
if constexpr (i == component) {
return 0;
} else {
return vk::componentBits(format, i) +
GetComponentBitOffset<format, component, i + 1>();
}
}
// Returns the data located at the specified component
template <vk::Format format, u8 bytes, u8 component>
constexpr u32 GetComponent(PackedType<bytes>& pixel) {
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
constexpr u64 component_bits = vk::componentBits(format, component);
constexpr u64 mask = (1ull << component_bits) - 1;
// First process packed formats which are easy to extract from
if constexpr (std::is_integral_v<PackedType<bytes>>) {
return (pixel >> bit_offset) & mask;
} else {
// Assume component_bits and offset are byte aligned. Otherwise
// this would be extremely complicated
using ComponentType = PackedType<(component_bits >> 3)>;
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
static_assert(std::is_integral_v<ComponentType>);
constexpr u64 byte_offset = bit_offset >> 3;
return *reinterpret_cast<ComponentType*>(DataPtr<bytes>(pixel) + byte_offset);
}
}
template <vk::Format format, u8 bytes, u8 component>
constexpr void SetComponent(PackedType<bytes>& pixel, u32 data) {
constexpr u64 bit_offset = GetComponentBitOffset<format, component>();
constexpr u64 component_bits = vk::componentBits(format, component);
constexpr u64 mask = (1ull << component_bits) - 1;
// First process packed formats which are easy to write
if constexpr (std::is_integral_v<PackedType<bytes>>) {
pixel |= (data & mask) << bit_offset;
} else {
// Assume component_bits and offset are byte aligned. Otherwise
// this would be extremely complicated
using ComponentType = PackedType<(component_bits >> 3)>;
static_assert(component_bits % 8 == 0 && bit_offset % 8 == 0);
static_assert(std::is_integral_v<ComponentType>);
constexpr u64 byte_offset = bit_offset >> 3;
*reinterpret_cast<ComponentType*>(DataPtr<bytes>(pixel) + byte_offset) = static_cast<ComponentType>(data);
}
}
constexpr bool CanUseRotation();
// Lookup table that maps component i of source format
// to component mapping[i] of the destination format
template <vk::Format source_format, u8 source_components,
vk::Format dest_format, u8 dest_components>
constexpr auto ComponentMapping() {
// Since there are at most 4 components we can use 4 bits for each index
u16 mapping = 0xFFFF;
for (u8 i = 0; i < source_components; i++) {
auto source_name = vk::componentName(source_format, i);
for (u8 j = 0; j < dest_components; j++) {
auto dest_name = vk::componentName(dest_format, j);
if (std::string_view{source_name} == std::string_view{dest_name}) {
mapping = static_cast<u16>((mapping & ~(0xF << (4 * i))) | ((j & 0xF) << (4 * i)));
break;
}
}
}
return mapping;
}
// Allows for loop like iteration at compile time
template <auto Start, auto End, class F>
constexpr void ConstexprFor(F&& f) {
if constexpr (Start < End) {
f(std::integral_constant<decltype(Start), Start>());
ConstexprFor<Start + 1, End>(f);
}
}
// Copies pixel data from a source to a destination buffer, performing
// format conversion at the same time
template <vk::Format source_format, u8 source_bytes,
vk::Format dest_format, u8 dest_bytes>
constexpr void Convert(std::span<const u8> source, std::span<u8> dest) {
constexpr u32 source_components = vk::componentCount(source_format);
constexpr u32 dest_components = vk::componentCount(dest_format);
// Create a table with the required component mapping
constexpr auto mapping = ComponentMapping<source_format, source_components,
dest_format, dest_components>();
u32 source_offset = 0;
u32 dest_offset = 0;
while (source_offset < source.size()) {
// Load source pixel
PackedType<source_bytes> source_pixel;
std::memcpy(DataPtr<source_bytes>(source_pixel),
source.data() + source_offset, source_bytes);
PackedType<dest_bytes> dest_pixel{};
// Convert each component according to the mapping table
ConstexprFor<0, source_components>([&](auto comp) {
constexpr u8 dest_comp = (mapping >> (4 * comp)) & 0xF;
// If the component is not mapped skip it
if constexpr (dest_comp == 0xF) {
return;
}
// Retrieve component
u32 component = GetComponent<source_format, source_bytes, comp>(source_pixel);
constexpr bool is_source_float = IsFloat<source_format>(comp);
constexpr bool is_dest_float = IsFloat<dest_format>(dest_comp);
// Perform float <-> int conversion (normalization)
if constexpr (is_source_float && !is_dest_float) {
float temp;
std::memcpy(&temp, &component, sizeof(float));
constexpr u64 mask = (1ull << vk::componentBits(dest_format, dest_comp)) - 1;
component = static_cast<u32>(temp * mask);
} else if constexpr (!is_source_float && is_dest_float) {
constexpr u64 mask = (1ull << vk::componentBits(source_format, comp)) - 1;
float temp = static_cast<float>(component) / mask;
std::memcpy(&component, &temp, sizeof(float));
}
SetComponent<dest_format, dest_bytes, dest_comp>(dest_pixel, component);
});
// Write destination pixel (dest_bytes includes the padding so we cannot use it here)
std::memcpy(dest.data() + dest_offset, DataPtr<dest_bytes>(dest_pixel),
vk::blockSize(dest_format));
// Copy next pixel
source_offset += source_bytes;
dest_offset += dest_bytes;
}
}
} // namespace VideoCore::Vulkan
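An illustrative instantiation of the Convert template above, converting one RGBA8 pixel to RGB5A1 with the byte strides passed explicitly as the template expects:

#include <array>

void ConvertExample() {
    const std::array<u8, 4> src = {0xFF, 0x00, 0x00, 0xFF}; // opaque red, RGBA8
    std::array<u8, 2> dst{};
    Convert<vk::Format::eR8G8B8A8Unorm, 4,
            vk::Format::eR5G5B5A1UnormPack16, 2>(src, dst);
}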

View File

@ -2,52 +2,148 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <fstream>
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <span>
#include <array>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
std::unique_ptr<Instance> g_vk_instace;
Instance::Instance(Frontend::EmuWindow& window) {
auto window_info = window.GetWindowInfo();
// Enable the instance extensions the backend uses
auto extensions = GetInstanceExtensions(window_info.type, true);
// We require a Vulkan 1.1 driver
const u32 available_version = vk::enumerateInstanceVersion();
if (available_version < VK_API_VERSION_1_1) {
LOG_CRITICAL(Render_Vulkan, "Vulkan 1.0 is not supported, 1.1 is required!");
}
const vk::ApplicationInfo application_info = {
.pApplicationName = "Citra",
.applicationVersion = VK_MAKE_VERSION(1, 0, 0),
.pEngineName = "Citra Vulkan",
.engineVersion = VK_MAKE_VERSION(1, 0, 0),
.apiVersion = available_version
};
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const vk::InstanceCreateInfo instance_info = {
.pApplicationInfo = &application_info,
.enabledLayerCount = static_cast<u32>(layers.size()),
.ppEnabledLayerNames = layers.data(),
.enabledExtensionCount = static_cast<u32>(extensions.size()),
.ppEnabledExtensionNames = extensions.data()
};
// Create VkInstance
instance = vk::createInstance(instance_info);
surface = CreateSurface(instance, window);
// TODO: GPU select dialog
physical_device = instance.enumeratePhysicalDevices()[0];
device_limits = physical_device.getProperties().limits;
// Create logical device
CreateDevice(true);
}
Instance::~Instance() {
device.waitIdle();
device.destroy();
instance.destroy();
}
bool Instance::Create(vk::Instance new_instance, vk::PhysicalDevice gpu,
vk::SurfaceKHR surface, bool enable_validation_layer) {
instance = new_instance;
physical_device = gpu;
// Get physical device limits
device_limits = physical_device.getProperties().limits;
bool Instance::CreateDevice(bool validation_enabled) {
// Determine required extensions and features
if (!FindExtensions() || !FindFeatures())
return false;
auto feature_chain = physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
vk::PhysicalDeviceDynamicRenderingFeaturesKHR,
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT,
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>();
// Create logical device
return CreateDevice(surface, enable_validation_layer);
}
// Not having geometry shaders or wide lines will cause issues with rendering.
const vk::PhysicalDeviceFeatures available = feature_chain.get().features;
if (!available.geometryShader && !available.wideLines) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Accelerated rendering not possible!");
}
bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
// Can't create an instance without a valid surface
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Invalid surface provided during instance creation!");
// Enable some common features other emulators like Dolphin use
const vk::PhysicalDeviceFeatures2 features = {
.features = {
.robustBufferAccess = available.robustBufferAccess,
.geometryShader = available.geometryShader,
.sampleRateShading = available.sampleRateShading,
.dualSrcBlend = available.dualSrcBlend,
.logicOp = available.logicOp,
.depthClamp = available.depthClamp,
.largePoints = available.largePoints,
.samplerAnisotropy = available.samplerAnisotropy,
.occlusionQueryPrecise = available.occlusionQueryPrecise,
.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics,
.shaderStorageImageMultisample = available.shaderStorageImageMultisample,
.shaderClipDistance = available.shaderClipDistance
}
};
// Enable newer Vulkan features
auto enabled_features = vk::StructureChain{
features,
feature_chain.get<vk::PhysicalDeviceDynamicRenderingFeaturesKHR>(),
feature_chain.get<vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT>(),
feature_chain.get<vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT>()
};
auto extension_list = physical_device.enumerateDeviceExtensionProperties();
if (extension_list.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// List available device extensions
for (const auto& extension : extension_list) {
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", extension.extensionName);
}
// Helper lambda for adding extensions
std::array<const char*, 6> enabled_extensions;
u32 enabled_extension_count = 0;
auto AddExtension = [&](std::string_view name, bool required) -> bool {
auto result = std::find_if(extension_list.begin(), extension_list.end(), [&](const auto& prop) {
return name.compare(prop.extensionName.data()) == 0;
});
if (result != extension_list.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
enabled_extensions[enabled_extension_count++] = name.data();
return true;
}
if (required) {
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
}
return false;
};
// Add required extensions
AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
// Check for optional features
dynamic_rendering = AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, false);
extended_dynamic_state = AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
push_descriptors = AddExtension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, false);
// Search queue families for graphics and present queues
auto family_properties = physical_device.getQueueFamilyProperties();
if (family_properties.empty()) {
LOG_CRITICAL(Render_Vulkan, "Vulkan physical device reported no queues.");
return false;
}
// Search queue families for graphics and present queues
graphics_queue_family_index = -1;
present_queue_family_index = -1;
for (int i = 0; i < family_properties.size(); i++) {
@ -68,24 +164,35 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
}
}
if (graphics_queue_family_index == -1 ||
present_queue_family_index == -1) {
if (graphics_queue_family_index == -1 || present_queue_family_index == -1) {
LOG_CRITICAL(Render_Vulkan, "Unable to find graphics and/or present queues.");
return false;
}
static constexpr float queue_priorities[] = {1.0f};
const std::array layers{"VK_LAYER_KHRONOS_validation"};
const std::array queue_infos{
vk::DeviceQueueCreateInfo{{}, graphics_queue_family_index, 1, queue_priorities},
vk::DeviceQueueCreateInfo{{}, present_queue_family_index, 1, queue_priorities}
const std::array layers = {"VK_LAYER_KHRONOS_validation"};
const std::array queue_infos = {
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = graphics_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
},
vk::DeviceQueueCreateInfo{
.queueFamilyIndex = present_queue_family_index,
.queueCount = 1,
.pQueuePriorities = queue_priorities
}
};
vk::DeviceCreateInfo device_info({}, 1, queue_infos.data(), 0, nullptr,
extensions.size(), extensions.data(), nullptr, &features);
vk::DeviceCreateInfo device_info = {
.pNext = &enabled_features,
.queueCreateInfoCount = 1,
.pQueueCreateInfos = queue_infos.data(),
.enabledExtensionCount = enabled_extension_count,
.ppEnabledExtensionNames = enabled_extensions.data(),
};
// Use two queue create infos when the graphics and present families differ
if (graphics_queue_family_index != present_queue_family_index) {
device_info.queueCreateInfoCount = 2;
}
@ -104,87 +211,67 @@ bool Instance::CreateDevice(vk::SurfaceKHR surface, bool validation_enabled) {
graphics_queue = device.getQueue(graphics_queue_family_index, 0);
present_queue = device.getQueue(present_queue_family_index, 0);
return true;
}
bool Instance::FindFeatures() {
auto available = physical_device.getFeatures();
// Not having geometry shaders or wide lines will cause issues with rendering.
if (!available.geometryShader && !available.wideLines) {
LOG_WARNING(Render_Vulkan, "Geometry shaders not availabe! Rendering will be limited");
}
// Enable some common features other emulators like Dolphin use
vk_features.dualSrcBlend = available.dualSrcBlend;
vk_features.geometryShader = available.geometryShader;
vk_features.samplerAnisotropy = available.samplerAnisotropy;
vk_features.logicOp = available.logicOp;
vk_features.fragmentStoresAndAtomics = available.fragmentStoresAndAtomics;
vk_features.sampleRateShading = available.sampleRateShading;
vk_features.largePoints = available.largePoints;
vk_features.shaderStorageImageMultisample = available.shaderStorageImageMultisample;
vk_features.occlusionQueryPrecise = available.occlusionQueryPrecise;
vk_features.shaderClipDistance = available.shaderClipDistance;
vk_features.depthClamp = available.depthClamp;
vk_features.textureCompressionBC = available.textureCompressionBC;
// Enable newer Vulkan features
vk12_features.timelineSemaphore = true;
vk13_features.dynamicRendering = true;
dynamic_state_features.extendedDynamicState = true;
dynamic_state2_features.extendedDynamicState2 = true;
// Include features in device creation
vk12_features.pNext = &vk13_features;
vk13_features.pNext = &dynamic_state_features;
dynamic_state_features.pNext = &dynamic_state2_features;
features = vk::PhysicalDeviceFeatures2{vk_features, &vk12_features};
// Create the VMA allocator
CreateAllocator();
return true;
}
bool Instance::FindExtensions() {
auto available = physical_device.enumerateDeviceExtensionProperties();
if (available.empty()) {
LOG_CRITICAL(Render_Vulkan, "No extensions supported by device.");
return false;
}
// List available device extensions
for (const auto& prop : available) {
LOG_INFO(Render_Vulkan, "Vulkan extension: {}", prop.extensionName);
}
// Helper lambda for adding extensions
auto AddExtension = [&](const char* name, bool required) {
auto result = std::find_if(available.begin(), available.end(), [&](const auto& prop) {
return !std::strcmp(name, prop.extensionName);
});
if (result != available.end()) {
LOG_INFO(Render_Vulkan, "Enabling extension: {}", name);
extensions.push_back(name);
return true;
}
if (required) {
LOG_ERROR(Render_Vulkan, "Unable to find required extension {}.", name);
}
return false;
void Instance::CreateAllocator() {
VmaVulkanFunctions functions = {
.vkGetInstanceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetInstanceProcAddr,
.vkGetDeviceProcAddr = VULKAN_HPP_DEFAULT_DISPATCHER.vkGetDeviceProcAddr
};
// Add required extensions
if (!AddExtension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true) ||
!AddExtension(VK_KHR_DYNAMIC_RENDERING_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME, true) ||
!AddExtension(VK_EXT_COLOR_WRITE_ENABLE_EXTENSION_NAME, true)) {
return false;
VmaAllocatorCreateInfo allocator_info = {
.physicalDevice = physical_device,
.device = device,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1
};
vmaCreateAllocator(&allocator_info, &allocator);
}
bool Instance::IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const {
static std::unordered_map<vk::Format, vk::FormatProperties> supported;
if (auto iter = supported.find(format); iter != supported.end()) {
return (iter->second.optimalTilingFeatures & usage) == usage;
}
return true;
// Cache format properties so we don't have to query the driver all the time
const vk::FormatProperties properties = physical_device.getFormatProperties(format);
supported.insert(std::make_pair(format, properties));
return (properties.optimalTilingFeatures & usage) == usage;
}
vk::Format Instance::GetFormatAlternative(vk::Format format) const {
vk::FormatFeatureFlags features = GetFormatFeatures(GetImageAspect(format));
if (IsFormatSupported(format, features)) {
return format;
}
// Return the most supported alternative format preferably with the
// same block size according to the Vulkan spec.
// See 43.3. Required Format Support of the Vulkan spec
switch (format) {
case vk::Format::eD24UnormS8Uint:
return vk::Format::eD32SfloatS8Uint;
case vk::Format::eX8D24UnormPack32:
return vk::Format::eD32Sfloat;
case vk::Format::eR5G5B5A1UnormPack16:
return vk::Format::eA1R5G5B5UnormPack16;
case vk::Format::eR4G4B4A4UnormPack16:
return vk::Format::eB4G4R4A4UnormPack16;
case vk::Format::eR8G8B8Unorm:
return vk::Format::eR8G8B8A8Unorm;
default:
LOG_WARNING(Render_Vulkan, "Unable to find compatible alternative to format = {} with usage {}",
vk::to_string(format), vk::to_string(features));
return vk::Format::eR8G8B8A8Unorm;
}
}
} // namespace VideoCore::Vulkan
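As a usage note, GetFormatAlternative lets callers request the 3DS-native format and transparently receive a supported substitute. A sketch:

// Picks a depth format, falling back per the table above when D24S8 is unsupported
vk::Format PickDepthFormat(const Instance& instance) {
    return instance.GetFormatAlternative(vk::Format::eD24UnormS8Uint);
}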

View File

@ -8,61 +8,104 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace Frontend {
class EmuWindow;
}
namespace VideoCore::Vulkan {
/// The global Vulkan instance
class Instance {
public:
Instance() = default;
Instance(Frontend::EmuWindow& window);
~Instance();
/// Construct global Vulkan context
bool Create(vk::Instance instance, vk::PhysicalDevice gpu,
vk::SurfaceKHR surface, bool enable_validation_layer);
/// Returns the Vulkan instance
vk::Instance GetInstance() const {
return instance;
}
vk::Device GetDevice() const { return device; }
vk::PhysicalDevice GetPhysicalDevice() const { return physical_device; }
vk::Instance GetInstance() const { return instance; }
/// Returns the Vulkan surface
vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Returns the current physical device
vk::PhysicalDevice GetPhysicalDevice() const {
return physical_device;
}
/// Returns the Vulkan device
vk::Device GetDevice() const {
return device;
}
VmaAllocator GetAllocator() const {
return allocator;
}
/// Retrieve queue information
u32 GetGraphicsQueueFamilyIndex() const { return graphics_queue_family_index; }
u32 GetPresentQueueFamilyIndex() const { return present_queue_family_index; }
vk::Queue GetGraphicsQueue() const { return graphics_queue; }
vk::Queue GetPresentQueue() const { return present_queue; }
u32 GetGraphicsQueueFamilyIndex() const {
return graphics_queue_family_index;
}
u32 GetPresentQueueFamilyIndex() const {
return present_queue_family_index;
}
vk::Queue GetGraphicsQueue() const {
return graphics_queue;
}
vk::Queue GetPresentQueue() const {
return present_queue;
}
/// Feature support
bool SupportsAnisotropicFiltering() const;
u32 UniformMinAlignment() const { return static_cast<u32>(device_limits.minUniformBufferOffsetAlignment); }
bool IsDynamicRenderingSupported() const {
return dynamic_rendering;
}
bool IsExtendedDynamicStateSupported() const {
return extended_dynamic_state;
}
bool IsPushDescriptorsSupported() const {
return push_descriptors;
}
/// Returns the minimum required alignment for uniforms
vk::DeviceSize UniformMinAlignment() const {
return device_limits.minUniformBufferOffsetAlignment;
}
/// Returns true when the format supports the provided feature flags
bool IsFormatSupported(vk::Format format, vk::FormatFeatureFlags usage) const;
/// Returns the most compatible format that supports the provided feature flags
vk::Format GetFormatAlternative(vk::Format format) const;
private:
bool CreateDevice(vk::SurfaceKHR surface, bool validation_enabled);
bool FindExtensions();
bool FindFeatures();
bool CreateDevice(bool validation_enabled);
void CreateAllocator();
public:
private:
// Queue family indexes
u32 present_queue_family_index{}, graphics_queue_family_index{};
u32 present_queue_family_index = 0, graphics_queue_family_index = 0;
vk::Queue present_queue, graphics_queue;
// Core vulkan objects
vk::Device device;
vk::PhysicalDevice physical_device;
vk::Instance instance;
vk::Device device;
// Extensions and features
std::vector<const char*> extensions;
vk::PhysicalDeviceFeatures2 features{};
vk::SurfaceKHR surface;
vk::PhysicalDeviceLimits device_limits;
VmaAllocator allocator;
// Features per vulkan version
vk::PhysicalDeviceFeatures vk_features{};
vk::PhysicalDeviceVulkan13Features vk13_features{};
vk::PhysicalDeviceVulkan12Features vk12_features{};
vk::PhysicalDeviceExtendedDynamicStateFeaturesEXT dynamic_state_features{};
vk::PhysicalDeviceExtendedDynamicState2FeaturesEXT dynamic_state2_features{};
vk::PhysicalDeviceColorWriteEnableFeaturesEXT color_write_features{};
bool dynamic_rendering = false;
bool extended_dynamic_state = false;
bool push_descriptors = false;
};
extern std::unique_ptr<Instance> g_vk_instace;
} // namespace Vulkan
} // namespace VideoCore::Vulkan
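One expected consumer of UniformMinAlignment is uniform buffer suballocation. A minimal sketch using Common::AlignUp, which this commit already includes via common/alignment.h:

#include "common/alignment.h"

// Advances a uniform stream offset to the next legally aligned position
vk::DeviceSize NextUniformOffset(const Instance& instance, vk::DeviceSize offset) {
    return Common::AlignUp(offset, instance.UniformMinAlignment());
}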

View File

@ -0,0 +1,414 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/vk_pipeline.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace VideoCore::Vulkan {
// Maximum binding per descriptor set
constexpr u32 MAX_BINDING_SLOTS = 7;
vk::ShaderStageFlags ToVkStageFlags(BindingType type) {
vk::ShaderStageFlags flags;
switch (type) {
case BindingType::Sampler:
case BindingType::Texture:
case BindingType::TexelBuffer:
flags = vk::ShaderStageFlagBits::eFragment;
break;
case BindingType::StorageImage:
case BindingType::Uniform:
case BindingType::UniformDynamic:
flags = vk::ShaderStageFlagBits::eFragment |
vk::ShaderStageFlagBits::eVertex |
vk::ShaderStageFlagBits::eGeometry |
vk::ShaderStageFlagBits::eCompute;
break;
default:
LOG_ERROR(Render_Vulkan, "Unknown descriptor type!");
}
return flags;
}
vk::DescriptorType ToVkDescriptorType(BindingType type) {
switch (type) {
case BindingType::Uniform:
return vk::DescriptorType::eUniformBuffer;
case BindingType::UniformDynamic:
return vk::DescriptorType::eUniformBufferDynamic;
case BindingType::TexelBuffer:
return vk::DescriptorType::eUniformTexelBuffer;
case BindingType::Texture:
return vk::DescriptorType::eSampledImage;
case BindingType::Sampler:
return vk::DescriptorType::eSampler;
case BindingType::StorageImage:
return vk::DescriptorType::eStorageImage;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown descriptor type!");
UNREACHABLE();
}
}
u32 AttribBytes(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
return sizeof(float) * attrib.components;
case AttribType::Int:
return sizeof(u32) * attrib.components;
case AttribType::Short:
return sizeof(u16) * attrib.components;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown vertex attribute type!");
UNREACHABLE();
}
}
vk::Format ToVkAttributeFormat(VertexAttribute attrib) {
switch (attrib.type) {
case AttribType::Float:
switch (attrib.components) {
case 1: return vk::Format::eR32Sfloat;
case 2: return vk::Format::eR32G32Sfloat;
case 3: return vk::Format::eR32G32B32Sfloat;
case 4: return vk::Format::eR32G32B32A32Sfloat;
}
default:
LOG_CRITICAL(Render_Vulkan, "Unimplemented vertex attribute format!");
UNREACHABLE();
}
}
vk::ShaderStageFlagBits ToVkShaderStage(ShaderStage stage) {
switch (stage) {
case ShaderStage::Vertex:
return vk::ShaderStageFlagBits::eVertex;
case ShaderStage::Fragment:
return vk::ShaderStageFlagBits::eFragment;
case ShaderStage::Geometry:
return vk::ShaderStageFlagBits::eGeometry;
case ShaderStage::Compute:
return vk::ShaderStageFlagBits::eCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Undefined shader stage!");
UNREACHABLE();
}
}
PipelineLayout::PipelineLayout(Instance& instance, PipelineLayoutInfo info) :
instance(instance), set_layout_count(info.group_count) {
// Used as temp storage for CreateDescriptorSet
std::array<vk::DescriptorSetLayoutBinding, MAX_BINDING_SLOTS> set_bindings;
std::array<vk::DescriptorUpdateTemplateEntry, MAX_BINDING_SLOTS> update_entries;
vk::Device device = instance.GetDevice();
for (u32 set = 0; set < set_layout_count; set++) {
auto& group = info.binding_groups[set];
u32 binding = 0;
while (group[binding] != BindingType::None) {
const BindingType type = group[binding];
set_bindings[binding] = vk::DescriptorSetLayoutBinding{
.binding = binding,
.descriptorType = ToVkDescriptorType(type),
.descriptorCount = 1,
.stageFlags = ToVkStageFlags(type)
};
// Also create update template to speed up descriptor writes
update_entries[binding] = vk::DescriptorUpdateTemplateEntry{
.dstBinding = binding,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = ToVkDescriptorType(type),
.offset = binding * sizeof(DescriptorData),
.stride = sizeof(DescriptorData)
};
binding++;
}
const vk::DescriptorSetLayoutCreateInfo layout_info = {
.bindingCount = binding,
.pBindings = set_bindings.data()
};
// Create descriptor set layout
set_layouts[set] = device.createDescriptorSetLayout(layout_info);
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = binding,
.pDescriptorUpdateEntries = update_entries.data(),
.descriptorSetLayout = set_layouts[set]
};
// Create descriptor set update template
update_templates[set] = device.createDescriptorUpdateTemplate(template_info);
}
// Create pipeline layout
const vk::PushConstantRange range = {
.stageFlags = vk::ShaderStageFlagBits::eAll,
.offset = 0,
.size = info.push_constant_block_size
};
bool push_constants = info.push_constant_block_size > 0;
const u32 range_count = push_constants ? 1u : 0u;
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = set_layout_count,
.pSetLayouts = set_layouts.data(),
.pushConstantRangeCount = range_count,
.pPushConstantRanges = &range
};
pipeline_layout = device.createPipelineLayout(layout_info);
}
PipelineLayout::~PipelineLayout() {
vk::Device device = instance.GetDevice();
device.destroyPipelineLayout(pipeline_layout);
for (u32 i = 0; i < set_layout_count; i++) {
device.destroyDescriptorSetLayout(set_layouts[i]);
device.destroyDescriptorUpdateTemplate(update_templates[i]);
}
}
Pipeline::Pipeline(Instance& instance, PipelineLayout& owner, PipelineType type,
PipelineInfo info, vk::PipelineCache cache) : PipelineBase(type, info),
instance(instance), owner(owner) {
vk::Device device = instance.GetDevice();
u32 shader_count = 0;
std::array<vk::PipelineShaderStageCreateInfo, MAX_SHADER_STAGES> shader_stages;
for (u32 i = 0; i < info.shaders.size(); i++) {
auto& shader = info.shaders[i];
if (!shader.IsValid()) {
break;
}
shader_count++;
Shader* vk_shader = static_cast<Shader*>(shader.Get());
shader_stages[i] = vk::PipelineShaderStageCreateInfo{
.stage = ToVkShaderStage(shader->GetStage()),
.module = vk_shader->GetHandle(),
.pName = shader->GetName().data(),
};
}
// Create a graphics pipeline
if (type == PipelineType::Graphics) {
const vk::VertexInputBindingDescription binding_desc = {
.binding = 0,
.stride = info.vertex_layout.stride
};
// Populate vertex attribute structures
u32 attribute_count = 0;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> attribute_desc;
for (u32 i = 0; i < MAX_VERTEX_ATTRIBUTES; i++) {
auto& attr = info.vertex_layout.attributes[i];
if (attr.components == 0) {
attribute_count = i;
break;
}
attribute_desc[i] = vk::VertexInputAttributeDescription{
.location = i,
.binding = 0,
.format = ToVkAttributeFormat(attr),
.offset = (i > 0 ? attribute_desc[i - 1].offset +
AttribBytes(info.vertex_layout.attributes[i - 1]) : 0)
};
}
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = &binding_desc,
.vertexAttributeDescriptionCount = attribute_count,
.pVertexAttributeDescriptions = attribute_desc.data()
};
const vk::PipelineInputAssemblyStateCreateInfo input_assembly = {
.topology = PicaToVK::PrimitiveTopology(info.rasterization.topology),
.primitiveRestartEnable = false
};
const vk::PipelineRasterizationStateCreateInfo raster_state = {
.depthClampEnable = false,
.rasterizerDiscardEnable = false,
.cullMode = PicaToVK::CullMode(info.rasterization.cull_mode),
.frontFace = vk::FrontFace::eClockwise,
.depthBiasEnable = false,
.lineWidth = 1.0f
};
const vk::PipelineMultisampleStateCreateInfo multisampling = {
.rasterizationSamples = vk::SampleCountFlagBits::e1,
.sampleShadingEnable = false
};
const vk::PipelineColorBlendAttachmentState colorblend_attachment = {
.blendEnable = true,
.srcColorBlendFactor = PicaToVK::BlendFunc(info.blending.src_color_blend_factor),
.dstColorBlendFactor = PicaToVK::BlendFunc(info.blending.dst_color_blend_factor),
.colorBlendOp = PicaToVK::BlendEquation(info.blending.color_blend_eq),
.srcAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.src_alpha_blend_factor),
.dstAlphaBlendFactor = PicaToVK::BlendFunc(info.blending.dst_alpha_blend_factor),
.alphaBlendOp = PicaToVK::BlendEquation(info.blending.alpha_blend_eq),
.colorWriteMask = static_cast<vk::ColorComponentFlags>(info.blending.color_write_mask)
};
const vk::PipelineColorBlendStateCreateInfo color_blending = {
.logicOpEnable = true,
.logicOp = vk::LogicOp::eCopy, // TODO
.attachmentCount = 1,
.pAttachments = &colorblend_attachment,
};
const bool extended_dynamic_states = instance.IsExtendedDynamicStateSupported();
const std::array dynamic_states = {
vk::DynamicState::eViewport,
vk::DynamicState::eScissor,
vk::DynamicState::eLineWidth,
vk::DynamicState::eStencilCompareMask,
vk::DynamicState::eStencilWriteMask,
vk::DynamicState::eStencilReference,
// VK_EXT_extended_dynamic_state
vk::DynamicState::eCullModeEXT,
vk::DynamicState::eDepthCompareOpEXT,
vk::DynamicState::eDepthTestEnableEXT,
vk::DynamicState::eDepthWriteEnableEXT,
vk::DynamicState::eFrontFaceEXT,
vk::DynamicState::ePrimitiveTopologyEXT,
vk::DynamicState::eStencilOpEXT,
vk::DynamicState::eStencilTestEnableEXT,
};
const vk::PipelineDynamicStateCreateInfo dynamic_info = {
.dynamicStateCount = extended_dynamic_states ? 14u : 6u,
.pDynamicStates = dynamic_states.data()
};
const vk::StencilOpState stencil_op_state = {
.failOp = PicaToVK::StencilOp(info.depth_stencil.stencil_fail_op),
.passOp = PicaToVK::StencilOp(info.depth_stencil.stencil_pass_op),
.depthFailOp = PicaToVK::StencilOp(info.depth_stencil.stencil_depth_fail_op),
.compareOp = PicaToVK::CompareFunc(info.depth_stencil.stencil_compare_op),
.compareMask = static_cast<u32>(info.depth_stencil.stencil_compare_mask.Value()),
.writeMask = static_cast<u32>(info.depth_stencil.stencil_write_mask.Value()),
.reference = static_cast<u32>(info.depth_stencil.stencil_reference.Value())
};
const vk::PipelineDepthStencilStateCreateInfo depth_info = {
.depthTestEnable = static_cast<u32>(info.depth_stencil.depth_test_enable.Value()),
.depthWriteEnable = static_cast<u32>(info.depth_stencil.depth_write_enable.Value()),
.depthCompareOp = PicaToVK::CompareFunc(info.depth_stencil.depth_compare_op),
.depthBoundsTestEnable = false,
.stencilTestEnable = static_cast<u32>(info.depth_stencil.stencil_test_enable.Value()),
.front = stencil_op_state,
.back = stencil_op_state
};
const vk::GraphicsPipelineCreateInfo pipeline_info = {
.stageCount = shader_count,
.pStages = shader_stages.data(),
.pVertexInputState = &vertex_input_info,
.pInputAssemblyState = &input_assembly,
.pRasterizationState = &raster_state,
.pMultisampleState = &multisampling,
.pDepthStencilState = &depth_info,
.pColorBlendState = &color_blending,
.pDynamicState = &dynamic_info,
.layout = owner.GetLayout(),
.renderPass = {}
};
if (auto result = device.createGraphicsPipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Graphics pipeline creation failed!");
UNREACHABLE();
}
} else { // Compute pipeline
ASSERT(shader_count == 1);
const vk::ComputePipelineCreateInfo pipeline_info = {
.stage = shader_stages[0],
.layout = owner.GetLayout()
};
if (auto result = device.createComputePipeline(cache, pipeline_info); result.result == vk::Result::eSuccess) {
pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "Compute pipeline creation failed!");
UNREACHABLE();
}
}
}
Pipeline::~Pipeline() {
vk::Device device = instance.GetDevice();
device.destroyPipeline(pipeline);
}
void Pipeline::BindTexture(u32 group, u32 slot, TextureHandle handle) {
Texture* texture = static_cast<Texture*>(handle.Get());
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.imageView = texture->GetView(),
.imageLayout = texture->GetLayout()
}
};
owner.SetBinding(group, slot, data);
}
void Pipeline::BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view) {
Buffer* buffer = static_cast<Buffer*>(handle.Get());
// Texel buffers are bound with their views
if (buffer->GetUsage() == BufferUsage::Texel) {
const DescriptorData data = {
.buffer_view = buffer->GetView(view)
};
owner.SetBinding(group, slot, data);
} else {
const DescriptorData data = {
.buffer_info = vk::DescriptorBufferInfo{
.buffer = buffer->GetHandle(),
.offset = 0,
.range = buffer->GetCapacity()
}
};
owner.SetBinding(group, slot, data);
}
}
void Pipeline::BindSampler(u32 group, u32 slot, SamplerHandle handle) {
Sampler* sampler = static_cast<Sampler*>(handle.Get());
const DescriptorData data = {
.image_info = vk::DescriptorImageInfo{
.sampler = sampler->GetHandle()
}
};
owner.SetBinding(group, slot, data);
}
} // namespace VideoCore::Vulkan
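The update templates created in PipelineLayout are meant to be consumed when descriptor sets are written. A sketch of that step, with the descriptor set allocation itself assumed to happen elsewhere:

// Writes every binding of one group using the layout's update template
void WriteDescriptorSet(vk::Device device, PipelineLayout& layout,
                        vk::DescriptorSet set, u32 group) {
    std::span<DescriptorData> data = layout.GetData(group);
    device.updateDescriptorSetWithTemplate(set, layout.GetUpdateTemplate(group),
                                           data.data());
}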

View File

@ -0,0 +1,96 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include "video_core/common/pipeline.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
class Instance;
class CommandScheduler;
union DescriptorData {
vk::DescriptorImageInfo image_info{};
vk::DescriptorBufferInfo buffer_info;
vk::BufferView buffer_view;
};
/**
* Stores the pipeline layout as well as the descriptor set layouts
* and update templates associated with those layouts.
* Functions as the "parent" to a group of pipelines that share the same layout
*/
class PipelineLayout {
public:
PipelineLayout(Instance& instance, PipelineLayoutInfo info);
~PipelineLayout();
// Disable copy constructor
PipelineLayout(const PipelineLayout&) = delete;
PipelineLayout& operator=(const PipelineLayout&) = delete;
// Assigns data to a particular binding
void SetBinding(u32 set, u32 binding, DescriptorData data) {
update_data[set][binding] = data;
}
// Returns the most current descriptor update data
std::span<DescriptorData> GetData(u32 set) {
return std::span{update_data.at(set)};
}
// Returns the underlying vulkan pipeline layout handle
vk::PipelineLayout GetLayout() const {
return pipeline_layout;
}
// Returns the descriptor set update template handle associated with the provided set index
vk::DescriptorUpdateTemplate GetUpdateTemplate(u32 set) const {
return update_templates.at(set);
}
private:
Instance& instance;
vk::PipelineLayout pipeline_layout = VK_NULL_HANDLE;
u32 set_layout_count = 0;
std::array<vk::DescriptorSetLayout, MAX_BINDING_GROUPS> set_layouts;
std::array<vk::DescriptorUpdateTemplate, MAX_BINDING_GROUPS> update_templates;
// Update data for the descriptor sets
using SetData = std::array<DescriptorData, MAX_BINDINGS_IN_GROUP>;
std::array<SetData, MAX_BINDING_GROUPS> update_data;
};
class Pipeline : public VideoCore::PipelineBase {
public:
Pipeline(Instance& instance, PipelineLayout& owner,
PipelineType type, PipelineInfo info, vk::PipelineCache cache);
~Pipeline() override;
void BindTexture(u32 group, u32 slot, TextureHandle handle) override;
void BindBuffer(u32 group, u32 slot, BufferHandle handle, u32 view = 0) override;
void BindSampler(u32 group, u32 slot, SamplerHandle handle) override;
/// Returns the layout tracker that owns this pipeline
PipelineLayout& GetOwner() const {
return owner;
}
/// Returns the underlying vulkan pipeline handle
vk::Pipeline GetHandle() const {
return pipeline;
}
private:
Instance& instance;
PipelineLayout& owner;
vk::Pipeline pipeline;
};
} // namespace VideoCore::Vulkan
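The DescriptorData union above keeps every staged binding the same size, which is what lets a descriptor update template walk a SetData array with a fixed stride. A hedged sketch of how such a template entry could be described, assuming one entry per binding and the designated-initializer style enabled by VULKAN_HPP_NO_CONSTRUCTORS (an illustration, not the commit's actual template construction):

// One template entry per binding; offset/stride step through DescriptorData slots.
vk::DescriptorUpdateTemplateEntry MakeEntry(u32 binding, vk::DescriptorType type) {
    return vk::DescriptorUpdateTemplateEntry{
        .dstBinding = binding,
        .dstArrayElement = 0,
        .descriptorCount = 1,
        .descriptorType = type,
        .offset = binding * sizeof(DescriptorData), // each binding owns one slot
        .stride = sizeof(DescriptorData)
    };
}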

View File

@ -1,267 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include <algorithm>
#include <array>
#include <type_traits>
namespace Vulkan {
PipelineLayoutBuilder::PipelineLayoutBuilder() {
Clear();
}
void PipelineLayoutBuilder::Clear() {
pipeline_layout_info = vk::PipelineLayoutCreateInfo{};
}
vk::PipelineLayout PipelineLayoutBuilder::Build() {
auto device = g_vk_instace->GetDevice();
auto result = device.createPipelineLayout(pipeline_layout_info);
if (!result) {
LOG_ERROR(Render_Vulkan, "Failed to create pipeline layout");
return VK_NULL_HANDLE;
}
return result;
}
void PipelineLayoutBuilder::AddDescriptorSet(vk::DescriptorSetLayout layout) {
assert(pipeline_layout_info.setLayoutCount < MAX_SETS);
sets[pipeline_layout_info.setLayoutCount++] = layout;
pipeline_layout_info.pSetLayouts = sets.data();
}
void PipelineLayoutBuilder::AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size) {
assert(pipeline_layout_info.pushConstantRangeCount < MAX_PUSH_CONSTANTS);
push_constants[pipeline_layout_info.pushConstantRangeCount++] = {stages, offset, size};
pipeline_layout_info.pPushConstantRanges = push_constants.data();
}
PipelineBuilder::PipelineBuilder() {
Clear();
}
void PipelineBuilder::Clear() {
pipeline_info = vk::GraphicsPipelineCreateInfo{};
shader_stages.clear();
vertex_input_state = vk::PipelineVertexInputStateCreateInfo{};
input_assembly = vk::PipelineInputAssemblyStateCreateInfo{};
rasterization_state = vk::PipelineRasterizationStateCreateInfo{};
depth_state = vk::PipelineDepthStencilStateCreateInfo{};
blend_state = vk::PipelineColorBlendStateCreateInfo{};
blend_attachment = vk::PipelineColorBlendAttachmentState{};
dynamic_info = vk::PipelineDynamicStateCreateInfo{};
dynamic_states.fill({});
viewport_state = vk::PipelineViewportStateCreateInfo{};
multisample_info = vk::PipelineMultisampleStateCreateInfo{};
// Set defaults
SetNoCullRasterizationState();
SetNoDepthTestState();
SetNoBlendingState();
SetPrimitiveTopology(vk::PrimitiveTopology::eTriangleList);
// Have to be specified even if dynamic
SetViewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
SetScissorRect(0, 0, 1, 1);
SetBlendConstants(1.0f, 1.0f, 1.0f, 1.0f);
SetMultisamples(vk::SampleCountFlagBits::e1, false);
}
vk::Pipeline PipelineBuilder::Build() {
auto device = g_vk_instace->GetDevice();
auto result = device.createGraphicsPipeline({}, pipeline_info);
if (result.result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to build vulkan pipeline!");
UNREACHABLE();
}
return result.value;
}
void PipelineBuilder::SetPipelineLayout(vk::PipelineLayout layout) {
pipeline_info.layout = layout;
}
void PipelineBuilder::SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module) {
auto result = std::ranges::find_if(shader_stages.begin(), shader_stages.end(), [stage](const auto& info) {
return info.stage == stage;
});
// If the stage already exists, just replace the module
if (result != shader_stages.end()) {
result->module = module;
} else {
shader_stages.emplace_back(vk::PipelineShaderStageCreateFlags(), stage, module, "main");
pipeline_info.stageCount++;
}
pipeline_info.pStages = shader_stages.data();
}
void PipelineBuilder::AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
std::span<vk::VertexInputAttributeDescription> attributes) {
// Copy attributes to private array
auto loc = vertex_attributes.begin() + vertex_input_state.vertexAttributeDescriptionCount;
std::copy(attributes.begin(), attributes.end(), loc);
vertex_buffers[vertex_input_state.vertexBindingDescriptionCount++] = {binding, stride, input_rate};
vertex_input_state.vertexAttributeDescriptionCount += static_cast<u32>(attributes.size());
vertex_input_state.pVertexBindingDescriptions = vertex_buffers.data();
vertex_input_state.pVertexAttributeDescriptions = vertex_attributes.data();
pipeline_info.pVertexInputState = &vertex_input_state;
}
void PipelineBuilder::SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart) {
input_assembly.topology = topology;
input_assembly.primitiveRestartEnable = enable_primitive_restart;
pipeline_info.pInputAssemblyState = &input_assembly;
}
void PipelineBuilder::SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
vk::FrontFace front_face) {
rasterization_state.polygonMode = polygon_mode;
rasterization_state.cullMode = cull_mode;
rasterization_state.frontFace = front_face;
pipeline_info.pRasterizationState = &rasterization_state;
}
void PipelineBuilder::SetLineWidth(float width) {
rasterization_state.lineWidth = width;
pipeline_info.pRasterizationState = &rasterization_state;
}
void PipelineBuilder::SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading) {
multisample_info.rasterizationSamples = samples;
multisample_info.sampleShadingEnable = per_sample_shading;
multisample_info.minSampleShading = (static_cast<u32>(samples) > 1) ? 1.0f : 0.0f;
pipeline_info.pMultisampleState = &multisample_info;
}
void PipelineBuilder::SetNoCullRasterizationState() {
SetRasterizationState(vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, vk::FrontFace::eClockwise);
}
void PipelineBuilder::SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op) {
depth_state.depthTestEnable = depth_test;
depth_state.depthWriteEnable = depth_write;
depth_state.depthCompareOp = compare_op;
pipeline_info.pDepthStencilState = &depth_state;
}
void PipelineBuilder::SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back) {
depth_state.stencilTestEnable = stencil_test;
depth_state.front = front;
depth_state.back = back;
pipeline_info.pDepthStencilState = &depth_state;
}
void PipelineBuilder::SetNoStencilState() {
depth_state.stencilTestEnable = VK_FALSE;
depth_state.front = vk::StencilOpState{};
depth_state.back = vk::StencilOpState{};
}
void PipelineBuilder::SetNoDepthTestState() {
SetDepthState(false, false, vk::CompareOp::eAlways);
}
void PipelineBuilder::SetBlendConstants(float r, float g, float b, float a) {
blend_state.blendConstants = std::array<float, 4>{r, g, b, a};
pipeline_info.pColorBlendState = &blend_state;
}
void PipelineBuilder::SetBlendLogicOp(vk::LogicOp logic_op) {
blend_state.logicOp = logic_op;
blend_state.logicOpEnable = true;
}
void PipelineBuilder::SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
vk::BlendOp op, vk::BlendFactor alpha_src_factor,
vk::BlendFactor alpha_dst_factor, vk::BlendOp alpha_op,
vk::ColorComponentFlags write_mask) {
blend_attachment.blendEnable = blend_enable;
blend_attachment.srcColorBlendFactor = src_factor;
blend_attachment.dstColorBlendFactor = dst_factor;
blend_attachment.colorBlendOp = op;
blend_attachment.srcAlphaBlendFactor = alpha_src_factor;
blend_attachment.dstAlphaBlendFactor = alpha_dst_factor;
blend_attachment.alphaBlendOp = alpha_op;
blend_attachment.colorWriteMask = write_mask;
blend_state.attachmentCount = 1;
blend_state.pAttachments = &blend_attachment;
pipeline_info.pColorBlendState = &blend_state;
}
void PipelineBuilder::SetNoBlendingState() {
SetBlendAttachment(false, vk::BlendFactor::eOne, vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::BlendFactor::eOne,
vk::BlendFactor::eZero, vk::BlendOp::eAdd, vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG |
vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA);
}
void PipelineBuilder::SetDynamicStates(const std::span<vk::DynamicState> states) {
if (states.size() > MAX_DYNAMIC_STATES) {
LOG_ERROR(Render_Vulkan, "Cannot include more dynamic states!");
UNREACHABLE();
}
// Copy the state data
std::copy(states.begin(), states.end(), dynamic_states.begin());
dynamic_info.dynamicStateCount = static_cast<u32>(states.size());
dynamic_info.pDynamicStates = dynamic_states.data();
pipeline_info.pDynamicState = &dynamic_info;
}
void PipelineBuilder::SetRenderingFormats(vk::Format color, vk::Format depth_stencil) {
color_format = color;
depth_stencil_format = depth_stencil;
auto IsStencil = [](vk::Format format) -> bool {
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eD32SfloatS8Uint:
return true;
default:
return false;
};
};
const u32 color_attachment_count = color == vk::Format::eUndefined ? 0 : 1;
rendering_info = vk::PipelineRenderingCreateInfo{0, color_attachment_count, &color_format, depth_stencil_format,
IsStencil(depth_stencil) ? depth_stencil : vk::Format::eUndefined};
pipeline_info.pNext = &rendering_info;
}
void PipelineBuilder::SetViewport(float x, float y, float width, float height, float min_depth, float max_depth) {
viewport = vk::Viewport{x, y, width, height, min_depth, max_depth};
viewport_state.pViewports = &viewport;
viewport_state.viewportCount = 1;
pipeline_info.pViewportState = &viewport_state;
}
void PipelineBuilder::SetScissorRect(s32 x, s32 y, u32 width, u32 height) {
scissor = vk::Rect2D{{x, y}, {width, height}};
viewport_state.scissorCount = 1u;
viewport_state.pScissors = &scissor;
pipeline_info.pViewportState = &viewport_state;
}
} // namespace Vulkan
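For reference, the removed builder was driven imperatively, roughly like this (a reconstruction under assumed shader modules and layout, not code from the repository):

// Hypothetical usage of the deleted PipelineBuilder; `layout`, `vertex_shader`
// and `fragment_shader` are assumed to exist.
PipelineBuilder builder;
builder.SetPipelineLayout(layout);
builder.SetShaderStage(vk::ShaderStageFlagBits::eVertex, vertex_shader);
builder.SetShaderStage(vk::ShaderStageFlagBits::eFragment, fragment_shader);
builder.SetRenderingFormats(vk::Format::eR8G8B8A8Unorm, vk::Format::eD24UnormS8Uint);
std::array states = {vk::DynamicState::eViewport, vk::DynamicState::eScissor};
builder.SetDynamicStates(states);
vk::Pipeline pipeline = builder.Build();

The new Pipeline/PipelineLayout classes replace this call-by-call pattern with declarative PipelineInfo structures.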

View File

@ -1,108 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <cstddef>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
class PipelineLayoutBuilder {
public:
PipelineLayoutBuilder();
~PipelineLayoutBuilder() = default;
void Clear();
vk::PipelineLayout Build();
void AddDescriptorSet(vk::DescriptorSetLayout layout);
void AddPushConstants(vk::ShaderStageFlags stages, u32 offset, u32 size);
private:
static constexpr u32 MAX_SETS = 8;
static constexpr u32 MAX_PUSH_CONSTANTS = 5;
vk::PipelineLayoutCreateInfo pipeline_layout_info;
std::array<vk::DescriptorSetLayout, MAX_SETS> sets;
std::array<vk::PushConstantRange, MAX_PUSH_CONSTANTS> push_constants;
};
class PipelineBuilder {
public:
PipelineBuilder();
~PipelineBuilder() = default;
void Clear();
vk::Pipeline Build();
void SetPipelineLayout(vk::PipelineLayout layout);
void AddVertexBuffer(u32 binding, u32 stride, vk::VertexInputRate input_rate,
const std::span<vk::VertexInputAttributeDescription> attributes);
void SetShaderStage(vk::ShaderStageFlagBits stage, vk::ShaderModule module);
void SetPrimitiveTopology(vk::PrimitiveTopology topology, bool enable_primitive_restart = false);
void SetLineWidth(float width);
void SetMultisamples(vk::SampleCountFlagBits samples, bool per_sample_shading);
void SetRasterizationState(vk::PolygonMode polygon_mode, vk::CullModeFlags cull_mode,
vk::FrontFace front_face);
void SetNoCullRasterizationState();
void SetDepthState(bool depth_test, bool depth_write, vk::CompareOp compare_op);
void SetStencilState(bool stencil_test, vk::StencilOpState front, vk::StencilOpState back);
void SetNoDepthTestState();
void SetNoStencilState();
void SetBlendConstants(float r, float g, float b, float a);
void SetNoBlendingState();
void SetBlendLogicOp(vk::LogicOp logic_op);
void SetBlendAttachment(bool blend_enable, vk::BlendFactor src_factor, vk::BlendFactor dst_factor,
vk::BlendOp op, vk::BlendFactor alpha_src_factor, vk::BlendFactor alpha_dst_factor,
vk::BlendOp alpha_op,vk::ColorComponentFlags write_mask);
void SetViewport(float x, float y, float width, float height, float min_depth, float max_depth);
void SetScissorRect(s32 x, s32 y, u32 width, u32 height);
void SetDynamicStates(const std::span<vk::DynamicState> states);
void SetRenderingFormats(vk::Format color, vk::Format depth_stencil = vk::Format::eUndefined);
private:
static constexpr u32 MAX_DYNAMIC_STATES = 20;
static constexpr u32 MAX_SHADER_STAGES = 3;
static constexpr u32 MAX_VERTEX_BUFFERS = 8;
static constexpr u32 MAX_VERTEX_ATTRIBUTES = 16;
vk::GraphicsPipelineCreateInfo pipeline_info;
std::vector<vk::PipelineShaderStageCreateInfo> shader_stages;
vk::PipelineVertexInputStateCreateInfo vertex_input_state;
std::array<vk::VertexInputBindingDescription, MAX_VERTEX_BUFFERS> vertex_buffers;
std::array<vk::VertexInputAttributeDescription, MAX_VERTEX_ATTRIBUTES> vertex_attributes;
vk::PipelineInputAssemblyStateCreateInfo input_assembly;
vk::PipelineRasterizationStateCreateInfo rasterization_state;
vk::PipelineDepthStencilStateCreateInfo depth_state;
// Blending
vk::PipelineColorBlendStateCreateInfo blend_state;
vk::PipelineColorBlendAttachmentState blend_attachment;
vk::PipelineDynamicStateCreateInfo dynamic_info;
std::array<vk::DynamicState, MAX_DYNAMIC_STATES> dynamic_states;
vk::PipelineViewportStateCreateInfo viewport_state;
vk::Viewport viewport{0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
vk::Rect2D scissor;
// Multisampling
vk::PipelineMultisampleStateCreateInfo multisample_info;
vk::PipelineRenderingCreateInfo rendering_info;
vk::Format color_format, depth_stencil_format;
};
} // namespace Vulkan

View File

@ -0,0 +1,130 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
// Include the vulkan platform specific header
#if defined(ANDROID) || defined (__ANDROID__)
#define VK_USE_PLATFORM_ANDROID_KHR 1
#elif defined(_WIN32)
#define VK_USE_PLATFORM_WIN32_KHR 1
#elif defined(__APPLE__)
#define VK_USE_PLATFORM_MACOS_MVK 1
#define VK_USE_PLATFORM_METAL_EXT 1
#else
#ifdef WAYLAND_DISPLAY
#define VK_USE_PLATFORM_WAYLAND_KHR 1
#else // wayland
#define VK_USE_PLATFORM_XLIB_KHR 1
#endif
#endif
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <vector>
#include "common/logging/log.h"
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
inline vk::SurfaceKHR CreateSurface(const vk::Instance& instance, const Frontend::EmuWindow& emu_window) {
const auto& window_info = emu_window.GetWindowInfo();
vk::SurfaceKHR surface;
#if VK_USE_PLATFORM_WIN32_KHR
if (window_info.type == Frontend::WindowSystemType::Windows) {
const vk::Win32SurfaceCreateInfoKHR win32_ci = {
.hinstance = nullptr,
.hwnd = static_cast<HWND>(window_info.render_surface)
};
if (instance.createWin32SurfaceKHR(&win32_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize Win32 surface");
}
}
#elif VK_USE_PLATFORM_XLIB_KHR
if (window_info.type == Frontend::WindowSystemType::X11) {
const vk::XlibSurfaceCreateInfoKHR xlib_ci{{},
static_cast<Display*>(window_info.display_connection),
reinterpret_cast<Window>(window_info.render_surface)};
if (instance.createXlibSurfaceKHR(&xlib_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Xlib surface");
UNREACHABLE();
}
}
#elif VK_USE_PLATFORM_WAYLAND_KHR
if (window_info.type == Frontend::WindowSystemType::Wayland) {
const vk::WaylandSurfaceCreateInfoKHR wayland_ci{{},
static_cast<wl_display*>(window_info.display_connection),
static_cast<wl_surface*>(window_info.render_surface)};
if (instance.createWaylandSurfaceKHR(&wayland_ci, nullptr, &surface) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Failed to initialize Wayland surface");
UNREACHABLE();
}
}
#endif
if (!surface) {
LOG_CRITICAL(Render_Vulkan, "Presentation not supported on this platform");
}
return surface;
}
inline auto GetInstanceExtensions(Frontend::WindowSystemType window_type, bool enable_debug_utils) {
const auto properties = vk::enumerateInstanceExtensionProperties();
if (properties.empty()) {
LOG_ERROR(Render_Vulkan, "Failed to query extension properties");
return std::vector<const char*>{};
}
// Add the windowing system specific extension
std::vector<const char*> extensions;
extensions.reserve(6);
switch (window_type) {
case Frontend::WindowSystemType::Headless:
break;
#if VK_USE_PLATFORM_WIN32_KHR
case Frontend::WindowSystemType::Windows:
extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_XLIB_KHR
case Frontend::WindowSystemType::X11:
extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME);
break;
#elif VK_USE_PLATFORM_WAYLAND_KHR
case Frontend::WindowSystemType::Wayland:
extensions.push_back(VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME);
break;
#endif
default:
LOG_ERROR(Render_Vulkan, "Presentation not supported on this platform");
break;
}
if (window_type != Frontend::WindowSystemType::Headless) {
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
}
if (enable_debug_utils) {
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
}
for (const char* extension : extensions) {
const auto iter = std::ranges::find_if(properties, [extension](const auto& prop) {
return std::strcmp(extension, prop.extensionName) == 0;
});
if (iter == properties.end()) {
LOG_ERROR(Render_Vulkan, "Required instance extension {} is not available", extension);
return std::vector<const char*>{};
}
}
return extensions;
}
} // namespace VideoCore::Vulkan
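A hedged sketch of how these helpers fit together when creating the Vulkan instance; the application name and API version are assumptions, and error handling is elided:

// Hypothetical wrapper around GetInstanceExtensions(); not code from this commit.
vk::Instance MakeInstance(Frontend::WindowSystemType type, bool enable_debug_utils) {
    const std::vector<const char*> extensions = GetInstanceExtensions(type, enable_debug_utils);
    const vk::ApplicationInfo app_info = {
        .pApplicationName = "Citra",          // assumed
        .apiVersion = VK_API_VERSION_1_1      // assumed target version
    };
    const vk::InstanceCreateInfo instance_info = {
        .pApplicationInfo = &app_info,
        .enabledExtensionCount = static_cast<u32>(extensions.size()),
        .ppEnabledExtensionNames = extensions.data()
    };
    // The returned instance can then be handed to CreateSurface() above
    return vk::createInstance(instance_info);
}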

View File

@ -5,15 +5,10 @@
#include <algorithm>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
@ -21,27 +16,45 @@
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/video_core.h"
namespace Vulkan {
MICROPROFILE_DEFINE(Vulkan_VS, "Vulkan", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_GS, "Vulkan", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(Vulkan_Blits, "Vulkan", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(Vulkan_CacheManagement, "Vulkan", "Cache Management", MP_RGB(100, 255, 100));
using PixelFormat = SurfaceParams::PixelFormat;
using SurfaceType = SurfaceParams::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_VAO, "OpenGL", "Vertex Array Setup", MP_RGB(255, 128, 0));
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(OpenGL_GS, "OpenGL", "Geometry Shader Setup", MP_RGB(128, 192, 128));
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
// These should be large enough to hold about one frame's worth of data.
constexpr u32 VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
constexpr u32 INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
constexpr u32 TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
constexpr std::array LUT_LF_VIEWS = {
vk::Format::eR32G32Sfloat
};
constexpr std::array LUT_VIEWS = {
vk::Format::eR32G32Sfloat,
vk::Format::eR32G32B32A32Sfloat
};
RasterizerVulkan::RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window) :
scheduler(scheduler), vertex_buffer(scheduler, VERTEX_BUFFER_SIZE, BufferUsage::Vertex),
index_buffer(scheduler, INDEX_BUFFER_SIZE, BufferUsage::Index),
uniform_buffer(scheduler, UNIFORM_BUFFER_SIZE, BufferUsage::Uniform),
texture_buffer_lut_lf(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_LF_VIEWS),
texture_buffer_lut(scheduler, TEXTURE_BUFFER_SIZE, BufferUsage::UniformTexel, LUT_VIEWS) {
RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
// TODO: Shadow rendering is not implemented yet
allow_shadow = false;
@ -65,29 +78,6 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
uniform_buffer_alignment);
uniform_size_aligned_fs = Common::AlignUp<std::size_t>(sizeof(UniformData),
uniform_buffer_alignment);
// Allocate texture buffer LUTs
Buffer::Info texel_buffer_info = {
.size = TEXTURE_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eUniformTexelBuffer |
vk::BufferUsageFlagBits::eTransferDst,
};
texel_buffer_info.view_formats[0] = vk::Format::eR32G32Sfloat;
texture_buffer_lut_lf.Create(texel_buffer_info);
texel_buffer_info.view_formats[1] = vk::Format::eR32G32B32A32Sfloat;
texture_buffer_lut.Create(texel_buffer_info);
// Create and bind uniform buffers
Buffer::Info uniform_info = {
.size = UNIFORM_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eUniformBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
uniform_buffer.Create(uniform_info);
auto& state = VulkanState::Get();
state.SetUniformBuffer(0, 0, uniform_size_aligned_vs, uniform_buffer);
state.SetUniformBuffer(1, uniform_size_aligned_vs, uniform_size_aligned_fs, uniform_buffer);
@ -97,26 +87,8 @@ RasterizerVulkan::RasterizerVulkan(Frontend::EmuWindow& emu_window) {
state.SetTexelBuffer(1, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 0);
state.SetTexelBuffer(2, 0, TEXTURE_BUFFER_SIZE, texture_buffer_lut, 1);
// Create vertex and index buffers
Buffer::Info vertex_info = {
.size = VERTEX_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eVertexBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
Buffer::Info index_info = {
.size = INDEX_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eDeviceLocal,
.usage = vk::BufferUsageFlagBits::eIndexBuffer |
vk::BufferUsageFlagBits::eTransferDst
};
vertex_buffer.Create(vertex_info);
index_buffer.Create(index_info);
// Set clear texture color
state.SetPlaceholderColor(255, 0, 0, 255);
state.SetPlaceholderColor(255, 255, 255, 255);
SyncEntireState();
}
@ -238,7 +210,7 @@ void RasterizerVulkan::DrawTriangles() {
}
bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
MICROPROFILE_SCOPE(Vulkan_Drawing);
const auto& regs = Pica::g_state.regs;
auto& state = VulkanState::Get();
@ -252,6 +224,7 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
const bool using_color_fb =
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0;
const bool using_depth_fb =
!shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0);
@ -304,15 +277,15 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
// Scissor checks are window-, not viewport-relative, which means that if the cached texture
// sub-rect changes, the scissor bounds also need to be updated.
GLint scissor_x1 =
static_cast<GLint>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
GLint scissor_y1 =
static_cast<GLint>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
int scissor_x1 =
static_cast<int>(surfaces_rect.left + regs.rasterizer.scissor_test.x1 * res_scale);
int scissor_y1 =
static_cast<int>(surfaces_rect.bottom + regs.rasterizer.scissor_test.y1 * res_scale);
// x2, y2 have +1 added to cover the entire pixel area, otherwise you might get cracks when
// scaling or doing multisampling.
GLint scissor_x2 =
static_cast<GLint>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
GLint scissor_y2 = static_cast<GLint>(surfaces_rect.bottom +
int scissor_x2 =
static_cast<int>(surfaces_rect.left + (regs.rasterizer.scissor_test.x2 + 1) * res_scale);
int scissor_y2 = static_cast<int>(surfaces_rect.bottom +
(regs.rasterizer.scissor_test.y2 + 1) * res_scale);
if (uniform_block_data.data.scissor_x1 != scissor_x1 ||
@ -420,8 +393,6 @@ bool RasterizerVulkan::Draw(bool accelerate, bool is_indexed) {
depth_surface->texture.Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
g_vk_task_scheduler->Submit();
return true;
}
@ -924,22 +895,22 @@ void RasterizerVulkan::NotifyPicaRegisterChanged(u32 id) {
}
void RasterizerVulkan::FlushAll() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushAll();
}
void RasterizerVulkan::FlushRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushRegion(addr, size);
}
void RasterizerVulkan::InvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.InvalidateRegion(addr, size, nullptr);
}
void RasterizerVulkan::FlushAndInvalidateRegion(PAddr addr, u32 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
res_cache.FlushRegion(addr, size);
res_cache.InvalidateRegion(addr, size, nullptr);
}
@ -949,7 +920,7 @@ void RasterizerVulkan::ClearAll(bool flush) {
}
bool RasterizerVulkan::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
MICROPROFILE_SCOPE(Vulkan_Blits);
SurfaceParams src_params;
src_params.addr = config.GetPhysicalInputAddress();
@ -1099,7 +1070,7 @@ bool RasterizerVulkan::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con
if (framebuffer_addr == 0) {
return false;
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
MICROPROFILE_SCOPE(Vulkan_CacheManagement);
SurfaceParams src_params;
src_params.addr = framebuffer_addr;
@ -1421,7 +1392,7 @@ void RasterizerVulkan::SyncLightSpotDirection(int light_index) {
}
void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
GLfloat dist_atten_bias =
float dist_atten_bias =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
.ToFloat32();
@ -1432,7 +1403,7 @@ void RasterizerVulkan::SyncLightDistanceAttenuationBias(int light_index) {
}
void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
GLfloat dist_atten_scale =
float dist_atten_scale =
Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale)
.ToFloat32();
@ -1444,8 +1415,8 @@ void RasterizerVulkan::SyncLightDistanceAttenuationScale(int light_index) {
void RasterizerVulkan::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
float constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
float linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
@ -1456,7 +1427,7 @@ void RasterizerVulkan::SyncShadowBias() {
}
void RasterizerVulkan::SyncShadowTextureBias() {
GLint bias = Pica::g_state.regs.texturing.shadow.bias << 1;
int bias = Pica::g_state.regs.texturing.shadow.bias << 1;
if (bias != uniform_block_data.data.shadow_texture_bias) {
uniform_block_data.data.shadow_texture_bias = bias;
uniform_block_data.dirty = true;
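One detail in the scissor hunk above deserves a worked example: the +1 on x2/y2 exists because the upper scissor bounds are inclusive, so the last pixel must be fully covered after scaling.

// Worked example with assumed values: surfaces_rect.left = 0, res_scale = 2,
// scissor_test.x1 = 10, scissor_test.x2 = 19:
//   scissor_x1 = 0 + 10 * 2       = 20
//   scissor_x2 = 0 + (19 + 1) * 2 = 40
// The scaled scissor spans [20, 40), i.e. all 10 source pixels at 2x scale,
// leaving no cracks when scaling or multisampling.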

View File

@ -5,21 +5,11 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstring>
#include <memory>
#include <vector>
#include <glm/glm.hpp>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "video_core/pica_state.h"
#include "video_core/pica_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader.h"
#include "video_core/renderer_vulkan/vk_state.h"
@ -31,7 +21,11 @@ class EmuWindow;
namespace Vulkan {
enum class UniformBindings : u32 { Common, VS, GS };
enum class UniformBindings : u32 {
Common = 0,
VertexShader = 1,
GeometryShader = 2
};
struct LightSrc {
alignas(16) glm::vec3 specular_0;
@ -79,14 +73,13 @@ struct UniformData {
alignas(16) glm::vec4 clip_coef;
};
static_assert(
sizeof(UniformData) == 0x4F0,
static_assert(sizeof(UniformData) == 0x4F0,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");
/// Uniform struct for the Uniform Buffer Object that contains PICA vertex/geometry shader uniforms.
// NOTE: the same rule from UniformData also applies here.
/// NOTE: the same rule from UniformData also applies here.
struct PicaUniformsData {
void SetFromRegs(const Pica::ShaderRegs& regs, const Pica::Shader::ShaderSetup& setup);
@ -102,17 +95,18 @@ struct PicaUniformsData {
struct VSUniformData {
PicaUniformsData uniforms;
};
static_assert(
sizeof(VSUniformData) == 1856,
static_assert(sizeof(VSUniformData) == 1856,
"The size of the VSUniformData structure has changed, update the structure in the shader");
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
struct ScreenInfo;
class CommandScheduler;
class RasterizerVulkan : public VideoCore::RasterizerInterface {
public:
explicit RasterizerVulkan(Frontend::EmuWindow& emu_window);
explicit RasterizerVulkan(CommandScheduler& scheduler, Frontend::EmuWindow& emu_window);
~RasterizerVulkan() override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
@ -252,6 +246,7 @@ private:
};
private:
CommandScheduler& scheduler;
RasterizerCacheVulkan res_cache;
std::vector<HardwareVertex> vertex_batch;
bool shader_dirty = true;
@ -269,13 +264,7 @@ private:
bool dirty;
} uniform_block_data = {};
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 64 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
Buffer vertex_buffer, index_buffer;
StreamBuffer vertex_buffer, index_buffer;
StreamBuffer uniform_buffer, texture_buffer_lut_lf, texture_buffer_lut;
u32 uniform_buffer_alignment;
@ -293,4 +282,4 @@ private:
bool allow_shadow{};
};
} // namespace OpenGL
} // namespace Vulkan
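The uniform bookkeeping above relies on rounding each block up to the device's minimum uniform offset alignment so that both blocks can be sub-allocated from one buffer. A minimal sketch of that arithmetic; the 256-byte alignment is an assumption, the real code queries the device limit:

// Mirrors Common::AlignUp for power-of-two alignments (an assumption here)
constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) & ~(align - 1);
}
// sizeof(UniformData) == 0x4F0, so with a 256-byte requirement:
//   AlignUp(0x4F0, 256) == 0x500
// meaning the next uniform block starts at offset 0x500 in the same buffer.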

View File

@ -16,19 +16,11 @@
#include <boost/range/iterator_range.hpp>
#include "common/alignment.h"
#include "common/bit_field.h"
#include "common/color.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/texture.h"
#include "common/vector_math.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/hle/kernel/process.h"
#include "core/memory.h"
#include "core/settings.h"
#include "video_core/pica_state.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_rasterizer_cache.h"
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
@ -375,7 +367,7 @@ static vk::Rect2D FromRect(Common::Rectangle<u32> rect) {
// Allocate an uninitialized texture of appropriate size and format for the surface
void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, vk::Format format,
u32 width, u32 height) {
u32 width, u32 height, bool framebuffer) {
// First check if the texture can be recycled
auto recycled_tex = host_texture_recycler.find({format, width, height});
if (recycled_tex != host_texture_recycler.end()) {
@ -384,11 +376,12 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v
return;
}
auto GetUsage = [](SurfaceType type) {
auto GetUsage = [framebuffer](SurfaceType type) {
auto usage = vk::ImageUsageFlagBits::eSampled |
vk::ImageUsageFlagBits::eTransferDst |
vk::ImageUsageFlagBits::eTransferSrc;
if (framebuffer) {
switch (type) {
case SurfaceType::Color:
case SurfaceType::Fill:
@ -402,12 +395,12 @@ void RasterizerCacheVulkan::AllocateTexture(Texture& target, SurfaceType type, v
default:
break;
}
}
return usage;
};
// Otherwise create a brand new texture
u32 levels = std::log2(std::max(width, height)) + 1;
u32 levels = static_cast<u32>(std::log2(std::max(width, height))) + 1;
Texture::Info texture_info{
.width = width,
.height = height,
@ -516,8 +509,9 @@ void CachedSurface::LoadGPUBuffer(PAddr load_start, PAddr load_end) {
const bool need_swap = (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
const u8* const texture_src_data = VideoCore::g_memory->GetPhysicalPointer(addr);
if (texture_src_data == nullptr)
if (texture_src_data == nullptr) {
return;
}
if (vk_buffer.empty()) {
vk_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
@ -660,9 +654,9 @@ void CachedSurface::UploadGPUTexture(Common::Rectangle<u32> rect) {
// Load data from memory to the surface
auto buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
auto update_size = rect.GetWidth() * rect.GetHeight() * GetBytesPerPixel(pixel_format);
std::span<u8> memory(vk_buffer.data() + buffer_offset, update_size);
std::span<const u8> memory{vk_buffer.data() + buffer_offset, update_size};
texture.Upload(0, 0, stride, FromRect(rect), memory);
texture.Upload(0, 0, stride, memory);
InvalidateAllWatcher();
}
@ -867,7 +861,8 @@ Surface RasterizerCacheVulkan::GetSurface(const SurfaceParams& params, ScaleMatc
SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams& params,
ScaleMatch match_res_scale,
bool load_if_create) {
bool load_if_create,
bool framebuffer) {
if (params.addr == 0 || params.height * params.width == 0) {
return std::make_tuple(nullptr, Common::Rectangle<u32>{});
}
@ -887,7 +882,7 @@ SurfaceRect_Tuple RasterizerCacheVulkan::GetSurfaceSubRect(const SurfaceParams&
SurfaceParams new_params = *surface;
new_params.res_scale = params.res_scale;
surface = CreateSurface(new_params);
surface = CreateSurface(new_params, framebuffer);
RegisterSurface(surface);
}
}
@ -1077,8 +1072,7 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
// Make sure that framebuffers don't overlap if both color and depth are being used
if (using_color_fb && using_depth_fb &&
boost::icl::length(color_vp_interval & depth_vp_interval)) {
LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; "
"overlapping framebuffers not supported!");
LOG_CRITICAL(Render_Vulkan, "Color and depth framebuffer memory regions overlap!");
using_depth_fb = false;
}
@ -1086,13 +1080,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheVulkan::GetFramebufferSurfaces(
Surface color_surface = nullptr;
if (using_color_fb)
std::tie(color_surface, color_rect) =
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false);
GetSurfaceSubRect(color_params, ScaleMatch::Exact, false, true);
Common::Rectangle<u32> depth_rect{};
Surface depth_surface = nullptr;
if (using_depth_fb)
std::tie(depth_surface, depth_rect) =
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false);
GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false, true);
Common::Rectangle<u32> fb_rect{};
if (color_surface != nullptr && depth_surface != nullptr) {
@ -1450,13 +1444,13 @@ void RasterizerCacheVulkan::InvalidateRegion(PAddr addr, u32 size, const Surface
remove_surfaces.clear();
}
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params) {
Surface RasterizerCacheVulkan::CreateSurface(const SurfaceParams& params, bool framebuffer) {
Surface surface = std::make_shared<CachedSurface>(*this);
static_cast<SurfaceParams&>(*surface) = params;
surface->invalid_regions.insert(surface->GetInterval());
AllocateTexture(surface->texture, params.type, GetFormatTuple(surface->pixel_format),
surface->GetScaledWidth(), surface->GetScaledHeight());
surface->GetScaledWidth(), surface->GetScaledHeight(), framebuffer);
return surface;
}
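The mip level count in AllocateTexture above can be sanity-checked with a quick worked example:

// With assumed dimensions width = 512, height = 256:
//   std::log2(std::max(512, 256)) = std::log2(512) = 9
//   levels = 9 + 1 = 10
// which yields the full chain 512x256 down to 1x1 along the larger axis.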

View File

@ -14,6 +14,7 @@
#include <boost/icl/interval_set.hpp>
#include <unordered_map>
#include <boost/functional/hash.hpp>
#include <robin_hood.h>
#include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@ -22,6 +23,10 @@
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/texture/texture_decode.h"
// Alias for the hash map implementation so it can be swapped out later
template <typename Key, typename T, typename Hash = typename Key::Hash>
using HashMap = robin_hood::unordered_flat_map<Key, T, Hash>;
namespace Vulkan {
class RasterizerCacheVulkan;
@ -31,32 +36,25 @@ class FormatReinterpreterVulkan;
vk::Format GetFormatTuple(SurfaceParams::PixelFormat pixel_format);
struct HostTextureTag {
vk::Format format;
u32 width;
u32 height;
bool operator==(const HostTextureTag& rhs) const noexcept {
return std::tie(format, width, height) == std::tie(rhs.format, rhs.width, rhs.height);
};
vk::Format format = vk::Format::eUndefined;
u32 width = 0, height = 0;
// Enable comparisons
auto operator<=>(const HostTextureTag& other) const = default;
};
struct TextureCubeConfig {
PAddr px;
PAddr nx;
PAddr py;
PAddr ny;
PAddr pz;
PAddr nz;
u32 width;
PAddr px = 0;
PAddr nx = 0;
PAddr py = 0;
PAddr ny = 0;
PAddr pz = 0;
PAddr nz = 0;
u32 width = 0;
Pica::TexturingRegs::TextureFormat format;
bool operator==(const TextureCubeConfig& rhs) const {
return std::tie(px, nx, py, ny, pz, nz, width, format) ==
std::tie(rhs.px, rhs.nx, rhs.py, rhs.ny, rhs.pz, rhs.nz, rhs.width, rhs.format);
}
bool operator!=(const TextureCubeConfig& rhs) const {
return !(*this == rhs);
}
// Enable comparisons
auto operator<=>(const TextureCubeConfig& other) const = default;
};
} // namespace Vulkan
@ -98,6 +96,7 @@ using SurfaceRegions = boost::icl::interval_set<PAddr, std::less, SurfaceInterva
using SurfaceMap =
boost::icl::interval_map<PAddr, Surface, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
using SurfaceCache =
boost::icl::interval_map<PAddr, SurfaceSet, boost::icl::partial_absorber, std::less,
boost::icl::inplace_plus, boost::icl::inter_section, SurfaceInterval>;
@ -109,8 +108,6 @@ static_assert(std::is_same<SurfaceRegions::interval_type, SurfaceCache::interval
using SurfaceRect_Tuple = std::tuple<Surface, Common::Rectangle<u32>>;
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>;
using PageMap = boost::icl::interval_map<u32, int>;
enum class ScaleMatch {
Exact, // only accept same res scale
Upscale, // only allow higher scale than params
@ -265,7 +262,7 @@ public:
/// Attempt to find a subrect (resolution scaled) of a surface, otherwise loads a texture from
/// 3DS memory to OpenGL and caches it (if not already cached)
SurfaceRect_Tuple GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale,
bool load_if_create);
bool load_if_create, bool framebuffer = false);
/// Get a surface based on the texture configuration
Surface GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config);
@ -306,9 +303,9 @@ private:
void ValidateSurface(const Surface& surface, PAddr addr, u32 size);
// Returns false if there is a surface in the cache at the interval with the same bit-width.
bool NoUnimplementedReinterpretations(const Vulkan::Surface& surface,
Vulkan::SurfaceParams& params,
const Vulkan::SurfaceInterval& interval);
bool NoUnimplementedReinterpretations(const Surface& surface,
SurfaceParams& params,
const SurfaceInterval& interval);
// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval);
@ -318,7 +315,7 @@ private:
const SurfaceInterval& interval);
/// Create a new surface
Surface CreateSurface(const SurfaceParams& params);
Surface CreateSurface(const SurfaceParams& params, bool framebuffer = false);
/// Register surface into the cache
void RegisterSurface(const Surface& surface);
@ -330,20 +327,20 @@ private:
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta);
SurfaceCache surface_cache;
PageMap cached_pages;
boost::icl::interval_map<u32, int> cached_pages;
SurfaceMap dirty_regions;
SurfaceSet remove_surfaces;
u16 resolution_scale_factor;
// Texture cube cache
std::unordered_map<TextureCubeConfig, CachedTextureCube> texture_cube_cache;
std::recursive_mutex mutex;
public:
void AllocateTexture(Texture& target, SurfaceParams::SurfaceType type, vk::Format format,
u32 width, u32 height);
std::unique_ptr<FormatReinterpreterVulkan> format_reinterpreter;
u32 width, u32 height, bool framebuffer);
};
} // namespace OpenGL
} // namespace Vulkan
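The HashMap alias above defaults its hasher to a nested Key::Hash type, so each key is expected to ship its own functor. A hedged sketch of what such a functor could look like for HostTextureTag (illustrative only, not necessarily the hash this commit uses):

struct HostTextureTagHash {
    std::size_t operator()(const HostTextureTag& tag) const noexcept {
        // boost::hash_combine-style mixing of the three fields
        std::size_t seed = std::hash<u32>{}(static_cast<u32>(tag.format));
        seed ^= std::hash<u32>{}(tag.width) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= std::hash<u32>{}(tag.height) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        return seed;
    }
};
// Hypothetical usage: HashMap<HostTextureTag, Texture, HostTextureTagHash> recycler;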

View File

@ -0,0 +1,234 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_shader.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
constexpr TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}};
namespace VideoCore::Vulkan {
EShLanguage ToEshShaderStage(ShaderStage stage) {
switch (stage) {
case ShaderStage::Vertex:
return EShLanguage::EShLangVertex;
case ShaderStage::Geometry:
return EShLanguage::EShLangGeometry;
case ShaderStage::Fragment:
return EShLanguage::EShLangFragment;
case ShaderStage::Compute:
return EShLanguage::EShLangCompute;
default:
LOG_CRITICAL(Render_Vulkan, "Unkown shader stage");
UNREACHABLE();
}
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
}
Shader::Shader(Instance& instance, ShaderStage stage, std::string_view name,
std::string&& source) :
ShaderBase(stage, name, std::move(source)), instance(instance) {
}
Shader::~Shader() {
vk::Device device = instance.GetDevice();
device.destroyShaderModule(module);
}
bool Shader::Compile(ShaderOptimization level) {
if (!InitializeCompiler()) {
return false;
}
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
EShLanguage lang = ToEshShaderStage(stage);
int default_version = 450;
const char* pass_source_code = source.c_str();
const int pass_source_code_length = static_cast<int>(source.size());
auto shader = std::make_unique<glslang::TShader>(lang);
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
glslang::TShader::ForbidIncluder includer;
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return false;
}
// Even though there's only a single shader, we still need to link it to generate SPV
auto program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return false;
}
glslang::TIntermediate* intermediate = program->getIntermediate(lang);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (level == ShaderOptimization::Debug) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
} else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
const vk::ShaderModuleCreateInfo shader_info = {
.codeSize = out_code.size() * sizeof(u32),
.pCode = out_code.data()
};
vk::Device device = instance.GetDevice();
module = device.createShaderModule(shader_info);
return true;
}
} // namespace VideoCore::Vulkan
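A hedged usage sketch for the compiler wrapper above; the GLSL source is a trivial placeholder and `instance` is assumed to be a live Instance reference:

std::string source = R"(
#version 450 core
layout (location = 0) out vec4 color;
void main() { color = vec4(1.0); }
)";
Shader shader{instance, ShaderStage::Fragment, "example_fs", std::move(source)};
// Debug keeps the optimizer off and embeds the source for RenderDoc inspection
if (shader.Compile(ShaderOptimization::Debug)) {
    const vk::ShaderModule module = shader.GetHandle();
    // `module` can now be plugged into a vk::PipelineShaderStageCreateInfo
}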

View File

@ -0,0 +1,32 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/common/shader.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace VideoCore::Vulkan {
class Instance;
class Shader : public VideoCore::ShaderBase {
public:
Shader(Instance& instance, ShaderStage stage, std::string_view name,
std::string&& source);
~Shader() override;
bool Compile(ShaderOptimization level) override;
/// Returns the underlying vulkan shader module handle
vk::ShaderModule GetHandle() const {
return module;
}
private:
Instance& instance;
vk::ShaderModule module;
};
} // namespace VideoCore::Vulkan

View File

@ -2,28 +2,12 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <cstddef>
#include <string_view>
#include "common/assert.h"
#include "common/bit_field.h"
#include "common/bit_set.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_lighting.h"
#include "video_core/regs_rasterizer.h"
#include "video_core/regs_texturing.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_shader_gen.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/video_core.h"
#include <glslang/Public/ShaderLang.h>
#include <glslang/SPIRV/GlslangToSpv.h>
#include <glslang/Include/ResourceLimits.h>
using Pica::FramebufferRegs;
using Pica::LightingRegs;
@ -32,56 +16,7 @@ using Pica::TexturingRegs;
using TevStageConfig = TexturingRegs::TevStageConfig;
using VSOutputAttributes = RasterizerRegs::VSOutputAttributes;
namespace Vulkan {
static const char present_vertex_shader_source[] = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec3 vert_tex_coord;
layout (location = 0) out vec3 frag_tex_coord;
layout (push_constant) uniform DrawInfo {
mat4 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int layer;
};
void main() {
vec4 position = vec4(vert_position, 0.0, 1.0) * modelview_matrix;
gl_Position = vec4(position.x, -position.y, 0.0, 1.0);
frag_tex_coord = vert_tex_coord;
}
)";
static const char present_fragment_shader_source[] = R"(
#version 450 core
#extension GL_ARB_separate_shader_objects : enable
layout (location = 0) in vec3 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (push_constant) uniform DrawInfo {
mat3x2 modelview_matrix;
vec4 i_resolution;
vec4 o_resolution;
int layer;
};
layout (set = 0, binding = 0) uniform sampler2D screen_textures[3];
void main() {
color = texture(screen_textures[int(frag_tex_coord.z)], frag_tex_coord.xy);
}
)";
std::string GetPresentVertexShader() {
return present_vertex_shader_source;
}
std::string GetPresentFragmentShader() {
return present_fragment_shader_source;
}
namespace VideoCore::Vulkan {
constexpr std::string_view UniformBlockDef = R"(
#define NUM_TEV_STAGES 6
@ -162,184 +97,6 @@ static std::string GetVertexInterfaceDeclaration(bool is_output, bool separable_
return out;
}
PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
PicaFSConfig res{};
auto& state = res.state;
state.scissor_test_mode = regs.rasterizer.scissor_test.mode;
state.depthmap_enable = regs.rasterizer.depthmap_enable;
state.alpha_test_func = regs.framebuffer.output_merger.alpha_test.enable
? regs.framebuffer.output_merger.alpha_test.func.Value()
: FramebufferRegs::CompareFunc::Always;
state.texture0_type = regs.texturing.texture0.type;
state.texture2_use_coord1 = regs.texturing.main_config.texture2_use_coord1 != 0;
// We don't need these otherwise, reset them to avoid unnecessary shader generation
state.alphablend_enable = {};
state.logic_op = {};
// Copy relevant tev stages fields.
// We don't sync const_color here because of the high variance, it is a
// shader uniform instead.
const auto& tev_stages = regs.texturing.GetTevStages();
DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size());
for (std::size_t i = 0; i < tev_stages.size(); i++) {
const auto& tev_stage = tev_stages[i];
state.tev_stages[i].sources_raw = tev_stage.sources_raw;
state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw;
state.tev_stages[i].ops_raw = tev_stage.ops_raw;
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
}
state.fog_mode = regs.texturing.fog_mode;
state.fog_flip = regs.texturing.fog_flip != 0;
state.combiner_buffer_input = regs.texturing.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.texturing.tev_combiner_buffer_input.update_mask_a.Value()
<< 4;
// Fragment lighting
state.lighting.enable = !regs.lighting.disable;
state.lighting.src_num = regs.lighting.max_light_index + 1;
for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) {
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num = num;
state.lighting.light[light_index].directional = light.config.directional != 0;
state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
state.lighting.light[light_index].dist_atten_enable =
!regs.lighting.IsDistAttenDisabled(num);
state.lighting.light[light_index].spot_atten_enable =
!regs.lighting.IsSpotAttenDisabled(num);
state.lighting.light[light_index].shadow_enable = !regs.lighting.IsShadowDisabled(num);
}
state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
// This is a dummy field due to the lack of a corresponding register
state.lighting.lut_sp.enable = true;
state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
state.lighting.config = regs.lighting.config0.config;
state.lighting.enable_primary_alpha = regs.lighting.config0.enable_primary_alpha;
state.lighting.enable_secondary_alpha = regs.lighting.config0.enable_secondary_alpha;
state.lighting.bump_mode = regs.lighting.config0.bump_mode;
state.lighting.bump_selector = regs.lighting.config0.bump_selector;
state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
state.lighting.enable_shadow = regs.lighting.config0.enable_shadow != 0;
state.lighting.shadow_primary = regs.lighting.config0.shadow_primary != 0;
state.lighting.shadow_secondary = regs.lighting.config0.shadow_secondary != 0;
state.lighting.shadow_invert = regs.lighting.config0.shadow_invert != 0;
state.lighting.shadow_alpha = regs.lighting.config0.shadow_alpha != 0;
state.lighting.shadow_selector = regs.lighting.config0.shadow_selector;
state.proctex.enable = regs.texturing.main_config.texture3_enable;
if (state.proctex.enable) {
state.proctex.coord = regs.texturing.main_config.texture3_coordinates;
state.proctex.u_clamp = regs.texturing.proctex.u_clamp;
state.proctex.v_clamp = regs.texturing.proctex.v_clamp;
state.proctex.color_combiner = regs.texturing.proctex.color_combiner;
state.proctex.alpha_combiner = regs.texturing.proctex.alpha_combiner;
state.proctex.separate_alpha = regs.texturing.proctex.separate_alpha;
state.proctex.noise_enable = regs.texturing.proctex.noise_enable;
state.proctex.u_shift = regs.texturing.proctex.u_shift;
state.proctex.v_shift = regs.texturing.proctex.v_shift;
state.proctex.lut_width = regs.texturing.proctex_lut.width;
state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
}
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
FramebufferRegs::FragmentOperationMode::Shadow;
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
return res;
}
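BuildFromRegs exists to make shader caching cheap: everything the generated fragment shader depends on is captured into one hashable struct. A minimal lookup sketch follows; the fragment_shaders map and CompileFS helper are hypothetical illustrations, not part of this commit:
// Hypothetical cache keyed on PicaFSConfig (std::hash specialization is in the header below)
std::unordered_map<PicaFSConfig, vk::ShaderModule> fragment_shaders;
vk::ShaderModule GetFragmentShader(const Pica::Regs& regs) {
    const PicaFSConfig config = PicaFSConfig::BuildFromRegs(regs);
    auto [it, inserted] = fragment_shaders.try_emplace(config);
    if (inserted) {
        it->second = CompileFS(config); // generate GLSL and compile only on a cache miss
    }
    return it->second;
}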
void PicaShaderConfigCommon::Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
program_hash = setup.GetProgramCodeHash();
swizzle_hash = setup.GetSwizzleDataHash();
main_offset = regs.main_offset;
sanitize_mul = VideoCore::g_hw_shader_accurate_mul;
num_outputs = 0;
output_map.fill(16);
for (int reg : Common::BitSet<u32>(regs.output_mask)) {
output_map[reg] = num_outputs++;
}
}
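The sentinel 16 marks unused registers (there are at most 16 output registers), and attribute indices are assigned in register order. A tiny worked example with a hypothetical mask:
// Sketch: output_mask = 0b101 enables output registers o0 and o2.
std::array<u32, 16> output_map;
output_map.fill(16);                         // 16 = "register unused" sentinel
u32 num_outputs = 0;
for (int reg : Common::BitSet<u32>(0b101)) {
    output_map[reg] = num_outputs++;         // output_map[0] == 0, output_map[2] == 1
}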
void PicaGSConfigCommonRaw::Init(const Pica::Regs& regs) {
vs_output_attributes = Common::BitSet<u32>(regs.vs.output_mask).Count();
gs_output_attributes = vs_output_attributes;
semantic_maps.fill({16, 0});
for (u32 attrib = 0; attrib < regs.rasterizer.vs_output_total; ++attrib) {
const std::array semantics{
regs.rasterizer.vs_output_attributes[attrib].map_x.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_y.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_z.Value(),
regs.rasterizer.vs_output_attributes[attrib].map_w.Value(),
};
for (u32 comp = 0; comp < 4; ++comp) {
const auto semantic = semantics[comp];
if (static_cast<std::size_t>(semantic) < 24) {
semantic_maps[static_cast<std::size_t>(semantic)] = {attrib, comp};
} else if (semantic != VSOutputAttributes::INVALID) {
LOG_ERROR(Render_OpenGL, "Invalid/unknown semantic id: {}", semantic);
}
}
}
}
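The net effect is an inverse of the register mapping: instead of asking which semantic an attribute component carries, the generator can ask where a given semantic lives. A hypothetical example (values invented for illustration):
// If vs_output_attributes[1].map_x == TEXCOORD0_U, then after Init():
//   semantic_maps[TEXCOORD0_U] == SemanticMap{.attribute_index = 1, .component_index = 0}
// so the fixed-function geometry shader reads texcoord0.u from attribute 1, component x.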
/// Detects if a TEV stage is configured to be skipped (to avoid generating unnecessary code)
static bool IsPassThroughTevStage(const TevStageConfig& stage) {
return (stage.color_op == TevStageConfig::Operation::Replace &&
@ -352,7 +109,7 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) {
}
static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_unit) {
const auto& state = config.state;
const auto& state = config;
switch (texture_unit) {
case 0:
// Only unit 0 respects the texturing type
@ -628,23 +385,22 @@ static void AppendAlphaCombiner(std::string& out, TevStageConfig::Operation oper
}
/// Writes the if-statement condition used to evaluate alpha testing. Note that the
/// comparison operators below are inverted: the generated condition guards a discard,
/// so it must hold exactly when the alpha test fails.
static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareFunc func) {
using CompareFunc = FramebufferRegs::CompareFunc;
static void AppendAlphaTestCondition(std::string& out, Pica::CompareFunc func) {
switch (func) {
case CompareFunc::Never:
case Pica::CompareFunc::Never:
out += "true";
break;
case CompareFunc::Always:
case Pica::CompareFunc::Always:
out += "false";
break;
case CompareFunc::Equal:
case CompareFunc::NotEqual:
case CompareFunc::LessThan:
case CompareFunc::LessThanOrEqual:
case CompareFunc::GreaterThan:
case CompareFunc::GreaterThanOrEqual: {
case Pica::CompareFunc::Equal:
case Pica::CompareFunc::NotEqual:
case Pica::CompareFunc::LessThan:
case Pica::CompareFunc::LessThanOrEqual:
case Pica::CompareFunc::GreaterThan:
case Pica::CompareFunc::GreaterThanOrEqual: {
static constexpr std::array op{"!=", "==", ">=", ">", "<=", "<"};
const auto index = static_cast<u32>(func) - static_cast<u32>(CompareFunc::Equal);
const auto index = static_cast<u32>(func) - static_cast<u32>(Pica::CompareFunc::Equal);
out += fmt::format("int(last_tex_env_out.a * 255.0) {} alphatest_ref", op[index]);
break;
}
@ -659,7 +415,7 @@ static void AppendAlphaTestCondition(std::string& out, FramebufferRegs::CompareF
/// Writes the code to emulate the specified TEV stage
static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned index) {
const auto stage =
static_cast<const TexturingRegs::TevStageConfig>(config.state.tev_stages[index]);
static_cast<const TexturingRegs::TevStageConfig>(config.tev_stages[index]);
if (!IsPassThroughTevStage(stage)) {
const std::string index_name = std::to_string(index);
@ -716,7 +472,7 @@ static void WriteTevStage(std::string& out, const PicaFSConfig& config, unsigned
/// Writes the code to emulate fragment lighting
static void WriteLighting(std::string& out, const PicaFSConfig& config) {
const auto& lighting = config.state.lighting;
const auto& lighting = config.lighting;
// Define lighting globals
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
@ -1119,7 +875,7 @@ float ProcTexLookupLUT(int offset, float coord) {
)";
// Noise utility
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
// See swrasterizer/proctex.cpp for more information about these functions
out += R"(
int ProcTexNoiseRand1D(int v) {
@ -1159,16 +915,16 @@ float ProcTexNoiseCoef(vec2 x) {
}
out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
out += fmt::format("int lut_width = {} >> level;\n", config.state.proctex.lut_width);
out += fmt::format("int lut_width = {} >> level;\n", config.proctex.lut_width);
// Offsets for level 4-7 seem to be hardcoded
out += fmt::format("int lut_offsets[8] = int[]({}, {}, {}, {}, 0xF0, 0xF8, 0xFC, 0xFE);\n",
config.state.proctex.lut_offset0, config.state.proctex.lut_offset1,
config.state.proctex.lut_offset2, config.state.proctex.lut_offset3);
config.proctex.lut_offset0, config.proctex.lut_offset1,
config.proctex.lut_offset2, config.proctex.lut_offset3);
out += "int lut_offset = lut_offsets[level];\n";
// For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
out += "lut_coord *= float(lut_width - 1);\n";
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::LinearMipmapLinear:
case ProcTexFilter::LinearMipmapNearest:
@ -1191,8 +947,8 @@ float ProcTexNoiseCoef(vec2 x) {
out += "}\n";
out += "vec4 ProcTex() {\n";
if (config.state.proctex.coord < 3) {
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.state.proctex.coord);
if (config.proctex.coord < 3) {
out += fmt::format("vec2 uv = abs(texcoord{});\n", config.proctex.coord);
} else {
LOG_CRITICAL(Render_OpenGL, "Unexpected proctex.coord >= 3");
out += "vec2 uv = abs(texcoord0);\n";
@ -1205,23 +961,23 @@ float ProcTexNoiseCoef(vec2 x) {
out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
// Unlike normal texture sampling, the bias is inside the log2
out += fmt::format("float lod = log2(abs(float({}) * proctex_bias) * (duv.x + duv.y));\n",
config.state.proctex.lut_width);
config.proctex.lut_width);
out += "if (proctex_bias == 0.0) lod = 0.0;\n";
out += fmt::format("lod = clamp(lod, {:#}, {:#});\n",
std::max(0.0f, static_cast<float>(config.state.proctex.lod_min)),
std::min(7.0f, static_cast<float>(config.state.proctex.lod_max)));
std::max(0.0f, static_cast<float>(config.proctex.lod_min)),
std::min(7.0f, static_cast<float>(config.proctex.lod_max)));
// Get shift offset before noise generation
out += "float u_shift = ";
AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
config.state.proctex.u_clamp);
AppendProcTexShiftOffset(out, "uv.y", config.proctex.u_shift,
config.proctex.u_clamp);
out += ";\n";
out += "float v_shift = ";
AppendProcTexShiftOffset(out, "uv.x", config.state.proctex.v_shift,
config.state.proctex.v_clamp);
AppendProcTexShiftOffset(out, "uv.x", config.proctex.v_shift,
config.proctex.v_clamp);
out += ";\n";
// Generate noise
if (config.state.proctex.noise_enable) {
if (config.proctex.noise_enable) {
out += "uv += proctex_noise_a * ProcTexNoiseCoef(uv);\n"
"uv = abs(uv);\n";
}
@ -1231,16 +987,16 @@ float ProcTexNoiseCoef(vec2 x) {
"float v = uv.y + v_shift;\n";
// Clamp
AppendProcTexClamp(out, "u", config.state.proctex.u_clamp);
AppendProcTexClamp(out, "v", config.state.proctex.v_clamp);
AppendProcTexClamp(out, "u", config.proctex.u_clamp);
AppendProcTexClamp(out, "v", config.proctex.v_clamp);
// Combine and map
out += "float lut_coord = ";
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
AppendProcTexCombineAndMap(out, config.proctex.color_combiner,
"proctex_color_map_offset");
out += ";\n";
switch (config.state.proctex.lut_filter) {
switch (config.proctex.lut_filter) {
case ProcTexFilter::Linear:
case ProcTexFilter::Nearest:
out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
@ -1258,11 +1014,11 @@ float ProcTexNoiseCoef(vec2 x) {
break;
}
if (config.state.proctex.separate_alpha) {
if (config.proctex.separate_alpha) {
// Note: in separate alpha mode, the alpha channel skips the color LUT lookup stage. It
// uses the output of CombineAndMap directly instead.
out += "float final_alpha = ";
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
AppendProcTexCombineAndMap(out, config.proctex.alpha_combiner,
"proctex_alpha_map_offset");
out += ";\n";
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
@ -1271,8 +1027,8 @@ float ProcTexNoiseCoef(vec2 x) {
}
}
std::string GenerateFragmentShader(const PicaFSConfig& config) {
const auto& state = config.state;
std::string ShaderGenerator::GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) {
const auto& state = config;
std::string out;
out += R"(
@ -1387,7 +1143,7 @@ std::string GenerateFragmentShader(const PicaFSConfig& config) {
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.state.shadow_texture_orthographic) {
if (!config.shadow_texture_orthographic) {
out += "uv /= w;";
}
@ -1501,7 +1257,7 @@ vec4 shadowTextureCube(vec2 uv, float w) {
#endif
)";
if (config.state.proctex.enable)
if (config.proctex.enable)
AppendProcTexSampler(out, config);
// We round the interpolated primary color to the nearest 1/255th
@ -1514,7 +1270,7 @@ vec4 secondary_fragment_color = vec4(0.0);
)";
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
if (state.alpha_test_func == FramebufferRegs::CompareFunc::Never) {
if (state.alpha_test_func == Pica::CompareFunc::Never) {
out += "discard; }";
return out;
}
@ -1552,7 +1308,7 @@ vec4 secondary_fragment_color = vec4(0.0);
WriteTevStage(out, config, static_cast<u32>(index));
}
if (state.alpha_test_func != FramebufferRegs::CompareFunc::Always) {
if (state.alpha_test_func != Pica::CompareFunc::Always) {
out += "if (";
AppendAlphaTestCondition(out, state.alpha_test_func);
out += ") discard;\n";
@ -1623,12 +1379,11 @@ do {
return out;
}
std::string GenerateTrivialVertexShader(bool separable_shader) {
std::string ShaderGenerator::GenerateTrivialVertexShader(bool separable_shader) {
std::string out;
out += "#version 450\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n";
out +=
fmt::format("layout(location = {}) in vec4 vert_position;\n"
out += fmt::format("layout(location = {}) in vec4 vert_position;\n"
"layout(location = {}) in vec4 vert_color;\n"
"layout(location = {}) in vec2 vert_texcoord0;\n"
"layout(location = {}) in vec2 vert_texcoord1;\n"
@ -1656,7 +1411,6 @@ void main() {
gl_Position = vert_position;
gl_Position.z = (gl_Position.z + gl_Position.w) / 2.0;
//gl_Position.y = -gl_Position.y;
//gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
//gl_ClipDistance[1] = dot(clip_coef, vert_position);
}
@ -1665,205 +1419,15 @@ void main() {
return out;
}
bool InitializeCompiler() {
static bool glslang_initialized = false;
if (glslang_initialized) {
return true;
}
if (!glslang::InitializeProcess()) {
LOG_CRITICAL(Render_Vulkan, "Failed to initialize glslang shader compiler");
return false;
}
std::atexit([]() { glslang::FinalizeProcess(); });
glslang_initialized = true;
return true;
std::string ShaderGenerator::GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
UNREACHABLE();
}
const TBuiltInResource DefaultTBuiltInResource = {
.maxLights = 32,
.maxClipPlanes = 6,
.maxTextureUnits = 32,
.maxTextureCoords = 32,
.maxVertexAttribs = 64,
.maxVertexUniformComponents = 4096,
.maxVaryingFloats = 64,
.maxVertexTextureImageUnits = 32,
.maxCombinedTextureImageUnits = 80,
.maxTextureImageUnits = 32,
.maxFragmentUniformComponents = 4096,
.maxDrawBuffers = 32,
.maxVertexUniformVectors = 128,
.maxVaryingVectors = 8,
.maxFragmentUniformVectors = 16,
.maxVertexOutputVectors = 16,
.maxFragmentInputVectors = 15,
.minProgramTexelOffset = -8,
.maxProgramTexelOffset = 7,
.maxClipDistances = 8,
.maxComputeWorkGroupCountX = 65535,
.maxComputeWorkGroupCountY = 65535,
.maxComputeWorkGroupCountZ = 65535,
.maxComputeWorkGroupSizeX = 1024,
.maxComputeWorkGroupSizeY = 1024,
.maxComputeWorkGroupSizeZ = 64,
.maxComputeUniformComponents = 1024,
.maxComputeTextureImageUnits = 16,
.maxComputeImageUniforms = 8,
.maxComputeAtomicCounters = 8,
.maxComputeAtomicCounterBuffers = 1,
.maxVaryingComponents = 60,
.maxVertexOutputComponents = 64,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 128,
.maxFragmentInputComponents = 128,
.maxImageUnits = 8,
.maxCombinedImageUnitsAndFragmentOutputs = 8,
.maxCombinedShaderOutputResources = 8,
.maxImageSamples = 0,
.maxVertexImageUniforms = 0,
.maxTessControlImageUniforms = 0,
.maxTessEvaluationImageUniforms = 0,
.maxGeometryImageUniforms = 0,
.maxFragmentImageUniforms = 8,
.maxCombinedImageUniforms = 8,
.maxGeometryTextureImageUnits = 16,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
.maxGeometryUniformComponents = 1024,
.maxGeometryVaryingComponents = 64,
.maxTessControlInputComponents = 128,
.maxTessControlOutputComponents = 128,
.maxTessControlTextureImageUnits = 16,
.maxTessControlUniformComponents = 1024,
.maxTessControlTotalOutputComponents = 4096,
.maxTessEvaluationInputComponents = 128,
.maxTessEvaluationOutputComponents = 128,
.maxTessEvaluationTextureImageUnits = 16,
.maxTessEvaluationUniformComponents = 1024,
.maxTessPatchComponents = 120,
.maxPatchVertices = 32,
.maxTessGenLevel = 64,
.maxViewports = 16,
.maxVertexAtomicCounters = 0,
.maxTessControlAtomicCounters = 0,
.maxTessEvaluationAtomicCounters = 0,
.maxGeometryAtomicCounters = 0,
.maxFragmentAtomicCounters = 8,
.maxCombinedAtomicCounters = 8,
.maxAtomicCounterBindings = 1,
.maxVertexAtomicCounterBuffers = 0,
.maxTessControlAtomicCounterBuffers = 0,
.maxTessEvaluationAtomicCounterBuffers = 0,
.maxGeometryAtomicCounterBuffers = 0,
.maxFragmentAtomicCounterBuffers = 1,
.maxCombinedAtomicCounterBuffers = 1,
.maxAtomicCounterBufferSize = 16384,
.maxTransformFeedbackBuffers = 4,
.maxTransformFeedbackInterleavedComponents = 64,
.maxCullDistances = 8,
.maxCombinedClipAndCullDistances = 8,
.maxSamples = 4,
.maxMeshOutputVerticesNV = 256,
.maxMeshOutputPrimitivesNV = 512,
.maxMeshWorkGroupSizeX_NV = 32,
.maxMeshWorkGroupSizeY_NV = 1,
.maxMeshWorkGroupSizeZ_NV = 1,
.maxTaskWorkGroupSizeX_NV = 32,
.maxTaskWorkGroupSizeY_NV = 1,
.maxTaskWorkGroupSizeZ_NV = 1,
.maxMeshViewCountNV = 4,
.maxDualSourceDrawBuffersEXT = 1,
.limits = TLimits{
.nonInductiveForLoops = 1,
.whileLoops = 1,
.doWhileLoops = 1,
.generalUniformIndexing = 1,
.generalAttributeMatrixVectorIndexing = 1,
.generalVaryingIndexing = 1,
.generalSamplerIndexing = 1,
.generalVariableIndexing = 1,
.generalConstantMatrixVectorIndexing = 1,
}};
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits vk_stage) {
if (!InitializeCompiler()) {
return VK_NULL_HANDLE;
}
EShLanguage stage;
switch (vk_stage) {
case vk::ShaderStageFlagBits::eVertex:
stage = EShLangVertex;
break;
case vk::ShaderStageFlagBits::eFragment:
stage = EShLangFragment;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unknown shader stage");
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) {
LOG_CRITICAL(Render_Vulkan, "Unimplemented!");
UNREACHABLE();
}
std::unique_ptr<glslang::TShader> shader = std::make_unique<glslang::TShader>(stage);
std::unique_ptr<glslang::TProgram> program;
glslang::TShader::ForbidIncluder includer;
EProfile profile = ECoreProfile;
EShMessages messages = static_cast<EShMessages>(EShMsgDefault | EShMsgSpvRules | EShMsgVulkanRules);
int default_version = 450;
const char* pass_source_code = source.data();
int pass_source_code_length = static_cast<int>(source.size());
shader->setEnvTarget(glslang::EShTargetSpv, glslang::EShTargetLanguageVersion::EShTargetSpv_1_3);
shader->setStringsWithLengths(&pass_source_code, &pass_source_code_length, 1);
if (!shader->parse(&DefaultTBuiltInResource, default_version, profile, false, true, messages, includer)) {
LOG_CRITICAL(Render_Vulkan, "Shader Info Log:\n{}\n{}", shader->getInfoLog(), shader->getInfoDebugLog());
return VK_NULL_HANDLE;
}
// Even though there's only a single shader, we still need to link it to generate SPV
program = std::make_unique<glslang::TProgram>();
program->addShader(shader.get());
if (!program->link(messages)) {
LOG_CRITICAL(Render_Vulkan, "Program Info Log:\n{}\n{}", program->getInfoLog(), program->getInfoDebugLog());
return VK_NULL_HANDLE;
}
glslang::TIntermediate* intermediate = program->getIntermediate(stage);
std::vector<u32> out_code;
spv::SpvBuildLogger logger;
glslang::SpvOptions options;
// Compile the SPIR-V module without optimizations for easier debugging in RenderDoc.
if (true) {
intermediate->addSourceText(pass_source_code, pass_source_code_length);
options.generateDebugInfo = true;
options.disableOptimizer = true;
options.optimizeSize = false;
options.disassemble = false;
options.validate = true;
}
else {
options.disableOptimizer = false;
options.stripDebugInfo = true;
}
glslang::GlslangToSpv(*intermediate, out_code, &logger, &options);
const std::string spv_messages = logger.getAllMessages();
if (!spv_messages.empty()) {
LOG_INFO(Render_Vulkan, "SPIR-V conversion messages: {}", spv_messages);
}
vk::ShaderModuleCreateInfo shader_info{{}, out_code.size() * sizeof(u32), out_code.data()};
const vk::Device device = g_vk_instace->GetDevice();
vk::ShaderModule shader_module = device.createShaderModule(shader_info);
return shader_module;
}
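Putting the pieces together, a hedged example of the intended GLSL-to-SPIR-V path (this assumes the generator remains callable as a free function in this form):
// Compile the trivial vertex shader into a SPIR-V module for pipeline creation.
const std::string source = GenerateTrivialVertexShader(true);
vk::ShaderModule module = CompileShader(source, vk::ShaderStageFlagBits::eVertex);
if (!module) {
    LOG_CRITICAL(Render_Vulkan, "Trivial vertex shader failed to compile");
}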
} // namespace Vulkan

View File

@ -4,46 +4,23 @@
#pragma once
#include <array>
#include <cstring>
#include <functional>
#include <optional>
#include <string>
#include <type_traits>
#include "common/hash.h"
#include "video_core/regs.h"
#include "video_core/shader/shader.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include "video_core/common/shader_gen.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
/**
* Returns the vertex and fragment shader sources used for presentation
* @returns String of shader source code
*/
std::string GetPresentVertexShader();
std::string GetPresentFragmentShader();
class ShaderGenerator : public VideoCore::ShaderGeneratorBase {
public:
ShaderGenerator() = default;
~ShaderGenerator() override = default;
/**
* Generates the GLSL vertex shader program source code that accepts vertices from software shader
* and directly passes them to the fragment shader.
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
std::string GenerateTrivialVertexShader(bool separable_shader);
std::string GenerateTrivialVertexShader(bool separable_shader) override;
/**
* Generates the GLSL fragment shader program source code for the current Pica state
* @param config ShaderCacheKey object generated for the current Pica state, used for the shader
* configuration (NOTE: Use state in this struct only, not the Pica registers!)
* @param separable_shader generates shader that can be used for separate shader object
* @returns String of the shader source code
*/
std::string GenerateFragmentShader(const PicaFSConfig& config);
std::string GenerateVertexShader(const Pica::Shader::ShaderSetup& setup, const PicaVSConfig& config,
bool separable_shader) override;
/**
* Generates a SPIR-V shader module from the provided GLSL source code
*/
vk::ShaderModule CompileShader(const std::string& source, vk::ShaderStageFlagBits stage);
std::string GenerateFixedGeometryShader(const PicaFixedGSConfig& config, bool separable_shader) override;
} // namespace Vulkan
std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) override;
};
} // namespace VideoCore

View File

@ -14,10 +14,10 @@
namespace Vulkan {
/* Render vertex attributes */
struct VertexBase {
VertexBase() = default;
VertexBase(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
position[0] = v.pos.x.ToFloat32();
position[1] = v.pos.y.ToFloat32();
position[2] = v.pos.z.ToFloat32();
@ -56,31 +56,12 @@ struct VertexBase {
glm::vec3 view;
};
/// Structure that the hardware rendered vertices are composed of
struct HardwareVertex : public VertexBase {
HardwareVertex() = default;
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) : VertexBase(v, flip_quaternion) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(VertexBase));
static constexpr std::array<vk::VertexInputAttributeDescription, 8> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, color)),
vk::VertexInputAttributeDescription(2, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord0)),
vk::VertexInputAttributeDescription(3, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord1)),
vk::VertexInputAttributeDescription(4, 0, vk::Format::eR32G32Sfloat, offsetof(VertexBase, tex_coord2)),
vk::VertexInputAttributeDescription(5, 0, vk::Format::eR32Sfloat, offsetof(VertexBase, tex_coord0_w)),
vk::VertexInputAttributeDescription(6, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(VertexBase, normquat)),
vk::VertexInputAttributeDescription(7, 0, vk::Format::eR32G32B32Sfloat, offsetof(VertexBase, view)),
};
};
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
struct ScreenRectVertexBase {
ScreenRectVertexBase() = default;
ScreenRectVertexBase(float x, float y, float u, float v, float s) {
struct ScreenRectVertex {
ScreenRectVertex() = default;
ScreenRectVertex(float x, float y, float u, float v, float s) {
position.x = x;
position.y = y;
tex_coord.x = u;
@ -92,241 +73,4 @@ struct ScreenRectVertexBase {
glm::vec3 tex_coord;
};
struct ScreenRectVertex : public ScreenRectVertexBase {
ScreenRectVertex() = default;
ScreenRectVertex(float x, float y, float u, float v, float s) : ScreenRectVertexBase(x, y, u, v, s) {};
static constexpr auto binding_desc = vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertexBase));
static constexpr std::array<vk::VertexInputAttributeDescription, 2> attribute_desc =
{
vk::VertexInputAttributeDescription(0, 0, vk::Format::eR32G32Sfloat, offsetof(ScreenRectVertexBase, position)),
vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32B32Sfloat, offsetof(ScreenRectVertexBase, tex_coord)),
};
};
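For reference, a sketch of how these static descriptions would feed Vulkan pipeline creation. It assumes the binding_desc/attribute_desc members shown above (which this commit appears to be migrating) and VULKAN_HPP_NO_CONSTRUCTORS as defined in the other files of this commit:
// Hypothetical: wire the vertex layout into a graphics pipeline description.
const vk::PipelineVertexInputStateCreateInfo vertex_input_info = {
    .vertexBindingDescriptionCount = 1,
    .pVertexBindingDescriptions = &HardwareVertex::binding_desc,
    .vertexAttributeDescriptionCount = static_cast<u32>(HardwareVertex::attribute_desc.size()),
    .pVertexAttributeDescriptions = HardwareVertex::attribute_desc.data(),
};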
enum class ProgramType : u32 { VS, GS, FS };
enum Attributes {
ATTRIBUTE_POSITION,
ATTRIBUTE_COLOR,
ATTRIBUTE_TEXCOORD0,
ATTRIBUTE_TEXCOORD1,
ATTRIBUTE_TEXCOORD2,
ATTRIBUTE_TEXCOORD0_W,
ATTRIBUTE_NORMQUAT,
ATTRIBUTE_VIEW,
};
// Doesn't include const_color because we don't sync it, see comment in BuildFromRegs()
struct TevStageConfigRaw {
u32 sources_raw;
u32 modifiers_raw;
u32 ops_raw;
u32 scales_raw;
explicit operator Pica::TexturingRegs::TevStageConfig() const noexcept {
Pica::TexturingRegs::TevStageConfig stage;
stage.sources_raw = sources_raw;
stage.modifiers_raw = modifiers_raw;
stage.ops_raw = ops_raw;
stage.const_color = 0;
stage.scales_raw = scales_raw;
return stage;
}
};
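A short sketch of the round trip, assuming a populated PicaFSConfigState named state; note const_color is deliberately reconstructed as 0 and supplied through a uniform at draw time:
// Convert a cached raw stage back into the full register struct for codegen.
const auto stage = static_cast<Pica::TexturingRegs::TevStageConfig>(state.tev_stages[0]);
// stage.const_color == 0 here; the real constant color arrives via a uniform.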
struct PicaFSConfigState {
Pica::FramebufferRegs::CompareFunc alpha_test_func;
Pica::RasterizerRegs::ScissorMode scissor_test_mode;
Pica::TexturingRegs::TextureConfig::TextureType texture0_type;
bool texture2_use_coord1;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::RasterizerRegs::DepthBuffering depthmap_enable;
Pica::TexturingRegs::FogMode fog_mode;
bool fog_flip;
bool alphablend_enable;
Pica::FramebufferRegs::LogicOp logic_op;
struct {
struct {
unsigned num;
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
bool spot_atten_enable;
bool geometric_factor_0;
bool geometric_factor_1;
bool shadow_enable;
} light[8];
bool enable;
unsigned src_num;
Pica::LightingRegs::LightingBumpMode bump_mode;
unsigned bump_selector;
bool bump_renorm;
bool clamp_highlights;
Pica::LightingRegs::LightingConfig config;
bool enable_primary_alpha;
bool enable_secondary_alpha;
bool enable_shadow;
bool shadow_primary;
bool shadow_secondary;
bool shadow_invert;
bool shadow_alpha;
unsigned shadow_selector;
struct {
bool enable;
bool abs_input;
Pica::LightingRegs::LightingLutInput type;
float scale;
} lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
} lighting;
struct {
bool enable;
u32 coord;
Pica::TexturingRegs::ProcTexClamp u_clamp, v_clamp;
Pica::TexturingRegs::ProcTexCombiner color_combiner, alpha_combiner;
bool separate_alpha;
bool noise_enable;
Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
u32 lut_width;
u32 lut_offset0;
u32 lut_offset1;
u32 lut_offset2;
u32 lut_offset3;
u32 lod_min;
u32 lod_max;
Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
};
/**
* This struct contains all state used to generate the GLSL fragment shader that emulates the
* current Pica register configuration. This struct is used as a cache key for generated GLSL shader
* programs. The shader generation functions should retrieve state from this struct only, not by
* directly accessing Pica registers. This should reduce the risk of bugs in shader generation where
* Pica state is not being captured in the shader cache key, thereby resulting in (what should be)
* two separate shaders sharing the same key.
*/
struct PicaFSConfig : Common::HashableStruct<PicaFSConfigState> {
/// Construct a PicaFSConfig with the given Pica register configuration.
static PicaFSConfig BuildFromRegs(const Pica::Regs& regs);
bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index));
}
bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index));
}
};
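The two accessors above decode the mask packed in BuildFromRegs: bits 0-3 flag stages whose color output updates the combiner buffer, bits 4-7 flag alpha updates. A hypothetical value makes the layout concrete:
// combiner_buffer_input = 0b0011'0001:
//   color mask (bits 0-3) = 0001 -> only stage 0 updates the buffer color
//   alpha mask (bits 4-7) = 0011 -> stages 0 and 1 update the buffer alpha
PicaFSConfig config{};
config.state.combiner_buffer_input = 0b0011'0001;
// config.TevStageUpdatesCombinerBufferColor(0) == true
// config.TevStageUpdatesCombinerBufferColor(1) == false
// config.TevStageUpdatesCombinerBufferAlpha(1) == true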
/**
* This struct contains common information to identify a GL vertex/geometry shader generated from
* PICA vertex/geometry shader.
*/
struct PicaShaderConfigCommon {
void Init(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup);
u64 program_hash;
u64 swizzle_hash;
u32 main_offset;
bool sanitize_mul;
u32 num_outputs;
// output_map[output register index] -> output attribute index
std::array<u32, 16> output_map;
};
/**
* This struct contains information to identify a GL vertex shader generated from PICA vertex
* shader.
*/
struct PicaVSConfig : Common::HashableStruct<PicaShaderConfigCommon> {
explicit PicaVSConfig(const Pica::ShaderRegs& regs, Pica::Shader::ShaderSetup& setup) {
state.Init(regs, setup);
}
explicit PicaVSConfig(const PicaShaderConfigCommon& conf) {
state = conf;
}
};
struct PicaGSConfigCommonRaw {
void Init(const Pica::Regs& regs);
u32 vs_output_attributes;
u32 gs_output_attributes;
struct SemanticMap {
u32 attribute_index;
u32 component_index;
};
// semantic_maps[semantic name] -> GS output attribute index + component index
std::array<SemanticMap, 24> semantic_maps;
};
/**
* This struct contains information to identify a GL geometry shader generated from PICA no-geometry
* shader pipeline
*/
struct PicaFixedGSConfig : Common::HashableStruct<PicaGSConfigCommonRaw> {
explicit PicaFixedGSConfig(const Pica::Regs& regs) {
state.Init(regs);
}
};
struct PipelineCacheKey {
vk::Format color, depth_stencil;
vk::PipelineColorBlendAttachmentState blend_config;
vk::LogicOp blend_logic_op;
PicaFSConfig fragment_config;
auto operator <=>(const PipelineCacheKey& other) const = default;
u64 Hash() const {
return Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(PipelineCacheKey));
}
};
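Since Hash() digests the raw bytes of the key, padding inside the struct must stay deterministic; a zero-fill before populating the key is a simple guard. Sketch only: it assumes &lt;cstring&gt; is available, the key stays trivially copyable, and regs stands for the current Pica register state:
PipelineCacheKey key;
std::memset(&key, 0, sizeof(key)); // zero padding bytes so byte-wise hashing is stable
key.color = vk::Format::eB8G8R8A8Unorm;
key.depth_stencil = vk::Format::eD24UnormS8Uint;
key.fragment_config = PicaFSConfig::BuildFromRegs(regs);
const u64 pipeline_hash = key.Hash();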
} // namespace Vulkan
namespace std {
template <>
struct hash<Vulkan::PicaFSConfig> {
std::size_t operator()(const Vulkan::PicaFSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaVSConfig> {
std::size_t operator()(const Vulkan::PicaVSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PicaFixedGSConfig> {
std::size_t operator()(const Vulkan::PicaFixedGSConfig& k) const noexcept {
return k.Hash();
}
};
template <>
struct hash<Vulkan::PipelineCacheKey> {
size_t operator()(const Vulkan::PipelineCacheKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View File

@ -7,9 +7,9 @@
#include <array>
#include <bitset>
#include "video_core/regs.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_shader_state.h"
#include "video_core/renderer_vulkan/vk_pipeline_builder.h"
#include "video_core/renderer_vulkan/vk_texture.h"
namespace Vulkan {
@ -69,7 +69,7 @@ public:
bool StencilTestEnabled() const { return stencil_enabled && stencil_writes; }
/// Configure drawing state
void SetVertexBuffer(const Buffer& buffer, vk::DeviceSize offset);
void SetVertexBuffer(const StreamBuffer& buffer, vk::DeviceSize offset);
void SetViewport(vk::Viewport viewport);
void SetScissor(vk::Rect2D scissor);
void SetCullMode(vk::CullModeFlags flags);
@ -100,9 +100,9 @@ public:
void EndRendering();
/// Configure shader resources
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer);
void SetUniformBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer);
void SetTexture(u32 binding, const Texture& texture);
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const Buffer& buffer, u32 view_index);
void SetTexelBuffer(u32 binding, u32 offset, u32 size, const StreamBuffer& buffer, u32 view_index);
void SetPresentTextures(vk::ImageView view0, vk::ImageView view1, vk::ImageView view2);
void SetPresentData(DrawInfo data);
void SetPlaceholderColor(u8 red, u8 green, u8 blue, u8 alpha);

View File

@ -2,60 +2,69 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include <array>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/vk_instance.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
Swapchain::Swapchain(vk::SurfaceKHR surface_) : surface(surface_) {
Swapchain::Swapchain(Instance& instance, vk::SurfaceKHR surface) :
instance(instance), surface(surface) {
}
Swapchain::~Swapchain() {
auto device = g_vk_instace->GetDevice();
auto instance = g_vk_instace->GetInstance();
device.waitIdle();
// Destroy swapchain resources
vk::Device device = instance.GetDevice();
device.destroySemaphore(render_finished);
device.destroySemaphore(image_available);
device.destroySwapchainKHR(swapchain);
instance.destroySurfaceKHR(surface);
}
bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
void Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
is_outdated = false;
is_suboptimal = false;
// Fetch information about the provided surface
PopulateSwapchainDetails(surface, width, height);
Configure(width, height);
const std::array indices {
g_vk_instace->GetGraphicsQueueFamilyIndex(),
g_vk_instace->GetPresentQueueFamilyIndex(),
const std::array queue_family_indices = {
instance.GetGraphicsQueueFamilyIndex(),
instance.GetPresentQueueFamilyIndex(),
};
const bool exclusive = queue_family_indices[0] == queue_family_indices[1];
const u32 queue_family_indices_count = exclusive ? 2u : 1u;
const vk::SharingMode sharing_mode = exclusive ? vk::SharingMode::eExclusive :
vk::SharingMode::eConcurrent;
// Now we can actually create the swapchain
vk::SwapchainCreateInfoKHR swapchain_info{{}, surface, details.image_count, details.format.format,
details.format.colorSpace, details.extent, 1, vk::ImageUsageFlagBits::eColorAttachment,
vk::SharingMode::eExclusive, 1, indices.data(), details.transform,
vk::CompositeAlphaFlagBitsKHR::eOpaque, details.present_mode, true, swapchain};
const vk::SwapchainCreateInfoKHR swapchain_info = {
.surface = surface,
.minImageCount = image_count,
.imageFormat = surface_format.format,
.imageColorSpace = surface_format.colorSpace,
.imageExtent = extent,
.imageArrayLayers = 1,
.imageUsage = vk::ImageUsageFlagBits::eColorAttachment,
.imageSharingMode = sharing_mode,
.queueFamilyIndexCount = queue_family_indices_count,
.pQueueFamilyIndices = queue_family_indices.data(),
.preTransform = transform,
.presentMode = present_mode,
.clipped = true,
.oldSwapchain = swapchain
};
// For dedicated present queues, select concurrent sharing mode
if (indices[0] != indices[1]) {
swapchain_info.imageSharingMode = vk::SharingMode::eConcurrent;
swapchain_info.queueFamilyIndexCount = 2;
}
auto device = g_vk_instace->GetDevice();
auto new_swapchain = device.createSwapchainKHR(swapchain_info);
vk::Device device = instance.GetDevice();
vk::SwapchainKHR new_swapchain = device.createSwapchainKHR(swapchain_info);
// If an old swapchain exists, destroy it and move the new one to its place.
if (swapchain) {
device.destroy(swapchain);
if (vk::SwapchainKHR old_swapchain = std::exchange(swapchain, new_swapchain); old_swapchain) {
device.destroySwapchainKHR(old_swapchain);
}
swapchain = new_swapchain;
// Create sync objects if not already created
if (!image_available) {
@ -67,19 +76,17 @@ bool Swapchain::Create(u32 width, u32 height, bool vsync_enabled) {
}
// Create framebuffer and image views
swapchain_images.clear();
SetupImages();
return true;
images = device.getSwapchainImagesKHR(swapchain);
}
// Wait for a maximum of 1 second
constexpr u64 ACQUIRE_TIMEOUT = 1000000000;
void Swapchain::AcquireNextImage() {
auto result = g_vk_instace->GetDevice().acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
vk::Device device = instance.GetDevice();
vk::Result result = device.acquireNextImageKHR(swapchain, ACQUIRE_TIMEOUT,
image_available, VK_NULL_HANDLE,
&image_index);
&current_image);
switch (result) {
case vk::Result::eSuccess:
break;
@ -90,15 +97,21 @@ void Swapchain::AcquireNextImage() {
is_outdated = true;
break;
default:
LOG_ERROR(Render_Vulkan, "acquireNextImageKHR returned unknown result");
LOG_ERROR(Render_Vulkan, "vkAcquireNextImageKHR returned unknown result");
break;
}
}
void Swapchain::Present() {
const auto present_queue = g_vk_instace->GetPresentQueue();
const vk::PresentInfoKHR present_info = {
.waitSemaphoreCount = 1,
.pWaitSemaphores = &render_finished,
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &current_image
};
vk::PresentInfoKHR present_info(render_finished, swapchain, image_index);
vk::Queue present_queue = instance.GetPresentQueue();
vk::Result result = present_queue.presentKHR(present_info);
switch (result) {
@ -115,91 +128,68 @@ void Swapchain::Present() {
break;
}
frame_index = (frame_index + 1) % swapchain_images.size();
current_frame = (current_frame + 1) % images.size();
}
void Swapchain::PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height) {
auto gpu = g_vk_instace->GetPhysicalDevice();
void Swapchain::Configure(u32 width, u32 height) {
vk::PhysicalDevice physical = instance.GetPhysicalDevice();
// Choose surface format
auto formats = gpu.getSurfaceFormatsKHR(surface);
details.format = formats[0];
auto formats = physical.getSurfaceFormatsKHR(surface);
surface_format = formats[0];
if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) {
details.format = { vk::Format::eB8G8R8A8Unorm };
}
else {
for (const auto& format : formats) {
if (format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm) {
details.format = format;
break;
}
surface_format = vk::SurfaceFormatKHR{
.format = vk::Format::eB8G8R8A8Unorm
};
} else {
auto iter = std::find_if(formats.begin(), formats.end(), [](vk::SurfaceFormatKHR format) -> bool {
return format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear &&
format.format == vk::Format::eB8G8R8A8Unorm;
});
if (iter == formats.end()) {
LOG_CRITICAL(Render_Vulkan, "Unable to find required swapchain format!");
}
}
// Returns true if the given present mode is supported by the surface
auto modes = gpu.getSurfacePresentModesKHR(surface);
auto ModePresent = [&modes](vk::PresentModeKHR check_mode) {
auto it = std::find_if(modes.begin(), modes.end(), [check_mode](const auto& mode) {
return check_mode == mode;
});
return it != modes.end();
};
auto modes = physical.getSurfacePresentModesKHR(surface);
// FIFO is guaranteed by the Vulkan standard to be available
details.present_mode = vk::PresentModeKHR::eFifo;
present_mode = vk::PresentModeKHR::eFifo;
auto iter = std::find_if(modes.begin(), modes.end(), [](vk::PresentModeKHR mode) {
return vk::PresentModeKHR::eMailbox == mode;
});
// Prefer Mailbox if present for lowest latency
if (ModePresent(vk::PresentModeKHR::eMailbox)) {
details.present_mode = vk::PresentModeKHR::eMailbox;
if (iter != modes.end()) {
present_mode = vk::PresentModeKHR::eMailbox;
}
// Query surface extent
auto capabilities = gpu.getSurfaceCapabilitiesKHR(surface);
details.extent = capabilities.currentExtent;
auto capabilities = physical.getSurfaceCapabilitiesKHR(surface);
extent = capabilities.currentExtent;
if (capabilities.currentExtent.width == std::numeric_limits<u32>::max()) {
details.extent.width = std::clamp(width, capabilities.minImageExtent.width,
extent.width = std::clamp(width, capabilities.minImageExtent.width,
capabilities.maxImageExtent.width);
details.extent.height = std::clamp(height, capabilities.minImageExtent.height,
extent.height = std::clamp(height, capabilities.minImageExtent.height,
capabilities.maxImageExtent.height);
}
// Select the number of swapchain images; request one extra so the host has a spare buffer to work on in the background
details.image_count = capabilities.minImageCount + 1;
image_count = capabilities.minImageCount + 1;
if (capabilities.maxImageCount > 0) {
details.image_count = std::min(details.image_count, capabilities.maxImageCount);
image_count = std::min(image_count, capabilities.maxImageCount);
}
// Prefer identity transform if possible
details.transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & details.transform)) {
details.transform = capabilities.currentTransform;
transform = vk::SurfaceTransformFlagBitsKHR::eIdentity;
if (!(capabilities.supportedTransforms & transform)) {
transform = capabilities.currentTransform;
}
}
void Swapchain::SetupImages() {
// Get the swap chain images
auto device = g_vk_instace->GetDevice();
auto images = device.getSwapchainImagesKHR(swapchain);
Texture::Info image_info{
.width = details.extent.width,
.height = details.extent.height,
.format = details.format.format,
.type = vk::ImageType::e2D,
.view_type = vk::ImageViewType::e2D,
.usage = vk::ImageUsageFlagBits::eColorAttachment
};
// Create the swapchain buffers containing the image and imageview
swapchain_images.resize(images.size());
for (int i = 0; i < swapchain_images.size(); i++) {
// Wrap swapchain images with Texture
swapchain_images[i].Adopt(image_info, images[i]);
}
}
} // namespace Vulkan
} // namespace VideoCore::Vulkan
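To tie the pieces together, a hypothetical per-frame flow showing how the two semaphores pair with the command scheduler's Submit; names like scheduler, width, height and vsync_enabled are stand-ins:
swapchain.AcquireNextImage();                        // signals image_available when ready
// ... record rendering commands targeting swapchain.GetCurrentImage() ...
scheduler.Submit(false,
                 swapchain.GetAvailableSemaphore(),  // wait until the image is acquired
                 swapchain.GetPresentSemaphore());   // signal render_finished on completion
swapchain.Present();                                 // queues the image, waiting on render_finished
if (swapchain.NeedsRecreation()) {
    swapchain.Create(width, height, vsync_enabled);  // rebuild on resize/outdated surface
}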

View File

@ -4,62 +4,90 @@
#pragma once
#include <string_view>
#include <vector>
#include "core/frontend/emu_window.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
struct SwapChainDetails {
vk::SurfaceFormatKHR format;
class Instance;
class Swapchain {
public:
Swapchain(Instance& instance, vk::SurfaceKHR surface);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
void Create(u32 width, u32 height, bool vsync_enabled);
/// Acquire the next image in the swapchain.
void AcquireNextImage();
/// Present the current image and move to the next one
void Present();
/// Return current swapchain state
inline vk::Extent2D GetExtent() const {
return extent;
}
/// Return the swapchain surface
inline vk::SurfaceKHR GetSurface() const {
return surface;
}
/// Return the swapchain format
inline vk::SurfaceFormatKHR GetSurfaceFormat() const {
return surface_format;
}
/// Return the Vulkan swapchain handle
inline vk::SwapchainKHR GetHandle() const {
return swapchain;
}
/// Return the semaphore that will be signaled when vkAcquireNextImageKHR completes
inline vk::Semaphore GetAvailableSemaphore() const {
return image_available;
}
/// Return the semaphore that rendering signals and presentation waits on
inline vk::Semaphore GetPresentSemaphore() const {
return render_finished;
}
/// Return the current swapchain image
inline vk::Image GetCurrentImage() {
return images[current_image];
}
/// Returns true when the swapchain should be recreated
inline bool NeedsRecreation() const {
return is_suboptimal || is_outdated;
}
private:
void Configure(u32 width, u32 height);
private:
Instance& instance;
vk::SwapchainKHR swapchain = VK_NULL_HANDLE;
vk::SurfaceKHR surface = VK_NULL_HANDLE;
// Swapchain properties
vk::SurfaceFormatKHR surface_format;
vk::PresentModeKHR present_mode;
vk::Extent2D extent;
vk::SurfaceTransformFlagBitsKHR transform;
u32 image_count;
};
class Swapchain {
public:
Swapchain(vk::SurfaceKHR surface);
~Swapchain();
/// Creates (or recreates) the swapchain with a given size.
bool Create(u32 width, u32 height, bool vsync_enabled);
/// Acquire the next image in the swapchain.
void AcquireNextImage();
void Present();
/// Returns true when the swapchain needs to be recreated.
bool NeedsRecreation() const { return IsSubOptimal() || IsOutDated(); }
bool IsOutDated() const { return is_outdated; }
bool IsSubOptimal() const { return is_suboptimal; }
bool IsVSyncEnabled() const { return vsync_enabled; }
u32 GetCurrentImageIndex() const { return image_index; }
/// Get current swapchain state
vk::Extent2D GetSize() const { return details.extent; }
vk::SurfaceKHR GetSurface() const { return surface; }
vk::SurfaceFormatKHR GetSurfaceFormat() const { return details.format; }
vk::SwapchainKHR GetSwapChain() const { return swapchain; }
const vk::Semaphore& GetAvailableSemaphore() const { return image_available; }
const vk::Semaphore& GetRenderSemaphore() const { return render_finished; }
Texture& GetCurrentImage() { return swapchain_images[image_index]; }
private:
void PopulateSwapchainDetails(vk::SurfaceKHR surface, u32 width, u32 height);
void SetupImages();
private:
SwapChainDetails details{};
vk::SurfaceKHR surface;
// Swapchain state
std::vector<vk::Image> images;
vk::Semaphore image_available, render_finished;
bool vsync_enabled{false}, is_outdated{true}, is_suboptimal{true};
vk::SwapchainKHR swapchain{VK_NULL_HANDLE};
std::vector<Texture> swapchain_images;
u32 image_index{0}, frame_index{0};
u32 current_image = 0, current_frame = 0;
bool vsync_enabled = false;
bool is_outdated = true;
bool is_suboptimal = true;
};
} // namespace Vulkan

View File

@ -2,232 +2,185 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_state.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "common/assert.h"
#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
TaskScheduler::~TaskScheduler() {
// 16MB should be enough for a single frame
constexpr BufferInfo STAGING_INFO = {
.capacity = 16 * 1024 * 1024,
.usage = BufferUsage::Staging
};
CommandScheduler::CommandScheduler(Instance& instance) : instance(instance) {
}
CommandScheduler::~CommandScheduler() {
// Destroy Vulkan resources
auto device = g_vk_instace->GetDevice();
device.waitIdle();
vk::Device device = instance.GetDevice();
VmaAllocator allocator = instance.GetAllocator();
for (auto& task : tasks) {
task.staging.Destroy();
device.destroyDescriptorPool(task.pool);
for (auto& command : commands) {
device.destroyFence(command.fence);
// Clean up any scheduled resources
for (auto& func : command.cleanups) {
func(device, allocator);
}
}
SyncToGPU();
device.destroyCommandPool(command_pool);
device.destroySemaphore(timeline);
}
std::tuple<u8*, u32> TaskScheduler::RequestStaging(u32 size) {
auto& task = tasks[current_task];
if (size > STAGING_BUFFER_SIZE - task.current_offset) {
// If we run out of space, allocate a new buffer.
// The old one will be safely destroyed when the task finishes
task.staging.Recreate();
task.current_offset = 0;
return std::make_tuple(task.staging.GetHostPointer(), 0);
}
u8* ptr = task.staging.GetHostPointer() + task.current_offset;
std::memset(ptr, 0, size);
task.current_offset += size;
return std::make_tuple(ptr, task.current_offset - size);
}
Buffer& TaskScheduler::GetStaging() {
return tasks[current_task].staging;
}
bool TaskScheduler::Create() {
auto device = g_vk_instace->GetDevice();
// Create command pool
vk::CommandPoolCreateInfo pool_info(vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
g_vk_instace->GetGraphicsQueueFamilyIndex());
command_pool = device.createCommandPool(pool_info);
// Create timeline semaphore for synchronization
vk::SemaphoreTypeCreateInfo timeline_info{vk::SemaphoreType::eTimeline, 0};
vk::SemaphoreCreateInfo semaphore_info{{}, &timeline_info};
timeline = device.createSemaphore(semaphore_info);
Buffer::Info staging_info{
.size = STAGING_BUFFER_SIZE,
.properties = vk::MemoryPropertyFlagBits::eHostVisible |
vk::MemoryPropertyFlagBits::eHostCoherent,
.usage = vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst
bool CommandScheduler::Create() {
vk::Device device = instance.GetDevice();
const vk::CommandPoolCreateInfo pool_info = {
.flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
.queueFamilyIndex = instance.GetGraphicsQueueFamilyIndex()
};
// Should be enough for a single frame
const vk::DescriptorPoolSize pool_size{vk::DescriptorType::eCombinedImageSampler, 64};
vk::DescriptorPoolCreateInfo pool_create_info{{}, 1024, pool_size};
// Create command pool
command_pool = device.createCommandPool(pool_info);
for (auto& task : tasks) {
// Create command buffers
vk::CommandBufferAllocateInfo buffer_info{command_pool, vk::CommandBufferLevel::ePrimary, 2};
auto buffers = device.allocateCommandBuffers(buffer_info);
std::ranges::copy_n(buffers.begin(), 2, task.command_buffers.begin());
vk::CommandBufferAllocateInfo buffer_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
.commandBufferCount = 2 * SCHEDULER_COMMAND_COUNT
};
// Create staging buffer
task.staging.Create(staging_info);
// Allocate all command buffers
const auto command_buffers = device.allocateCommandBuffers(buffer_info);
// Create descriptor pool
task.pool = device.createDescriptorPool(pool_create_info);
// Initialize command slots
for (std::size_t i = 0; i < commands.size(); i++) {
commands[i] = CommandSlot{
.render_command_buffer = command_buffers[2 * i],
.upload_command_buffer = command_buffers[2 * i + 1],
.fence = device.createFence({}),
.upload_buffer = std::make_unique<Buffer>(instance, *this, STAGING_INFO)
};
}
return true;
}
vk::CommandBuffer TaskScheduler::GetRenderCommandBuffer() const {
const auto& task = tasks[current_task];
return task.command_buffers[1];
}
vk::CommandBuffer TaskScheduler::GetUploadCommandBuffer() {
auto& task = tasks[current_task];
if (!task.use_upload_buffer) {
auto& cmdbuffer = task.command_buffers[0];
cmdbuffer.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
task.use_upload_buffer = true;
}
return task.command_buffers[0];
}
vk::DescriptorPool TaskScheduler::GetDescriptorPool() const {
const auto& task = tasks[current_task];
return task.pool;
}
void TaskScheduler::SyncToGPU(u64 task_index) {
// No need to sync if the GPU already has finished the task
auto tick = GetGPUTick();
if (tasks[task_index].task_id <= tick) {
void CommandScheduler::Synchronize() {
// Don't synchronize the same command twice
CommandSlot& command = commands[current_command];
if (command.fence_counter <= completed_fence_counter) {
return;
}
// Wait for the task to complete
vk::SemaphoreWaitInfo wait_info{{}, timeline, tasks[task_index].task_id};
auto result = g_vk_instace->GetDevice().waitSemaphores(wait_info, UINT64_MAX);
if (result != vk::Result::eSuccess) {
LOG_CRITICAL(Render_Vulkan, "Failed waiting for timeline semaphore!");
// Wait for this command buffer to be completed.
vk::Device device = instance.GetDevice();
if (device.waitForFences(command.fence, true, UINT64_MAX) != vk::Result::eSuccess) {
LOG_ERROR(Render_Vulkan, "Waiting for fences failed!");
}
// Clean up resources for command buffers that completed along with the current one
const u64 now_fence_counter = command.fence_counter;
VmaAllocator allocator = instance.GetAllocator();
for (CommandSlot& command : commands) {
if (command.fence_counter < now_fence_counter &&
command.fence_counter > completed_fence_counter) {
for (auto& func: command.cleanups) {
func(device, allocator);
}
command.cleanups.clear();
}
}
completed_fence_counter = now_fence_counter;
}
void TaskScheduler::SyncToGPU() {
SyncToGPU(current_task);
}
u64 TaskScheduler::GetCPUTick() const {
return current_task_id;
}
u64 TaskScheduler::GetGPUTick() const {
auto device = g_vk_instace->GetDevice();
return device.getSemaphoreCounterValue(timeline);
}
void TaskScheduler::Submit(bool wait_completion, bool present, Swapchain* swapchain) {
// End the current task recording.
auto& task = tasks[current_task];
void CommandScheduler::Submit(bool wait_completion,
vk::Semaphore wait_semaphore,
vk::Semaphore signal_semaphore) {
const CommandSlot& command = commands[current_command];
// End command buffers
task.command_buffers[1].end();
if (task.use_upload_buffer) {
task.command_buffers[0].end();
command.render_command_buffer.end();
if (command.use_upload_buffer) {
command.upload_command_buffer.end();
}
const u32 num_signal_semaphores = present ? 2U : 1U;
const std::array signal_values{task.task_id, u64(0)};
std::array signal_semaphores{timeline, vk::Semaphore{}};
const u32 num_wait_semaphores = present ? 2U : 1U;
const std::array wait_values{task.task_id - 1, u64(1)};
std::array wait_semaphores{timeline, vk::Semaphore{}};
// When the task completes, the timeline will increment to the task id
const vk::TimelineSemaphoreSubmitInfoKHR timeline_si{num_wait_semaphores, wait_values.data(),
num_signal_semaphores, signal_values.data()};
static constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
constexpr std::array<vk::PipelineStageFlags, 2> wait_stage_masks{
vk::PipelineStageFlagBits::eAllCommands,
vk::PipelineStageFlagBits::eColorAttachmentOutput,
};
const u32 cmdbuffer_count = task.use_upload_buffer ? 2u : 1u;
const vk::SubmitInfo submit_info{num_wait_semaphores, wait_semaphores.data(), wait_stage_masks.data(), cmdbuffer_count,
&task.command_buffers[2 - cmdbuffer_count], num_signal_semaphores, signal_semaphores.data(),
&timeline_si};
const u32 signal_semaphore_count = signal_semaphore ? 1u : 0u;
const u32 wait_semaphore_count = wait_semaphore ? 1u : 0u;
const u32 command_buffer_count = command.use_upload_buffer ? 2u : 1u;
const std::array command_buffers = { command.render_command_buffer,
command.upload_command_buffer };
// Wait for new swapchain image
if (present) {
signal_semaphores[1] = swapchain->GetRenderSemaphore();
wait_semaphores[1] = swapchain->GetAvailableSemaphore();
}
// Prepare submit info
const vk::SubmitInfo submit_info = {
.waitSemaphoreCount = wait_semaphore_count,
.pWaitSemaphores = &wait_semaphore,
.pWaitDstStageMask = wait_stage_masks.data(),
.commandBufferCount = command_buffer_count,
.pCommandBuffers = command_buffers.data(),
.signalSemaphoreCount = signal_semaphore_count,
.pSignalSemaphores = &signal_semaphore,
};
// Submit the command buffer
auto queue = g_vk_instace->GetGraphicsQueue();
queue.submit(submit_info);
// Present the image when rendering has finished
if (present) {
swapchain->Present();
}
vk::Queue queue = instance.GetGraphicsQueue();
queue.submit(submit_info, command.fence);
// Block host until the GPU catches up
if (wait_completion) {
SyncToGPU();
Synchronize();
}
// Switch to next cmdbuffer.
BeginTask();
SwitchSlot();
}
void TaskScheduler::Schedule(std::function<void()> func) {
auto& task = tasks[current_task];
task.cleanups.push_back(func);
void CommandScheduler::Schedule(Deleter&& func) {
auto& command = commands[current_command];
command.cleanups.push_back(std::move(func));
}
void TaskScheduler::BeginTask() {
u32 next_task_index = (current_task + 1) % TASK_COUNT;
auto& task = tasks[next_task_index];
auto device = g_vk_instace->GetDevice();
vk::CommandBuffer CommandScheduler::GetUploadCommandBuffer() {
CommandSlot& command = commands[current_command];
if (!command.use_upload_buffer) {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Wait for the GPU to finish with all resources for this task.
SyncToGPU(next_task_index);
// Delete all resources that can be freed now
for (auto& func : task.cleanups) {
func();
command.upload_command_buffer.begin(begin_info);
command.use_upload_buffer = true;
}
device.resetDescriptorPool(task.pool);
task.command_buffers[1].begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit});
// Move to the next command buffer.
current_task = next_task_index;
task.task_id = ++current_task_id;
task.current_offset = 0;
task.use_upload_buffer = false;
task.cleanups.clear();
auto& state = VulkanState::Get();
state.InitDescriptorSets();
return command.upload_command_buffer;
}
std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
void CommandScheduler::SwitchSlot() {
current_command = (current_command + 1) % SCHEDULER_COMMAND_COUNT;
CommandSlot& command = commands[current_command];
// Wait for the GPU to finish with all resources for this command.
Synchronize();
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit
};
// Move to the next command buffer.
vk::Device device = instance.GetDevice();
device.resetFences(command.fence);
command.render_command_buffer.begin(begin_info);
command.fence_counter = next_fence_counter++;
command.use_upload_buffer = false;
}
} // namespace VideoCore::Vulkan
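For orientation, here is a minimal sketch of how a caller might drive the new CommandScheduler each frame, using only the API visible above; the resource names (old_buffer, old_alloc) are illustrative:
    vk::CommandBuffer cmd = scheduler.GetRenderCommandBuffer();
    // ... record draw commands into cmd ...
    // Defer destruction of a resource the in-flight commands may still reference
    scheduler.Schedule([buffer = old_buffer, alloc = old_alloc](vk::Device, VmaAllocator allocator) {
        vmaDestroyBuffer(allocator, buffer, alloc);
    });
    // Submit the slot; its fence signals when the GPU is done with it
    scheduler.Submit(false);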

View File

@ -4,68 +4,81 @@
#pragma once
#include <memory>
#include <array>
#include "video_core/renderer_vulkan/vk_buffer.h"
#include <functional>
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
constexpr u32 TASK_COUNT = 5;
constexpr u32 STAGING_BUFFER_SIZE = 16 * 1024 * 1024;
constexpr u32 SCHEDULER_COMMAND_COUNT = 4;
class Swapchain;
using Deleter = std::function<void(vk::Device, VmaAllocator)>;
/// Wrapper class around command buffer execution. Handles an arbitrary
/// number of tasks that can be submitted concurrently. This allows the host
/// to start recording the next frame while the GPU is working on the
/// current one. Larger values can be used with caution, as they can cause
/// frame latency if the CPU is too far ahead of the GPU
class TaskScheduler {
class Buffer;
class Instance;
class CommandScheduler {
public:
TaskScheduler() = default;
~TaskScheduler();
CommandScheduler(Instance& instance);
~CommandScheduler();
/// Create and initialize the work scheduler
bool Create();
/// Retrieve either of the current frame's command buffers
vk::CommandBuffer GetRenderCommandBuffer() const;
/// Block host until the current command completes execution
void Synchronize();
/// Defer operation until the current command completes execution
void Schedule(Deleter&& func);
/// Submits the current command to the graphics queue
void Submit(bool wait_completion = false, vk::Semaphore wait = VK_NULL_HANDLE,
vk::Semaphore signal = VK_NULL_HANDLE);
/// Returns the command buffer used for early upload operations.
/// This is useful for vertex/uniform buffer uploads that happen once per frame
vk::CommandBuffer GetUploadCommandBuffer();
vk::DescriptorPool GetDescriptorPool() const;
/// Access the staging buffer of the current task
std::tuple<u8*, u32> RequestStaging(u32 size);
Buffer& GetStaging();
/// Returns the command buffer used for rendering
inline vk::CommandBuffer GetRenderCommandBuffer() const {
const CommandSlot& command = commands[current_command];
return command.render_command_buffer;
}
/// Query and/or synchronize CPU and GPU progress
u64 GetCPUTick() const;
u64 GetGPUTick() const;
void SyncToGPU();
void SyncToGPU(u64 task_index);
/// Returns the upload buffer of the active command slot
inline Buffer& GetCommandUploadBuffer() {
CommandSlot& command = commands[current_command];
return *command.upload_buffer;
}
void Schedule(std::function<void()> func);
void Submit(bool wait_completion = false, bool present = false, Swapchain* swapchain = nullptr);
void BeginTask();
/// Returns the index of the current command slot
inline u32 GetCurrentSlotIndex() const {
return current_command;
}
private:
struct Task {
/// Activates the next command slot and optionally waits for its completion
void SwitchSlot();
private:
Instance& instance;
u64 next_fence_counter = 1;
u64 completed_fence_counter = 0;
struct CommandSlot {
bool use_upload_buffer = false;
u64 current_offset = 0, task_id = 0;
std::array<vk::CommandBuffer, 2> command_buffers;
std::vector<std::function<void()>> cleanups;
vk::DescriptorPool pool;
Buffer staging;
u64 fence_counter = 0;
vk::CommandBuffer render_command_buffer, upload_command_buffer;
vk::Fence fence = VK_NULL_HANDLE;
std::unique_ptr<Buffer> upload_buffer;
std::vector<Deleter> cleanups;
};
vk::Semaphore timeline;
vk::CommandPool command_pool;
u64 current_task_id = 0;
// Each task contains unique resources
std::array<Task, TASK_COUNT> tasks;
u64 current_task = -1;
vk::CommandPool command_pool = VK_NULL_HANDLE;
std::array<CommandSlot, SCHEDULER_COMMAND_COUNT> commands;
u32 current_command = 0;
};
extern std::unique_ptr<TaskScheduler> g_vk_task_scheduler;
} // namespace VideoCore::Vulkan
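The fence_counter/completed_fence_counter pair implies a synchronization scheme along the following lines; this is a sketch under that assumption, not the actual implementation (which is not part of this hunk):
    void CommandScheduler::Synchronize() {
        const CommandSlot& command = commands[current_command];
        if (command.fence_counter > completed_fence_counter) {
            vk::Device device = instance.GetDevice();
            // Block until the GPU signals the slot's fence, then record progress
            (void)device.waitForFences(command.fence, true, UINT64_MAX);
            completed_fence_counter = command.fence_counter;
        }
    }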

View File

@ -2,288 +2,229 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <fstream>
#include <iostream>
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "common/assert.h"
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/pica_to_vulkan.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_texture.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
#include "video_core/renderer_vulkan/vk_state.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
static int BytesPerPixel(vk::Format format) {
inline vk::Format ToVkFormat(TextureFormat format) {
switch (format) {
case vk::Format::eD32SfloatS8Uint:
return 5;
case vk::Format::eD32Sfloat:
case vk::Format::eB8G8R8A8Unorm:
case vk::Format::eR8G8B8A8Uint:
case vk::Format::eR8G8B8A8Unorm:
case vk::Format::eD24UnormS8Uint:
return 4;
case vk::Format::eR8G8B8Unorm:
case vk::Format::eR8G8B8Srgb:
return 3;
case vk::Format::eR5G6B5UnormPack16:
case vk::Format::eR5G5B5A1UnormPack16:
case vk::Format::eR4G4B4A4UnormPack16:
case vk::Format::eD16Unorm:
return 2;
case TextureFormat::RGBA8:
return vk::Format::eR8G8B8A8Unorm;
case TextureFormat::RGB8:
return vk::Format::eR8G8B8Unorm;
case TextureFormat::RGB5A1:
return vk::Format::eR5G5B5A1UnormPack16;
case TextureFormat::RGB565:
return vk::Format::eR5G6B5UnormPack16;
case TextureFormat::RGBA4:
return vk::Format::eR4G4B4A4UnormPack16;
case TextureFormat::D16:
return vk::Format::eD16Unorm;
case TextureFormat::D24:
return vk::Format::eX8D24UnormPack32;
case TextureFormat::D24S8:
return vk::Format::eD24UnormS8Uint;
default:
UNREACHABLE();
LOG_ERROR(Render_Vulkan, "Unknown texture format {}!", format);
return vk::Format::eUndefined;
}
}
vk::ImageAspectFlags GetImageAspect(vk::Format format) {
vk::ImageAspectFlags flags;
switch (format) {
case vk::Format::eD16UnormS8Uint:
case vk::Format::eD24UnormS8Uint:
case vk::Format::eD32SfloatS8Uint:
flags = vk::ImageAspectFlagBits::eStencil | vk::ImageAspectFlagBits::eDepth;
break;
case vk::Format::eD16Unorm:
case vk::Format::eD32Sfloat:
flags = vk::ImageAspectFlagBits::eDepth;
break;
inline vk::ImageType ToVkImageType(TextureType type) {
switch (type) {
case TextureType::Texture1D:
return vk::ImageType::e1D;
case TextureType::Texture2D:
return vk::ImageType::e2D;
case TextureType::Texture3D:
return vk::ImageType::e3D;
default:
flags = vk::ImageAspectFlagBits::eColor;
LOG_ERROR(Render_Vulkan, "Unknown texture type {}!", type);
return vk::ImageType::e2D;
}
}
return flags;
inline vk::ImageViewType ToVkImageViewType(TextureViewType view_type) {
switch (view_type) {
case TextureViewType::View1D:
return vk::ImageViewType::e1D;
case TextureViewType::View2D:
return vk::ImageViewType::e2D;
case TextureViewType::View3D:
return vk::ImageViewType::e3D;
case TextureViewType::ViewCube:
return vk::ImageViewType::eCube;
case TextureViewType::View1DArray:
return vk::ImageViewType::e1DArray;
case TextureViewType::View2DArray:
return vk::ImageViewType::e2DArray;
case TextureViewType::ViewCubeArray:
return vk::ImageViewType::eCubeArray;
default:
LOG_ERROR(Render_Vulkan, "Unknown texture view type {}!", view_type);
return vk::ImageViewType::e2D;
}
}
Texture::Texture(Instance& instance, CommandScheduler& scheduler) :
instance(instance), scheduler(scheduler) {}
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info) : TextureBase(info),
instance(instance), scheduler(scheduler) {
// Convert the input format to another that supports attachments
advertised_format = ToVkFormat(info.format);
internal_format = instance.GetFormatAlternative(advertised_format);
aspect = GetImageAspect(advertised_format);
vk::Device device = instance.GetDevice();
const vk::ImageCreateInfo image_info = {
.flags = info.view_type == TextureViewType::ViewCube ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = ToVkImageType(info.type),
.format = internal_format,
.extent = {info.width, info.height, 1},
.mipLevels = info.levels,
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
.samples = vk::SampleCountFlagBits::e1,
.usage = GetImageUsage(aspect),
};
const VmaAllocationCreateInfo alloc_info = {
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE
};
VkImage unsafe_image = VK_NULL_HANDLE;
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateImage(allocator, &unsafe_image_info, &alloc_info, &unsafe_image, &allocation, nullptr);
image = vk::Image{unsafe_image};
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = ToVkImageViewType(info.view_type),
.format = internal_format,
.subresourceRange = {aspect, 0, info.levels, 0, info.view_type == TextureViewType::ViewCube ? 6u : 1u}
};
// Create image view
image_view = device.createImageView(view_info);
}
Texture::Texture(Instance& instance, CommandScheduler& scheduler,
vk::Image image, const TextureInfo& info) : TextureBase(info),
instance(instance), scheduler(scheduler), image(image),
is_texture_owned(false) {
const vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = ToVkImageViewType(info.view_type),
.format = internal_format,
.subresourceRange = {aspect, 0, info.levels, 0, 1}
};
// Create image view
vk::Device device = instance.GetDevice();
image_view = device.createImageView(view_info);
}
Texture::~Texture() {
Destroy();
}
Texture::Texture(Texture&& other) noexcept {
info = std::exchange(other.info, Info{});
texture = std::exchange(other.texture, VK_NULL_HANDLE);
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
view = std::exchange(other.view, VK_NULL_HANDLE);
memory = std::exchange(other.memory, VK_NULL_HANDLE);
image_size = std::exchange(other.image_size, 0);
adopted = std::exchange(other.adopted, false);
is_rgb = std::exchange(other.is_rgb, false);
is_d24s8 = std::exchange(other.is_d24s8, false);
}
Texture& Texture::operator=(Texture&& other) noexcept {
Destroy();
info = std::exchange(other.info, Info{});
texture = std::exchange(other.texture, VK_NULL_HANDLE);
aspect = std::exchange(other.aspect, vk::ImageAspectFlagBits::eNone);
view = std::exchange(other.view, VK_NULL_HANDLE);
memory = std::exchange(other.memory, VK_NULL_HANDLE);
image_size = std::exchange(other.image_size, 0);
adopted = std::exchange(other.adopted, false);
is_rgb = std::exchange(other.is_rgb, false);
is_d24s8 = std::exchange(other.is_d24s8, false);
return *this;
}
void Texture::Create(const Info& create_info) {
auto device = g_vk_instace->GetDevice();
info = create_info;
// Emulate RGB8 format with RGBA8
is_rgb = false;
if (info.format == vk::Format::eR8G8B8Unorm) {
is_rgb = true;
info.format = vk::Format::eR8G8B8A8Unorm;
}
is_d24s8 = false;
if (info.format == vk::Format::eD24UnormS8Uint) {
is_d24s8 = true;
info.format = vk::Format::eD32SfloatS8Uint;
}
// Create the texture
image_size = info.width * info.height * BytesPerPixel(info.format);
aspect = GetImageAspect(info.format);
vk::ImageCreateFlags flags{};
if (info.view_type == vk::ImageViewType::eCube) {
flags = vk::ImageCreateFlagBits::eCubeCompatible;
}
vk::ImageCreateInfo image_info {
flags, info.type, info.format,
{ info.width, info.height, 1 }, info.levels, info.layers,
static_cast<vk::SampleCountFlagBits>(info.multisamples),
vk::ImageTiling::eOptimal, info.usage
};
texture = device.createImage(image_info);
// Create texture memory
auto requirements = device.getImageMemoryRequirements(texture);
auto memory_index = Buffer::FindMemoryType(requirements.memoryTypeBits,
vk::MemoryPropertyFlagBits::eDeviceLocal);
vk::MemoryAllocateInfo alloc_info(requirements.size, memory_index);
memory = device.allocateMemory(alloc_info);
device.bindImageMemory(texture, memory, 0);
// Create texture view
vk::ImageViewCreateInfo view_info {
{}, texture, info.view_type, info.format, {},
{aspect, 0, info.levels, 0, info.layers}
};
view = device.createImageView(view_info);
}
void Texture::Create(Texture& other) {
auto info = other.info;
Create(info);
// Copy the buffer contents
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
auto old_layout = other.GetLayout();
other.Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
u32 copy_count = 0;
std::array<vk::ImageCopy, 16> copy_regions;
for (u32 i = 0; i < info.levels; i++) {
copy_regions[copy_count++] = vk::ImageCopy{
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
vk::ImageSubresourceLayers{aspect, i, 0, 1}, {0},
{info.width, info.height, 1}
};
}
cmdbuffer.copyImage(other.GetHandle(), vk::ImageLayout::eTransferSrcOptimal,
texture, vk::ImageLayout::eTransferDstOptimal, copy_count,
copy_regions.data());
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
other.Transition(cmdbuffer, old_layout);
}
void Texture::Adopt(const Info& create_info, vk::Image image) {
info = create_info;
image_size = info.width * info.height * BytesPerPixel(info.format);
aspect = GetImageAspect(info.format);
texture = image;
// Create texture view
vk::ImageViewCreateInfo view_info {
{}, texture, info.view_type, info.format, {},
{aspect, 0, info.levels, 0, info.layers}
};
auto device = g_vk_instace->GetDevice();
view = device.createImageView(view_info);
adopted = true;
}
void Texture::Destroy() {
if (texture && !adopted) {
// Make sure to unbind the texture before destroying it
auto& state = VulkanState::Get();
state.UnbindTexture(*this);
auto deleter = [texture = texture,
view = view,
memory = memory]() {
auto device = g_vk_instace->GetDevice();
if (texture) {
device.destroyImage(texture);
if (image && is_texture_owned) {
auto deleter = [image = image, allocation = allocation,
view = image_view](vk::Device device, VmaAllocator allocator) {
device.destroyImageView(view);
device.freeMemory(memory);
}
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
};
// Schedule deletion of the texture after it's no longer used
// by the GPU
g_vk_task_scheduler->Schedule(deleter);
}
// If the image was adopted (probably from the swapchain) then only
// destroy the view
if (adopted) {
g_vk_task_scheduler->Schedule([view = view](){
auto device = g_vk_instace->GetDevice();
device.destroyImageView(view);
});
// Schedule deletion of the texture after it's no longer used by the GPU
scheduler.Schedule(deleter);
} else if (!is_texture_owned) {
// If the texture does not own the image, destroy the view immediately, as
// synchronization is the caller's responsibility
vk::Device device = instance.GetDevice();
device.destroyImageView(image_view);
}
}
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout) {
Transition(cmdbuffer, new_layout, 0, info.levels, 0, info.layers);
}
void Texture::Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level, u32 level_count) {
ASSERT(level + level_count <= TEXTURE_MAX_LEVELS);
void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout,
u32 start_level, u32 level_count, u32 start_layer, u32 layer_count) {
if (new_layout == layout) {
// Ensure all miplevels in the range have the same layout
vk::ImageLayout old_layout = layouts[level];
if (old_layout != vk::ImageLayout::eUndefined) {
for (u32 i = 0; i < level_count; i++) {
ASSERT(layouts[level + i] == old_layout);
}
}
// Don't do anything if the image is already in the wanted layout
if (new_layout == old_layout) {
return;
}
struct LayoutInfo {
vk::ImageLayout layout;
vk::AccessFlags access;
vk::PipelineStageFlags stage;
};
// Get optimal transition settings for every image layout. Settings taken from Dolphin
auto layout_info = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info{ .layout = layout };
auto GetLayoutInfo = [](vk::ImageLayout layout) -> LayoutInfo {
LayoutInfo info;
switch (layout) {
case vk::ImageLayout::eUndefined:
// Layout undefined therefore contents undefined, and we don't care what happens to it.
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eTopOfPipe;
break;
case vk::ImageLayout::ePreinitialized:
// Image has been pre-initialized by the host, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eHostWrite;
info.stage = vk::PipelineStageFlagBits::eHost;
break;
case vk::ImageLayout::eColorAttachmentOptimal:
// Image was being used as a color attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite;
info.access = vk::AccessFlagBits::eColorAttachmentRead |
vk::AccessFlagBits::eColorAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
break;
case vk::ImageLayout::eDepthStencilAttachmentOptimal:
// Image was being used as a depthstencil attachment, so ensure all writes have completed.
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead | vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests | vk::PipelineStageFlagBits::eLateFragmentTests;
info.access = vk::AccessFlagBits::eDepthStencilAttachmentRead |
vk::AccessFlagBits::eDepthStencilAttachmentWrite;
info.stage = vk::PipelineStageFlagBits::eEarlyFragmentTests |
vk::PipelineStageFlagBits::eLateFragmentTests;
break;
case vk::ImageLayout::ePresentSrcKHR:
info.access = vk::AccessFlagBits::eNone;
info.stage = vk::PipelineStageFlagBits::eBottomOfPipe;
break;
case vk::ImageLayout::eShaderReadOnlyOptimal:
// Image was being used as a shader resource, make sure all reads have finished.
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eFragmentShader;
break;
case vk::ImageLayout::eTransferSrcOptimal:
// Image was being used as a copy source, ensure all reads have finished.
info.access = vk::AccessFlagBits::eTransferRead;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
case vk::ImageLayout::eTransferDstOptimal:
// Image was being used as a copy destination, ensure all writes have finished.
info.access = vk::AccessFlagBits::eTransferWrite;
info.stage = vk::PipelineStageFlagBits::eTransfer;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
UNREACHABLE();
@ -292,220 +233,286 @@ void Texture::Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout
return info;
};
LayoutInfo source = GetLayoutInfo(old_layout);
LayoutInfo dest = GetLayoutInfo(new_layout);
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = source.access,
.dstAccessMask = dest.access,
.oldLayout = old_layout,
.newLayout = new_layout,
.image = image,
.subresourceRange = {aspect, level, level_count, 0, 1}
};
// Submit pipeline barrier
LayoutInfo source = layout_info(layout), dst = layout_info(new_layout);
vk::ImageMemoryBarrier barrier {
source.access, dst.access,
source.layout, dst.layout,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
texture,
vk::ImageSubresourceRange{aspect, start_level, level_count, start_layer, layer_count}
command_buffer.pipelineBarrier(source.stage, dest.stage,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
// Update layouts
SetLayout(new_layout, level, level_count);
}
void Texture::SetLayout(vk::ImageLayout new_layout, u32 level, u32 level_count) {
std::fill_n(layouts.begin() + level, level_count, new_layout);
}
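As an illustration of what per-level layout tracking buys, a caller can now retarget a single mip for a transfer while the other levels stay shader-readable; a hypothetical usage:
    // Transition only mip 2 for the copy; the other levels keep their layout
    texture.Transition(cmd, vk::ImageLayout::eTransferDstOptimal, 2, 1);
    // ... copyBufferToImage into mip 2 ...
    texture.Transition(cmd, vk::ImageLayout::eShaderReadOnlyOptimal, 2, 1);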
void Texture::Upload(Rect2D rectangle, u32 stride, std::span<const u8> data, u32 level) {
const u64 byte_count = data.size();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
// If the advertised format supports blitting then use GPU-accelerated
// format conversion.
if (internal_format != advertised_format &&
instance.IsFormatSupported(advertised_format,
vk::FormatFeatureFlagBits::eBlitSrc)) {
// Creating a new staging texture for each upload/download is expensive
// but this path is not common. TODO: Profile this
StagingTexture staging{instance, scheduler, info};
const std::array offsets = {
vk::Offset3D{rectangle.x, rectangle.y, 0},
vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
static_cast<s32>(rectangle.y + rectangle.height), 0}
};
cmdbuffer.pipelineBarrier(source.stage, dst.stage, vk::DependencyFlagBits::eByRegion, {}, {}, barrier);
layout = new_layout;
}
void Texture::OverrideImageLayout(vk::ImageLayout new_layout) {
layout = new_layout;
}
void Texture::Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels) {
u32 request_size = is_rgb ? (pixels.size() / 3) * 4 :
(is_d24s8 ? (pixels.size() / 4) * 8 : pixels.size());
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
if (!buffer) {
LOG_ERROR(Render_Vulkan, "Cannot upload pixels without staging buffer!");
}
// Copy pixels to staging buffer
auto& state = VulkanState::Get();
state.EndRendering();
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
// Automatically convert RGB to RGBA
if (is_rgb) {
auto data = RGBToRGBA(pixels);
std::memcpy(buffer, data.data(), data.size());
}
else if (is_d24s8) {
auto data = D24S8ToD32S8(pixels);
std::memcpy(buffer, data.data(), data.size() * sizeof(data[0]));
}
else {
std::memcpy(buffer, pixels.data(), pixels.size());
}
std::array<vk::BufferImageCopy, 2> copy_regions;
u32 region_count = 1;
copy_regions[0] = vk::BufferImageCopy{
offset, row_length, region.extent.height,
{aspect, level, layer, 1},
{region.offset.x, region.offset.y, 0},
{region.extent.width, region.extent.height, 1}
const vk::ImageBlit image_blit = {
.srcSubresource = {aspect, level, 0, 1},
.srcOffsets = offsets,
.dstSubresource = {aspect, level, 0, 1},
.dstOffsets = offsets
};
if (aspect & vk::ImageAspectFlagBits::eDepth &&
aspect & vk::ImageAspectFlagBits::eStencil) {
// Copying both depth and stencil requires two separate regions
copy_regions[1] = copy_regions[0];
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
// Copy data to staging texture
std::memcpy(staging.GetMappedPtr(), data.data(), byte_count);
staging.Commit(byte_count);
region_count++;
}
Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);
// Transition image to transfer format
Transition(cmdbuffer, vk::ImageLayout::eTransferDstOptimal);
// Blit
command_buffer.blitImage(staging.GetHandle(), vk::ImageLayout::eGeneral,
image, vk::ImageLayout::eTransferDstOptimal,
image_blit, vk::Filter::eNearest);
cmdbuffer.copyBufferToImage(g_vk_task_scheduler->GetStaging().GetBuffer(),
texture, vk::ImageLayout::eTransferDstOptimal, region_count,
copy_regions.data());
// Otherwise use normal staging buffer path with possible CPU conversion
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
const u64 staging_offset = staging.GetCurrentOffset();
// Prepare image for shader reads
Transition(cmdbuffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
// Copy pixels to the staging buffer
auto slice = staging.Map(byte_count);
std::memcpy(slice.data(), data.data(), byte_count);
staging.Commit(byte_count);
void Texture::Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> memory) {
u32 request_size = is_rgb ? (memory.size() / 3) * 4 :
(is_d24s8 ? (memory.size() / 4) * 8 : memory.size());
auto [buffer, offset] = g_vk_task_scheduler->RequestStaging(request_size);
if (!buffer) {
LOG_ERROR(Render_Vulkan, "Cannot download texture without staging buffer!");
}
// TODO: Handle depth and stencil uploads
ASSERT(aspect == vk::ImageAspectFlagBits::eColor &&
advertised_format == internal_format);
auto& state = VulkanState::Get();
state.EndRendering();
auto cmdbuffer = g_vk_task_scheduler->GetRenderCommandBuffer();
std::array<vk::BufferImageCopy, 2> copy_regions;
u32 region_count = 1;
copy_regions[0] = vk::BufferImageCopy{
offset, row_length, region.extent.height,
{aspect, level, layer, 1},
{region.offset.x, region.offset.y, 0},
{region.extent.width, region.extent.height, 1}
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging_offset,
.bufferRowLength = stride,
.bufferImageHeight = rectangle.height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {rectangle.x, rectangle.y, 0},
.imageExtent = {rectangle.width, rectangle.height, 1}
};
if (aspect & vk::ImageAspectFlagBits::eDepth &&
aspect & vk::ImageAspectFlagBits::eStencil) {
// Copying both depth and stencil requires two separate regions
copy_regions[1] = copy_regions[0];
copy_regions[0].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth;
copy_regions[1].imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil;
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
Transition(command_buffer, vk::ImageLayout::eTransferDstOptimal, level);
region_count++;
// Copy staging buffer to the texture
command_buffer.copyBufferToImage(staging.GetHandle(), image,
vk::ImageLayout::eTransferDstOptimal,
copy_region);
}
// Transition image to transfer format
auto old_layout = GetLayout();
Transition(cmdbuffer, vk::ImageLayout::eTransferSrcOptimal);
Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal);
}
cmdbuffer.copyImageToBuffer(texture, vk::ImageLayout::eTransferSrcOptimal,
g_vk_task_scheduler->GetStaging().GetBuffer(),
region_count, copy_regions.data());
void Texture::Download(Rect2D rectangle, u32 stride, std::span<u8> data, u32 level) {
const u64 byte_count = data.size();
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
// Restore layout
Transition(cmdbuffer, old_layout);
// If the advertised format supports blitting then use GPU-accelerated
// format conversion.
if (internal_format != advertised_format &&
instance.IsFormatSupported(advertised_format,
vk::FormatFeatureFlagBits::eBlitDst)) {
// Creating a new staging texture for each upload/download is expensive
// but this path is not common. TODO: Profile this
StagingTexture staging{instance, scheduler, info};
// Wait for the data to be available
// NOTE: This is really slow and should be reworked
g_vk_task_scheduler->Submit(true);
const std::array offsets = {
vk::Offset3D{rectangle.x, rectangle.y, 0},
vk::Offset3D{static_cast<s32>(rectangle.x + rectangle.width),
static_cast<s32>(rectangle.y + rectangle.height), 0}
};
// Automatically convert RGB to RGBA
if (is_rgb) {
auto data = RGBAToRGB(std::span(buffer, request_size));
std::memcpy(memory.data(), data.data(), memory.size());
}
else if (is_d24s8) {
auto data = D32S8ToD24S8(std::span(buffer, request_size));
std::memcpy(memory.data(), data.data(), memory.size());
}
else {
std::memcpy(memory.data(), buffer, memory.size());
const vk::ImageBlit image_blit = {
.srcSubresource = {aspect, level, 0, 1},
.srcOffsets = offsets,
.dstSubresource = {aspect, level, 0, 1},
.dstOffsets = offsets
};
Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);
// Blit
command_buffer.blitImage(image, vk::ImageLayout::eTransferSrcOptimal,
staging.GetHandle(), vk::ImageLayout::eGeneral,
image_blit, vk::Filter::eNearest);
// TODO: Async downloads
scheduler.Submit(true);
// Copy data to the destination
staging.Commit(byte_count);
std::memcpy(data.data(), staging.GetMappedPtr(), byte_count);
// Otherwise use normal staging buffer path with possible CPU conversion
} else {
Buffer& staging = scheduler.GetCommandUploadBuffer();
const u64 staging_offset = staging.GetCurrentOffset();
const vk::BufferImageCopy copy_region = {
.bufferOffset = staging_offset,
.bufferRowLength = stride,
.bufferImageHeight = rectangle.height,
.imageSubresource = {
.aspectMask = aspect,
.mipLevel = level,
.baseArrayLayer = 0,
.layerCount = 1
},
.imageOffset = {rectangle.x, rectangle.y, 0},
.imageExtent = {rectangle.width, rectangle.height, 1}
};
Transition(command_buffer, vk::ImageLayout::eTransferSrcOptimal, level);
// Copy pixel data to the staging buffer
command_buffer.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal,
staging.GetHandle(), copy_region);
Transition(command_buffer, vk::ImageLayout::eShaderReadOnlyOptimal);
// TODO: Async downloads
scheduler.Submit(true);
// Copy data to the destination
auto memory = staging.Map(byte_count);
std::memcpy(data.data(), memory.data(), byte_count);
}
}
template <typename Out, typename In>
std::span<Out> SpanCast(std::span<In> span) {
return std::span(reinterpret_cast<Out*>(span.data()), span.size_bytes() / sizeof(Out));
StagingTexture::StagingTexture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info) :
TextureBase(info), instance(instance), scheduler(scheduler) {
format = ToVkFormat(info.format);
const vk::ImageCreateInfo image_info = {
.flags = info.view_type == TextureViewType::ViewCube ?
vk::ImageCreateFlagBits::eCubeCompatible :
vk::ImageCreateFlags{},
.imageType = ToVkImageType(info.type),
.format = format,
.extent = {info.width, info.height, 1},
.mipLevels = info.levels,
.arrayLayers = info.view_type == TextureViewType::ViewCube ? 6u : 1u,
.samples = vk::SampleCountFlagBits::e1,
.usage = vk::ImageUsageFlagBits::eTransferSrc |
vk::ImageUsageFlagBits::eTransferDst,
};
const VmaAllocationCreateInfo alloc_create_info = {
.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
VMA_ALLOCATION_CREATE_MAPPED_BIT,
.usage = VMA_MEMORY_USAGE_AUTO
};
VkImage unsafe_image = VK_NULL_HANDLE;
VkImageCreateInfo unsafe_image_info = static_cast<VkImageCreateInfo>(image_info);
VmaAllocationInfo alloc_info;
VmaAllocator allocator = instance.GetAllocator();
// Allocate texture memory
vmaCreateImage(allocator, &unsafe_image_info, &alloc_create_info,
&unsafe_image, &allocation, &alloc_info);
image = vk::Image{unsafe_image};
// Map memory
mapped_ptr = alloc_info.pMappedData;
// Transition image to VK_IMAGE_LAYOUT_GENERAL. This layout is convenient
// for staging textures since it allows for well defined host access and
// works with vkCmdBlitImage, thus eliminating the need for layout transitions
const vk::ImageMemoryBarrier barrier = {
.srcAccessMask = vk::AccessFlagBits::eNone,
.dstAccessMask = vk::AccessFlagBits::eNone,
.oldLayout = vk::ImageLayout::eUndefined,
.newLayout = vk::ImageLayout::eGeneral,
.image = image,
.subresourceRange = {vk::ImageAspectFlagBits::eColor, 0, info.levels, 0, 1}
};
vk::CommandBuffer command_buffer = scheduler.GetUploadCommandBuffer();
command_buffer.pipelineBarrier(vk::PipelineStageFlagBits::eBottomOfPipe,
vk::PipelineStageFlagBits::eTransfer,
vk::DependencyFlagBits::eByRegion,
{}, {}, barrier);
}
std::vector<u8> Texture::RGBToRGBA(std::span<u8> data) {
ASSERT(data.size() % 3 == 0);
StagingTexture::~StagingTexture() {
if (image) {
auto deleter = [allocation = allocation,
image = image](vk::Device device, VmaAllocator allocator) {
vmaDestroyImage(allocator, static_cast<VkImage>(image), allocation);
};
u32 new_size = (data.size() / 3) * 4;
std::vector<u8> rgba(new_size);
u32 dst_pos = 0;
for (u32 i = 0; i < data.size(); i += 3) {
std::memcpy(rgba.data() + dst_pos, data.data() + i, 3);
rgba[dst_pos + 3] = 255u;
dst_pos += 4;
// Schedule deletion of the texture after it's no longer used by the GPU
scheduler.Schedule(deleter);
}
return rgba;
}
std::vector<u64> Texture::D24S8ToD32S8(std::span<u8> data) {
ASSERT(data.size() % 4 == 0);
std::vector<u64> d32s8;
std::span<u32> d24s8 = SpanCast<u32>(data);
d32s8.reserve(data.size() * 2);
std::ranges::transform(d24s8, std::back_inserter(d32s8), [](u32 comp) -> u64 {
// Convert normalized 24bit depth component to floating point
float fdepth = static_cast<float>(comp & 0xFFFFFF) / 0xFFFFFF;
u64 result = static_cast<u64>(comp) << 8;
// Use std::memcpy to avoid the unsafe casting required to preserve the floating
// point bits
std::memcpy(&result, &fdepth, 4);
return result;
});
return d32s8;
void StagingTexture::Commit(u32 size) {
VmaAllocator allocator = instance.GetAllocator();
vmaFlushAllocation(allocator, allocation, 0, size);
}
std::vector<u8> Texture::RGBAToRGB(std::span<u8> data) {
ASSERT(data.size() % 4 == 0);
Sampler::Sampler(Instance& instance, SamplerInfo info) :
SamplerBase(info), instance(instance) {
u32 new_size = (data.size() / 4) * 3;
std::vector<u8> rgb(new_size);
auto properties = instance.GetPhysicalDevice().getProperties();
const auto filtering = PicaToVK::TextureFilterMode(info.mag_filter,
info.min_filter,
info.mip_filter);
const vk::SamplerCreateInfo sampler_info = {
.magFilter = filtering.mag_filter,
.minFilter = filtering.min_filter,
.mipmapMode = filtering.mip_mode,
.addressModeU = PicaToVK::WrapMode(info.wrap_s),
.addressModeV = PicaToVK::WrapMode(info.wrap_t),
.anisotropyEnable = true,
.maxAnisotropy = properties.limits.maxSamplerAnisotropy,
.compareEnable = false,
.compareOp = vk::CompareOp::eAlways,
.borderColor = vk::BorderColor::eIntOpaqueBlack,
.unnormalizedCoordinates = false
};
u32 dst_pos = 0;
for (u32 i = 0; i < data.size(); i += 4) {
std::memcpy(rgb.data() + dst_pos, data.data() + i, 3);
dst_pos += 3;
}
return rgb;
vk::Device device = instance.GetDevice();
sampler = device.createSampler(sampler_info);
}
std::vector<u32> Texture::D32S8ToD24S8(std::span<u8> data) {
ASSERT(data.size() % 8 == 0);
std::vector<u32> d24s8;
std::span<u64> d32s8 = SpanCast<u64>(data);
d24s8.reserve(data.size() / 2);
std::ranges::transform(d32s8, std::back_inserter(d24s8), [](u64 comp) -> u32 {
// Convert floating point to 24bit normalized depth
float fdepth = 0.f;
u32 depth = comp & 0xFFFFFFFF;
std::memcpy(&fdepth, &depth, 4);
u32 stencil = (comp >> 32) & 0xFF;
u64 result = static_cast<u32>(fdepth * 0xFFFFFF) | (stencil << 24);
return result;
});
return d24s8;
Sampler::~Sampler() {
vk::Device device = instance.GetDevice();
device.destroySampler(sampler);
}
} // namespace VideoCore::Vulkan
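Putting the texture API together, a sketch of a round trip through Upload/Download; the TextureInfo field values are illustrative and assume the aggregate layout used elsewhere in this commit:
    TextureInfo info{};
    info.width = 256;
    info.height = 256;
    info.levels = 1;
    info.format = TextureFormat::RGBA8;
    info.type = TextureType::Texture2D;
    info.view_type = TextureViewType::View2D;
    Texture texture{instance, scheduler, info};
    std::vector<u8> pixels(256 * 256 * 4);
    texture.Upload(Rect2D{0, 0, 256, 256}, 256, pixels);   // stride is the row length in texels
    std::vector<u8> readback(256 * 256 * 4);
    texture.Download(Rect2D{0, 0, 256, 256}, 256, readback);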

View File

@ -4,80 +4,147 @@
#pragma once
#include <memory>
#include <span>
#include <functional>
#include <glm/glm.hpp>
#include "common/math_util.h"
#include "video_core/renderer_vulkan/vk_buffer.h"
#include "video_core/renderer_vulkan/vk_surface_params.h"
#include "video_core/common/texture.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
namespace VideoCore::Vulkan {
/// Vulkan texture object
class Texture final : public NonCopyable {
// PICA textures have at most 8 mipmap levels
constexpr u32 TEXTURE_MAX_LEVELS = 8;
class Instance;
class CommandScheduler;
/**
* A texture located in GPU memory
*/
class Texture : public VideoCore::TextureBase {
public:
/// Information for the creation of the target texture
struct Info {
u32 width, height;
vk::Format format;
vk::ImageType type;
vk::ImageViewType view_type;
vk::ImageUsageFlags usage;
u32 multisamples = 1;
u32 levels = 1, layers = 1;
};
// Default constructor
Texture(Instance& instance, CommandScheduler& scheduler);
// Constructor for texture creation
Texture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info);
// Constructor for not owning textures (swapchain)
Texture(Instance& instance, CommandScheduler& scheduler,
vk::Image image, const TextureInfo& info);
Texture() = default;
~Texture();
/// Enable move operations
Texture(Texture&& other) noexcept;
Texture& operator=(Texture&& other) noexcept;
/// Uploads pixel data to the GPU memory
void Upload(Rect2D rectangle, u32 stride, std::span<const u8> data,
u32 level = 0) override;
/// Create a new Vulkan texture object
void Create(const Info& info);
void Create(Texture& texture);
void Adopt(const Info& info, vk::Image image);
void Destroy();
/// Downloads pixel data from GPU memory
void Download(Rect2D rectangle, u32 stride, std::span<u8> data,
u32 level = 0) override;
/// Query objects
bool IsValid() const { return texture; }
vk::Image GetHandle() const { return texture; }
vk::ImageView GetView() const { return view; }
vk::Format GetFormat() const { return info.format; }
vk::ImageLayout GetLayout() const { return layout; }
u32 GetSamples() const { return info.multisamples; }
u32 GetSize() const { return image_size; }
vk::Rect2D GetArea() const { return {{0, 0},{info.width, info.height}}; }
/// Copies the specified rectangle area to the destination texture
void BlitTo(TextureHandle dest, Rect2D src_rectangle, Rect2D dest_rect,
u32 src_level = 0, u32 dest_level = 0) override;
/// Copies CPU side pixel data to the GPU texture buffer
void Upload(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> pixels);
void Download(u32 level, u32 layer, u32 row_length, vk::Rect2D region, std::span<u8> dst);
/// Overrides the layout of the provided image subresource
void SetLayout(vk::ImageLayout new_layout, u32 level = 0, u32 level_count = 1);
/// Used to transition the image to an optimal layout during transfers
void OverrideImageLayout(vk::ImageLayout new_layout);
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout);
void Transition(vk::CommandBuffer cmdbuffer, vk::ImageLayout new_layout, u32 start_level, u32 level_count,
u32 start_layer, u32 layer_count);
/// Transitions part of the image to the provided layout
void Transition(vk::CommandBuffer command_buffer, vk::ImageLayout new_layout,
u32 level = 0, u32 level_count = 1);
/// Returns the underlying vulkan image handle
vk::Image GetHandle() const {
return image;
}
/// Returns the Vulkan image view
vk::ImageView GetView() const {
return image_view;
}
/// Returns the internal format backing the texture.
/// It may not match the input pixel format.
vk::Format GetInternalFormat() const {
return internal_format;
}
/// Returns the current image layout
vk::ImageLayout GetLayout(u32 level = 0) const {
return layouts.at(level);
}
/// Returns a rectangle that represents the complete area of the texture
vk::Rect2D GetArea() const {
return {{0, 0},{info.width, info.height}};
}
private:
std::vector<u8> RGBToRGBA(std::span<u8> data);
std::vector<u64> D24S8ToD32S8(std::span<u8> data);
Instance& instance;
CommandScheduler& scheduler;
std::vector<u8> RGBAToRGB(std::span<u8> data);
std::vector<u32> D32S8ToD24S8(std::span<u8> data);
// Vulkan texture handle
vk::Image image = VK_NULL_HANDLE;
vk::ImageView image_view = VK_NULL_HANDLE;
VmaAllocation allocation = nullptr;
bool is_texture_owned = true;
// Texture properties
vk::Format advertised_format = vk::Format::eUndefined;
vk::Format internal_format = vk::Format::eUndefined;
vk::ImageAspectFlags aspect = vk::ImageAspectFlagBits::eNone;
std::array<vk::ImageLayout, TEXTURE_MAX_LEVELS> layouts;
};
/**
* Staging texture located in CPU memory. Used for intermediate format
* conversions
*/
class StagingTexture : public VideoCore::TextureBase {
public:
StagingTexture(Instance& instance, CommandScheduler& scheduler,
const TextureInfo& info);
~StagingTexture();
/// Flushes any writes made to texture memory
void Commit(u32 size);
/// Returns a pointer to the mapped texture memory
void* GetMappedPtr() {
return mapped_ptr;
}
/// Returns the staging image handle
vk::Image GetHandle() const {
return image;
}
private:
Texture::Info info{};
vk::ImageLayout layout{};
vk::ImageAspectFlags aspect{};
vk::Image texture;
vk::ImageView view;
vk::DeviceMemory memory;
u32 image_size{};
bool adopted{false};
bool is_rgb{false}, is_d24s8{false};
Instance& instance;
CommandScheduler& scheduler;
vk::Image image = VK_NULL_HANDLE;
VmaAllocation allocation = VK_NULL_HANDLE;
vk::Format format = vk::Format::eUndefined;
u32 capacity = 0;
void* mapped_ptr = nullptr;
};
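For reference, the conversion path this class serves (see Upload() in the .cpp above) boils down to the following pattern; a sketch assuming a blit-capable color format:
    StagingTexture staging{instance, scheduler, info};
    std::memcpy(staging.GetMappedPtr(), pixels.data(), pixels.size());
    staging.Commit(static_cast<u32>(pixels.size()));
    // The scheduler's command buffer then blits from the staging image
    // (kept in eGeneral layout) into the destination texture, letting the
    // GPU perform the format conversion.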
/**
* Vulkan sampler object
*/
class Sampler : public VideoCore::SamplerBase {
public:
Sampler(Instance& instance, SamplerInfo info);
~Sampler() override;
/// Returns the underlying vulkan sampler handle
vk::Sampler GetHandle() const {
return sampler;
}
private:
Instance& instance;
vk::Sampler sampler;
};
} // namespace VideoCore::Vulkan

View File

@ -56,9 +56,9 @@ struct OutputVertex {
Common::Vec2<float24> tc0;
Common::Vec2<float24> tc1;
float24 tc0_w;
INSERT_PADDING_WORDS(1);
INSERT_PADDING_WORDS_NOINIT(1);
Common::Vec3<float24> view;
INSERT_PADDING_WORDS(1);
INSERT_PADDING_WORDS_NOINIT(1);
Common::Vec2<float24> tc2;
static void ValidateSemantics(const RasterizerRegs& regs);

View File

@ -164,8 +164,10 @@ static void LogCritical(const char* msg) {
void JitShader::Compile_Assert(bool condition, const char* msg) {
if (!condition) {
ABI_PushRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
mov(ABI_PARAM1, reinterpret_cast<std::size_t>(msg));
CallFarFunction(*this, LogCritical);
ABI_PopRegistersAndAdjustStack(*this, PersistentCallerSavedRegs(), 0);
}
}
@ -595,11 +597,11 @@ void JitShader::Compile_END(Instruction instr) {
}
void JitShader::Compile_BREAKC(Instruction instr) {
Compile_Assert(looping, "BREAKC must be inside a LOOP");
if (looping) {
Compile_Assert(loop_depth, "BREAKC must be inside a LOOP");
if (loop_depth) {
Compile_EvaluateCondition(instr);
ASSERT(loop_break_label);
jnz(*loop_break_label);
ASSERT(!loop_break_labels.empty());
jnz(loop_break_labels.back(), T_NEAR);
}
}
@ -725,9 +727,11 @@ void JitShader::Compile_IF(Instruction instr) {
void JitShader::Compile_LOOP(Instruction instr) {
Compile_Assert(instr.flow_control.dest_offset >= program_counter,
"Backwards loops not supported");
Compile_Assert(!looping, "Nested loops not supported");
looping = true;
Compile_Assert(loop_depth < 1, "Nested loops may not be supported");
if (loop_depth++) {
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PushRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
@ -746,16 +750,20 @@ void JitShader::Compile_LOOP(Instruction instr) {
Label l_loop_start;
L(l_loop_start);
loop_break_label = Xbyak::Label();
loop_break_labels.emplace_back(Xbyak::Label());
Compile_Block(instr.flow_control.dest_offset + 1);
add(LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
sub(LOOPCOUNT, 1); // Decrement loop count by 1
jnz(l_loop_start); // Loop if not zero
L(*loop_break_label);
loop_break_label.reset();
looping = false;
L(loop_break_labels.back());
loop_break_labels.pop_back();
if (--loop_depth) {
const auto loop_save_regs = BuildRegSet({LOOPCOUNT_REG, LOOPINC, LOOPCOUNT});
ABI_PopRegistersAndAdjustStack(*this, loop_save_regs, 0);
}
}
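In effect, the generated code now brackets an inner LOOP with a save/restore of the loop registers so the outer loop's state survives; schematically:
    // outer LOOP: LOOPCOUNT_REG/LOOPINC/LOOPCOUNT hold the outer loop's state
    //   enter inner LOOP (loop_depth was 1): push {LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}
    //     inner loop body runs with the registers holding the inner loop's state
    //   leave inner LOOP (loop_depth back to 1): pop {LOOPCOUNT_REG, LOOPINC, LOOPCOUNT}
    //   outer loop resumes with its original counters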
void JitShader::Compile_JMP(Instruction instr) {
@ -892,7 +900,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
// Reset flow control state
program = (CompiledShader*)getCurr();
program_counter = 0;
looping = false;
loop_depth = 0;
instruction_labels.fill(Xbyak::Label());
// Find all `CALL` instructions and identify return locations

View File

@ -120,15 +120,15 @@ private:
/// Mapping of Pica VS instructions to pointers in the emitted code
std::array<Xbyak::Label, MAX_PROGRAM_CODE_LENGTH> instruction_labels;
/// Label pointing to the end of the current LOOP block. Used by the BREAKC instruction to break
/// out of the loop.
std::optional<Xbyak::Label> loop_break_label;
/// Labels pointing to the end of each nested LOOP block. Used by the BREAKC instruction to
/// break out of a loop.
std::vector<Xbyak::Label> loop_break_labels;
/// Offsets in code where a return needs to be inserted
std::vector<unsigned> return_offsets;
unsigned program_counter = 0; ///< Offset of the next instruction to decode
bool looping = false; ///< True if compiling a loop, used to check for nested loops
u8 loop_depth = 0; ///< Depth of the (nested) loops currently compiled
using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr;

View File

@ -13,7 +13,7 @@ namespace Clipper {
using Shader::OutputVertex;
void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2);
void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2);
} // namespace Clipper
} // namespace Pica

View File

@ -46,7 +46,6 @@ ResultStatus Init(Frontend::EmuWindow& emu_window, Memory::MemorySystem& memory)
OpenGL::GLES = Settings::values.use_gles;
//g_renderer = std::make_unique<OpenGL::RendererOpenGL>(emu_window);
g_renderer = std::make_unique<Vulkan::RendererVulkan>(emu_window);
ResultStatus result = g_renderer->Init();

View File

@ -6,8 +6,8 @@
#include <atomic>
#include <iostream>
#include <functional>
#include <memory>
#include <functional>
#include "core/frontend/emu_window.h"
namespace Frontend {