VideoCore: implement channels on gpu caches.

2021-11-05 15:52:31 +01:00
parent c77b8df12e
commit 139ea93512
50 changed files with 1469 additions and 817 deletions
--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp
@@ -10,13 +10,17 @@
 #include "core/hle/service/nvdrv/core/container.h"
 #include "core/hle/service/nvdrv/core/nvmap.h"
 #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
+#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
+#include "core/hle/service/nvdrv/nvdrv.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

 namespace Service::Nvidia::Devices {

-nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
-    : nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
+nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
+    : nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
+      gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
 nvhost_as_gpu::~nvhost_as_gpu() = default;

 NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<

    const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
    if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
-        params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
+        params.offset = *(gmmu->AllocateFixed(params.offset, size));
    } else {
-        params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
+        params.offset = gmmu->Allocate(size, params.align);
    }

    auto result = NvResult::Success;
@@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
    LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
              params.pages, params.page_size);

-    system.GPU().MemoryManager().Unmap(params.offset,
-                                       static_cast<std::size_t>(params.pages) * params.page_size);
+    gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);

    std::memcpy(output.data(), &params, output.size());
    return NvResult::Success;
@@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
            // If nvmap handle is null, we should unmap instead.
            const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
            const auto size{static_cast<u64>(entry.pages) << 0x10};
-            system.GPU().MemoryManager().Unmap(offset, size);
+            gmmu->Unmap(offset, size);
            continue;
        }

@@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
        const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
        const auto size{static_cast<u64>(entry.pages) << 0x10};
        const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
-        const auto addr{
-            system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
+        const auto addr{gmmu->Map(object->address + map_offset, offset, size)};

        if (!addr) {
            LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
@@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
              params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
              params.offset);

-    auto& gpu = system.GPU();
    if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
        if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
            const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
            const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};

-            if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
+            if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
                LOG_CRITICAL(Service_NVDRV,
                             "remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
                             "mapping_size = {}, offset={}",
@@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8

    const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
    if (is_alloc) {
-        params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
+        params.offset = gmmu->MapAllocate(physical_address, size, page_size);
    } else {
-        params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
+        params.offset = gmmu->Map(physical_address, params.offset, size);
    }

    auto result = NvResult::Success;
@@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
    LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);

    if (const auto size{RemoveBufferMap(params.offset)}; size) {
-        system.GPU().MemoryManager().Unmap(params.offset, *size);
+        gmmu->Unmap(params.offset, *size);
    } else {
        LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
    }
@@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
 NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
    IoctlBindChannel params{};
    std::memcpy(&params, input.data(), input.size());
-    LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
+    LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);

-    channel = params.fd;
+    auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
+    gpu_channel_device->channel_state->memory_manager = gmmu;
    return NvResult::Success;
 }

--- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h
@@ -13,6 +13,14 @@
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"

+namespace Tegra {
+class MemoryManager;
+} // namespace Tegra
+
+namespace Service::Nvidia {
+class Module;
+}
+
 namespace Service::Nvidia::NvCore {
 class Container;
 class NvMap;
@@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);

 class nvhost_as_gpu final : public nvdevice {
 public:
-    explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
+    explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
    ~nvhost_as_gpu() override;

    NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
@@ -187,9 +195,13 @@ private:
    void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
    std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);

+    Module& module;
+
    NvCore::Container& container;
    NvCore::NvMap& nvmap;

+    std::shared_ptr<Tegra::MemoryManager> gmmu;
+
    // This is expected to be ordered, therefore we must use a map, not unordered_map
    std::map<GPUVAddr, BufferMap> buffer_mappings;
 };
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -11,12 +11,14 @@
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/hle/service/nvdrv/nvdrv.h"
 #include "core/memory.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/engines/puller.h"
 #include "video_core/gpu.h"

 namespace Service::Nvidia::Devices {
 namespace {
-Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
-    Tegra::GPU::FenceAction result{};
+Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
+    Tegra::Engines::Puller::FenceAction result{};
    result.op.Assign(op);
    result.syncpoint_id.Assign(syncpoint_id);
    return {result.raw};
@@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
 nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
                       NvCore::Container& core_)
    : nvdevice{system_}, events_interface{events_interface_}, core{core_},
-      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
+      syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
+      channel_state{system.GPU().AllocateChannel()} {
    channel_fence.id = syncpoint_manager.AllocateSyncpoint();
    channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
    sm_exception_breakpoint_int_report_event =
@@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
                params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
                params.unk3);

+    if (channel_state->initiated) {
+        LOG_CRITICAL(Service_NVDRV, "Already allocated!");
+        return NvResult::AlreadyAllocated;
+    }
+
+    system.GPU().InitChannel(*channel_state);
    channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);

    params.fence_out = channel_fence;
@@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
        {fence.value},
        Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                  Tegra::SubmissionMode::Increasing),
-        BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
+        BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
    };
 }

@@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
    for (u32 count = 0; count < add_increment; ++count) {
        result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
                                                      Tegra::SubmissionMode::Increasing));
-        result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
+        result.emplace_back(
+            BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
    }

    return result;
@@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>

    auto& gpu = system.GPU();

+    const auto bind_id = channel_state->bind_id;
+
    params.fence_out.id = channel_fence.id;

    if (params.flags.add_wait.Value() &&
        !syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
-        gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
+        gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
    }

    if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
@@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
        params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
    }

-    gpu.PushGPUEntries(std::move(entries));
+    gpu.PushGPUEntries(bind_id, std::move(entries));

    if (params.flags.add_increment.Value()) {
        if (params.flags.suppress_wfi) {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
        } else {
-            gpu.PushGPUEntries(Tegra::CommandList{
-                BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
+            gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
+                                            params.fence_out, params.AddIncrementValue())});
        }
    }

--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -13,6 +13,12 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "video_core/dma_pusher.h"

+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Service::Nvidia {

 namespace NvCore {
@@ -26,6 +32,7 @@ class EventInterface;

 namespace Service::Nvidia::Devices {

+class nvhost_as_gpu;
 class nvmap;
 class nvhost_gpu final : public nvdevice {
 public:
@@ -46,6 +53,7 @@ public:
    Kernel::KEvent* QueryEvent(u32 event_id) override;

 private:
+    friend class nvhost_as_gpu;
    enum class CtxObjects : u32_le {
        Ctx2D = 0x902D,
        Ctx3D = 0xB197,
@@ -204,6 +212,7 @@ private:
    NvCore::Container& core;
    NvCore::SyncpointManager& syncpoint_manager;
    NvCore::NvMap& nvmap;
+    std::shared_ptr<Tegra::Control::ChannelState> channel_state;
    NvFence channel_fence;

    // Events
--- a/src/core/hle/service/nvdrv/devices/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp
@@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
    IocFromIdParams params;
    std::memcpy(&params, input.data(), sizeof(params));

-    LOG_DEBUG(Service_NVDRV, "called, id:{}");
+    LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);

    // Handles and IDs are always the same value in nvmap however IDs can be used globally given the
    // right permissions.
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -74,7 +74,7 @@ Module::Module(Core::System& system)
    : service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
    builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
        std::shared_ptr<Devices::nvdevice> device =
-            std::make_shared<Devices::nvhost_as_gpu>(system, container);
+            std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
        return open_files.emplace(fd, device).first;
    };
    builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -35,6 +35,12 @@ add_library(video_core STATIC
    command_classes/vic.h
    compatible_formats.cpp
    compatible_formats.h
+    control/channel_state.cpp
+    control/channel_state.h
+    control/channel_state_cache.cpp
+    control/channel_state_cache.h
+    control/scheduler.cpp
+    control/scheduler.h
    delayed_destruction_ring.h
    dirty_flags.cpp
    dirty_flags.h
@@ -54,6 +60,8 @@ add_library(video_core STATIC
    engines/maxwell_3d.h
    engines/maxwell_dma.cpp
    engines/maxwell_dma.h
+    engines/puller.cpp
+    engines/puller.h
    framebuffer_config.h
    macro/macro.cpp
    macro/macro.h
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@

 #include <algorithm>
 #include <array>
-#include <deque>
 #include <memory>
 #include <mutex>
 #include <numeric>
@@ -23,6 +22,7 @@
 #include "common/settings.h"
 #include "core/memory.h"
 #include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;

 template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {

    // Page size for caching purposes.
    // This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
    static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);

    explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                         Runtime& runtime_);
+                         Core::Memory::Memory& cpu_memory_, Runtime& runtime_);

    void TickFrame();

@@ -367,9 +364,6 @@ private:
    void ClearDownload(IntervalType subtract_interval);

    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
    Core::Memory::Memory& cpu_memory;

    SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:

 template <class P>
 BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::Engines::KeplerCompute& kepler_compute_,
-                            Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                            Runtime& runtime_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+                            Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+    : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
    // Ensure the first slot is used for the null buffer
    void(slot_buffers.insert(runtime, NullBufferParams{}));
    common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {

 template <class P>
 bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
-    const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
-    const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+    const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+    const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
    if (!cpu_src_address || !cpu_dest_address) {
        return false;
    }
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am

 template <class P>
 bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
-    const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+    const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
    if (!cpu_dst_address) {
        return false;
    }
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
 template <class P>
 void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
                                               u32 size) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    const Binding binding{
        .cpu_addr = *cpu_addr,
        .size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
    if (is_indexed) {
        BindHostIndexBuffer();
    } else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
        if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
            runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
        }
@@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
    enabled_storage_buffers[stage] |= 1U << ssbo_index;
    written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;

-    const auto& cbufs = maxwell3d.state.shader_stages[stage];
+    const auto& cbufs = maxwell3d->state.shader_stages[stage];
    const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
    storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
 }
@@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
    enabled_compute_storage_buffers |= 1U << ssbo_index;
    written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;

-    const auto& launch_desc = kepler_compute.launch_description;
+    const auto& launch_desc = kepler_compute->launch_description;
    ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);

    const auto& cbufs = launch_desc.const_buffer_config;
@@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
    const u32 size = index_buffer.size;
    SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
    if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
-        const u32 new_offset = offset + maxwell3d.regs.index_array.first *
-                                            maxwell3d.regs.index_array.FormatSizeInBytes();
+        const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+                                            maxwell3d->regs.index_array.FormatSizeInBytes();
        runtime.BindIndexBuffer(buffer, new_offset, size);
    } else {
-        runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
-                                maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
-                                buffer, offset, size);
+        runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+                                maxwell3d->regs.index_array.first,
+                                maxwell3d->regs.index_array.count, buffer, offset, size);
    }
 }

 template <class P>
 void BufferCache<P>::BindHostVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
        const Binding& binding = vertex_buffers[index];
        Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
        }
        flags[Dirty::VertexBuffer0 + index] = false;

-        const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+        const u32 stride = maxwell3d->regs.vertex_array[index].stride;
        const u32 offset = buffer.Offset(binding.cpu_addr);
        runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
    }
@@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {

 template <class P>
 void BufferCache<P>::BindHostTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1262,8 +1252,8 @@ template <class P>
 void BufferCache<P>::UpdateIndexBuffer() {
    // We have to check for the dirty flags and index count
    // The index count is currently changed without updating the dirty flags
-    const auto& index_array = maxwell3d.regs.index_array;
-    auto& flags = maxwell3d.dirty.flags;
+    const auto& index_array = maxwell3d->regs.index_array;
+    auto& flags = maxwell3d->dirty.flags;
    if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
        return;
    }
@@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {

    const GPUVAddr gpu_addr_begin = index_array.StartAddress();
    const GPUVAddr gpu_addr_end = index_array.EndAddress();
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
    const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {

 template <class P>
 void BufferCache<P>::UpdateVertexBuffers() {
-    auto& flags = maxwell3d.dirty.flags;
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+    auto& flags = maxwell3d->dirty.flags;
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
        return;
    }
    flags[Dirty::VertexBuffers] = false;
@@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {

 template <class P>
 void BufferCache<P>::UpdateVertexBuffer(u32 index) {
-    if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+    if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
        return;
    }
-    const auto& array = maxwell3d.regs.vertex_array[index];
-    const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+    const auto& array = maxwell3d->regs.vertex_array[index];
+    const auto& limit = maxwell3d->regs.vertex_array_limit[index];
    const GPUVAddr gpu_addr_begin = array.StartAddress();
    const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
-    u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
-    if (address_size >= 64_MiB) {
-        // Reported vertex buffer size is very large, cap to mapped buffer size
-        GPUVAddr submapped_addr_end = gpu_addr_begin;
-
-        const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
-        if (ranges.size() > 0) {
-            const auto& [addr, size] = *ranges.begin();
-            submapped_addr_end = addr + size;
-        }
-
-        address_size =
-            std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
-    }
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+    const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
    const u32 size = address_size; // TODO: Analyze stride and number of vertices
    if (array.enable == 0 || size == 0 || !cpu_addr) {
        vertex_buffers[index] = NULL_BINDING;
@@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {

 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffers() {
-    if (maxwell3d.regs.tfb_enabled == 0) {
+    if (maxwell3d->regs.tfb_enabled == 0) {
        return;
    }
    for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {

 template <class P>
 void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
-    const auto& binding = maxwell3d.regs.tfb_bindings[index];
+    const auto& binding = maxwell3d->regs.tfb_bindings[index];
    const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
    const u32 size = binding.buffer_size;
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
        transform_feedback_buffers[index] = NULL_BINDING;
        return;
@@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
    ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
        Binding& binding = compute_uniform_buffers[index];
        binding = NULL_BINDING;
-        const auto& launch_desc = kepler_compute.launch_description;
+        const auto& launch_desc = kepler_compute->launch_description;
        if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
            const auto& cbuf = launch_desc.const_buffer_config[index];
-            const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+            const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
            if (cpu_addr) {
                binding.cpu_addr = *cpu_addr;
                binding.size = cbuf.size;
@@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
        dirty_uniform_buffers.fill(~u32{0});
        uniform_buffer_binding_sizes.fill({});
    }
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    flags[Dirty::IndexBuffer] = true;
    flags[Dirty::VertexBuffers] = true;
    for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {

 template <class P>
 typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
-    const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
-    const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+    const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr || size == 0) {
        return NULL_BINDING;
    }
@@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
 template <class P>
 typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
    GPUVAddr gpu_addr, u32 size, PixelFormat format) {
-    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    TextureBufferBinding binding;
    if (!cpu_addr || size == 0) {
        binding.cpu_addr = 0;
--- a/src/video_core/control/channel_state.cpp
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+ChannelState::ChannelState(s32 bind_id_) {
+    bind_id = bind_id_;
+    initiated = false;
+}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+    ASSERT(memory_manager);
+    dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+    maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+    fermi_2d = std::make_unique<Engines::Fermi2D>();
+    kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+    maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+    kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+    initiated = true;
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    dma_pusher->BindRasterizer(rasterizer);
+    memory_manager->BindRasterizer(rasterizer);
+    maxwell_3d->BindRasterizer(rasterizer);
+    fermi_2d->BindRasterizer(rasterizer);
+    kepler_memory->BindRasterizer(rasterizer);
+    kepler_compute->BindRasterizer(rasterizer);
+    maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
--- a/src/video_core/control/channel_state.h
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+struct ChannelState {
+    ChannelState(s32 bind_id);
+    ChannelState(const ChannelState& state) = delete;
+    ChannelState& operator=(const ChannelState&) = delete;
+    ChannelState(ChannelState&& other) noexcept = default;
+    ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+    void Init(Core::System& system, GPU& gpu);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    s32 bind_id = -1;
+    /// 3D engine
+    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+    /// 2D engine
+    std::unique_ptr<Engines::Fermi2D> fermi_2d;
+    /// Compute engine
+    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+    /// DMA engine
+    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+    /// Inline memory engine
+    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+    std::shared_ptr<MemoryManager> memory_manager;
+
+    std::unique_ptr<DmaPusher> dma_pusher;
+
+    bool initiated{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
--- a/src/video_core/control/channel_state_cache.cpp
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,5 @@
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+}
--- a/src/video_core/control/channel_state_cache.h
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <unordered_map>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+class ChannelInfo {
+public:
+    ChannelInfo() = delete;
+    ChannelInfo(Tegra::Control::ChannelState& state);
+    ChannelInfo(const ChannelInfo& state) = delete;
+    ChannelInfo& operator=(const ChannelInfo&) = delete;
+    ChannelInfo(ChannelInfo&& other) = default;
+    ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+    Tegra::Engines::Maxwell3D& maxwell3d;
+    Tegra::Engines::KeplerCompute& kepler_compute;
+    Tegra::MemoryManager& gpu_memory;
+};
+
+template <class P>
+class ChannelSetupCaches {
+public:
+    /// Operations for seting the channel of execution.
+
+    /// Create channel state.
+    void CreateChannel(Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
+protected:
+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+    std::deque<P> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    P* channel_state;
+    size_t current_channel_id{UNSET_CHANNEL};
+    Tegra::Engines::Maxwell3D* maxwell3d;
+    Tegra::Engines::KeplerCompute* kepler_compute;
+    Tegra::MemoryManager* gpu_memory;
+};
+
+} // namespace VideoCommon
--- a/src/video_core/control/channel_state_cache.inc
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,64 @@
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+    : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+      gpu_memory{*channel_state.memory_manager} {}
+
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            new (&channel_storage[id]) ChannelInfo(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    channel_state = &channel_storage[current_channel_id];
+    maxwell3d = &channel_state->maxwell3d;
+    kepler_compute = &channel_state->kepler_compute;
+    gpu_memory = &channel_state->gpu_memory;
+}
+
+/// Erase channel's channel_state.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id);
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL;
+        channel_state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        channel_state = &channel_storage[current_channel_id];
+    }
+}
+
+
+} // namespace VideoCommon
--- a/src/video_core/control/scheduler.cpp
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+Scheduler::~Scheduler() = default;
+
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    auto it = channels.find(channel);
+    auto channel_state = it->second;
+    gpu.BindChannel(channel_state->bind_id);
+    channel_state->dma_pusher->Push(std::move(entries));
+    channel_state->dma_pusher->DispatchCalls();
+}
+
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+    s32 channel = new_channel->bind_id;
+    std::unique_lock<std::mutex> lk(scheduling_guard);
+    channels.emplace(channel, new_channel);
+}
+
+} // namespace Tegra::Control
--- a/src/video_core/control/scheduler.h
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+class Scheduler {
+public:
+    Scheduler(GPU& gpu_);
+    ~Scheduler();
+
+    void Push(s32 channel, CommandList&& entries);
+
+    void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
+
+private:
+    std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+    std::mutex scheduling_guard;
+    GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,10 @@

 namespace Tegra {

-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+                     Control::ChannelState& channel_state_)
+    : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
+                                                                          *this, channel_state_} {}

 DmaPusher::~DmaPusher() = default;

@@ -76,11 +79,11 @@ bool DmaPusher::Step() {
        // Push buffer non-empty, read a word
        command_headers.resize(command_list_header.size);
        if (Settings::IsGPULevelHigh()) {
-            gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
-                                          command_list_header.size * sizeof(u32));
+            memory_manager.ReadBlock(dma_get, command_headers.data(),
+                                     command_list_header.size * sizeof(u32));
        } else {
-            gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
-                                                command_list_header.size * sizeof(u32));
+            memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
+                                           command_list_header.size * sizeof(u32));
        }
    }
    for (std::size_t index = 0; index < command_headers.size();) {
@@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {

 void DmaPusher::CallMethod(u32 argument) const {
    if (dma_state.method < non_puller_methods) {
-        gpu.CallMethod(GPU::MethodCall{
+        puller.CallPullerMethod(Engines::Puller::MethodCall{
            dma_state.method,
            argument,
            dma_state.subchannel,
@@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {

 void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
    if (dma_state.method < non_puller_methods) {
-        gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
-                            dma_state.method_count);
+        puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+                               dma_state.method_count);
    } else {
        subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
                                                           num_methods, dma_state.method_count);
    }
 }

+void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+    puller.BindRasterizer(rasterizer);
+}
+
 } // namespace Tegra
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "video_core/engines/engine_interface.h"
+#include "video_core/engines/puller.h"

 namespace Core {
 class System;
@@ -17,7 +18,12 @@ class System;

 namespace Tegra {

+namespace Control {
+struct ChannelState;
+}
+
 class GPU;
+class MemoryManager;

 enum class SubmissionMode : u32 {
    IncreasingOld = 0,
@@ -102,7 +108,8 @@ struct CommandList final {
 */
 class DmaPusher final {
 public:
-    explicit DmaPusher(Core::System& system_, GPU& gpu_);
+    explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+                       Control::ChannelState& channel_state_);
    ~DmaPusher();

    void Push(CommandList&& entries) {
@@ -115,6 +122,8 @@ public:
        subchannels[subchannel_id] = engine;
    }

+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
 private:
    static constexpr u32 non_puller_methods = 0x40;
    static constexpr u32 max_subchannels = 8;
@@ -148,6 +157,8 @@ private:

    GPU& gpu;
    Core::System& system;
+    MemoryManager& memory_manager;
+    mutable Engines::Puller puller;
 };

 } // namespace Tegra
--- a/src/video_core/engines/puller.cpp
+++ b/src/video_core/engines/puller.cpp
@@ -0,0 +1,297 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
+               Control::ChannelState& channel_state_)
+    : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
+                                                                               channel_state_} {}
+
+Puller::~Puller() = default;
+
+void Puller::ProcessBindMethod(const MethodCall& method_call) {
+    // Bind the current subchannel to the desired engine id.
+    LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+              method_call.argument);
+    const auto engine_id = static_cast<EngineID>(method_call.argument);
+    bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+    switch (engine_id) {
+    case EngineID::FERMI_TWOD_A:
+        dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
+        break;
+    case EngineID::MAXWELL_B:
+        dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+    }
+}
+
+void Puller::ProcessFenceActionMethod() {
+    switch (regs.fence_action.op) {
+    case Puller::FenceOperation::Acquire:
+        // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+        // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+        break;
+    case Puller::FenceOperation::Increment:
+        rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+    }
+}
+
+void Puller::ProcessWaitForInterruptMethod() {
+    // TODO(bunnei) ImplementMe
+    LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
+void Puller::ProcessSemaphoreTriggerMethod() {
+    const auto semaphoreOperationMask = 0xF;
+    const auto op =
+        static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+    if (op == GpuSemaphoreOperation::WriteLong) {
+        struct Block {
+            u32 sequence;
+            u32 zeros = 0;
+            u64 timestamp;
+        };
+
+        Block block{};
+        block.sequence = regs.semaphore_sequence;
+        // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+        // CoreTiming
+        block.timestamp = gpu.GetTicks();
+        memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
+    } else {
+        const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+        if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+            (op == GpuSemaphoreOperation::AcquireGequal &&
+             static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
+            (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+            // Nothing to do in this case
+        } else {
+            regs.acquire_source = true;
+            regs.acquire_value = regs.semaphore_sequence;
+            if (op == GpuSemaphoreOperation::AcquireEqual) {
+                regs.acquire_active = true;
+                regs.acquire_mode = false;
+            } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+                regs.acquire_active = true;
+                regs.acquire_mode = true;
+            } else if (op == GpuSemaphoreOperation::AcquireMask) {
+                // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
+                // semaphore_sequence, gives a non-0 result
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+            } else {
+                LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+            }
+        }
+    }
+}
+
+void Puller::ProcessSemaphoreRelease() {
+    memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
+}
+
+void Puller::ProcessSemaphoreAcquire() {
+    const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+    const auto value = regs.semaphore_acquire;
+    if (word != value) {
+        regs.acquire_active = true;
+        regs.acquire_value = value;
+        // TODO(kemathe73) figure out how to do the acquire_timeout
+        regs.acquire_mode = false;
+        regs.acquire_source = false;
+    }
+}
+
+/// Calls a GPU puller method.
+void Puller::CallPullerMethod(const MethodCall& method_call) {
+    regs.reg_array[method_call.method] = method_call.argument;
+    const auto method = static_cast<BufferMethods>(method_call.method);
+
+    switch (method) {
+    case BufferMethods::BindObject: {
+        ProcessBindMethod(method_call);
+        break;
+    }
+    case BufferMethods::Nop:
+    case BufferMethods::SemaphoreAddressHigh:
+    case BufferMethods::SemaphoreAddressLow:
+    case BufferMethods::SemaphoreSequence:
+    case BufferMethods::UnkCacheFlush:
+    case BufferMethods::WrcacheFlush:
+    case BufferMethods::FenceValue:
+        break;
+    case BufferMethods::RefCnt:
+        rasterizer->SignalReference();
+        break;
+    case BufferMethods::FenceAction:
+        ProcessFenceActionMethod();
+        break;
+    case BufferMethods::WaitForInterrupt:
+        ProcessWaitForInterruptMethod();
+        break;
+    case BufferMethods::SemaphoreTrigger: {
+        ProcessSemaphoreTriggerMethod();
+        break;
+    }
+    case BufferMethods::NotifyIntr: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+        break;
+    }
+    case BufferMethods::Unk28: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+        break;
+    }
+    case BufferMethods::SemaphoreAcquire: {
+        ProcessSemaphoreAcquire();
+        break;
+    }
+    case BufferMethods::SemaphoreRelease: {
+        ProcessSemaphoreRelease();
+        break;
+    }
+    case BufferMethods::Yield: {
+        // TODO(Kmather73): Research and implement this method.
+        LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+        break;
+    }
+    default:
+        LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+        break;
+    }
+}
+
+/// Calls a GPU engine method.
+void Puller::CallEngineMethod(const MethodCall& method_call) {
+    const EngineID engine = bound_engines[method_call.subchannel];
+
+    switch (engine) {
+    case EngineID::FERMI_TWOD_A:
+        channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
+                                           method_call.IsLastCall());
+        break;
+    case EngineID::MAXWELL_B:
+        channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
+                                             method_call.IsLastCall());
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
+                                                 method_call.IsLastCall());
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
+                                              method_call.IsLastCall());
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
+                                                method_call.IsLastCall());
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
+}
+
+/// Calls a GPU engine multivalue method.
+void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                                   u32 methods_pending) {
+    const EngineID engine = bound_engines[subchannel];
+
+    switch (engine) {
+    case EngineID::FERMI_TWOD_A:
+        channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_B:
+        channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_COMPUTE_B:
+        channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::MAXWELL_DMA_COPY_A:
+        channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+        channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+        break;
+    default:
+        UNIMPLEMENTED_MSG("Unimplemented engine");
+    }
+}
+
+/// Calls a GPU method.
+void Puller::CallMethod(const MethodCall& method_call) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+              method_call.subchannel);
+
+    ASSERT(method_call.subchannel < bound_engines.size());
+
+    if (ExecuteMethodOnEngine(method_call.method)) {
+        CallEngineMethod(method_call);
+    } else {
+        CallPullerMethod(method_call);
+    }
+}
+
+/// Calls a GPU multivalue method.
+void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                             u32 methods_pending) {
+    LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+    ASSERT(subchannel < bound_engines.size());
+
+    if (ExecuteMethodOnEngine(method)) {
+        CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+    } else {
+        for (std::size_t i = 0; i < amount; i++) {
+            CallPullerMethod(MethodCall{
+                method,
+                base_start[i],
+                subchannel,
+                methods_pending - static_cast<u32>(i),
+            });
+        }
+    }
+}
+
+void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+    rasterizer = rasterizer_;
+}
+
+/// Determines where the method should be executed.
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
+    const auto buffer_method = static_cast<BufferMethods>(method);
+    return buffer_method >= BufferMethods::NonPullerMethods;
+}
+
+} // namespace Tegra::Engines
--- a/src/video_core/engines/puller.h
+++ b/src/video_core/engines/puller.h
@@ -0,0 +1,179 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+class DmaPusher;
+
+enum class EngineID {
+    FERMI_TWOD_A = 0x902D, // 2D Engine
+    MAXWELL_B = 0xB197,    // 3D Engine
+    KEPLER_COMPUTE_B = 0xB1C0,
+    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+    MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class Puller final {
+public:
+    struct MethodCall {
+        u32 method{};
+        u32 argument{};
+        u32 subchannel{};
+        u32 method_count{};
+
+        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+            : method(method_), argument(argument_), subchannel(subchannel_),
+              method_count(method_count_) {}
+
+        [[nodiscard]] bool IsLastCall() const {
+            return method_count <= 1;
+        }
+    };
+
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+    };
+
+    explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
+                    Control::ChannelState& channel_state);
+    ~Puller();
+
+    void CallMethod(const MethodCall& method_call);
+
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending);
+
+    void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+    void CallPullerMethod(const MethodCall& method_call);
+
+    void CallEngineMethod(const MethodCall& method_call);
+
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending);
+
+private:
+    Tegra::GPU& gpu;
+
+    MemoryManager& memory_manager;
+    DmaPusher& dma_pusher;
+    Control::ChannelState& channel_state;
+    VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+    static constexpr std::size_t NUM_REGS = 0x800;
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x40;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS_NOINIT(0x4);
+                struct {
+                    u32 address_high;
+                    u32 address_low;
+
+                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                                     address_low);
+                    }
+                } semaphore_address;
+
+                u32 semaphore_sequence;
+                u32 semaphore_trigger;
+                INSERT_PADDING_WORDS_NOINIT(0xC);
+
+                // The pusher and the puller share the reference counter, the pusher only has read
+                // access
+                u32 reference_count;
+                INSERT_PADDING_WORDS_NOINIT(0x5);
+
+                u32 semaphore_acquire;
+                u32 semaphore_release;
+                u32 fence_value;
+                FenceAction fence_action;
+                INSERT_PADDING_WORDS_NOINIT(0xE2);
+
+                // Puller state
+                u32 acquire_mode;
+                u32 acquire_source;
+                u32 acquire_active;
+                u32 acquire_timeout;
+                u32 acquire_value;
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessSemaphoreAcquire();
+    void ProcessSemaphoreRelease();
+    void ProcessSemaphoreTriggerMethod();
+    void ProcessWaitForInterruptMethod();
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+
+    /// Mapping of command subchannels to their bound engine ids
+    std::array<EngineID, 8> bound_engines{};
+
+    enum class GpuSemaphoreOperation {
+        AcquireEqual = 0x1,
+        WriteLong = 0x2,
+        AcquireGequal = 0x4,
+        AcquireMask = 0x8,
+    };
+
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
+                  "Field " #field_name " has invalid position")
+
+    ASSERT_REG_POSITION(semaphore_address, 0x4);
+    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+    ASSERT_REG_POSITION(reference_count, 0x14);
+    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+    ASSERT_REG_POSITION(semaphore_release, 0x1B);
+    ASSERT_REG_POSITION(fence_value, 0x1C);
+    ASSERT_REG_POSITION(fence_action, 0x1D);
+
+    ASSERT_REG_POSITION(acquire_mode, 0x100);
+    ASSERT_REG_POSITION(acquire_source, 0x101);
+    ASSERT_REG_POSITION(acquire_active, 0x102);
+    ASSERT_REG_POSITION(acquire_timeout, 0x103);
+    ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,12 +4,13 @@
 #pragma once

 #include <algorithm>
+#include <cstring>
+#include <memory>
 #include <queue>

 #include "common/common_types.h"
 #include "video_core/delayed_destruction_ring.h"
 #include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"

 namespace VideoCommon {
@@ -19,10 +20,10 @@ public:
    explicit FenceBase(u32 payload_, bool is_stubbed_)
        : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}

-    explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+    explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
        : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}

-    GPUVAddr GetAddress() const {
+    u8* GetAddress() const {
        return address;
    }

@@ -35,7 +36,7 @@ public:
    }

 private:
-    GPUVAddr address;
+    u8* address;
    u32 payload;
    bool is_semaphore;

@@ -57,7 +58,7 @@ public:
        buffer_cache.AccumulateFlushes();
    }

-    void SignalSemaphore(GPUVAddr addr, u32 value) {
+    void SignalSemaphore(u8* addr, u32 value) {
        TryReleasePendingFences();
        const bool should_flush = ShouldFlush();
        CommitAsyncFlushes();
@@ -91,8 +92,9 @@ public:
            }
            PopAsyncFlushes();
            if (current_fence->IsSemaphore()) {
-                gpu_memory.template Write<u32>(current_fence->GetAddress(),
-                                               current_fence->GetPayload());
+                char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+                auto payload = current_fence->GetPayload();
+                std::memcpy(address, &payload, sizeof(payload));
            } else {
                gpu.IncrementSyncPoint(current_fence->GetPayload());
            }
@@ -104,8 +106,8 @@ protected:
    explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
                          TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
                          TQueryCache& query_cache_)
-        : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
-          texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+        : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
+          buffer_cache{buffer_cache_}, query_cache{query_cache_} {}

    virtual ~FenceManager() = default;

@@ -113,7 +115,7 @@ protected:
    /// true
    virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
    /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
-    virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+    virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
    /// Queues a fence into the backend if the fence isn't stubbed.
    virtual void QueueFence(TFence& fence) = 0;
    /// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -123,7 +125,6 @@ protected:

    VideoCore::RasterizerInterface& rasterizer;
    Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
    TTextureCache& texture_cache;
    TTBufferCache& buffer_cache;
    TQueryCache& query_cache;
@@ -137,8 +138,9 @@ private:
            }
            PopAsyncFlushes();
            if (current_fence->IsSemaphore()) {
-                gpu_memory.template Write<u32>(current_fence->GetAddress(),
-                                               current_fence->GetPayload());
+                char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+                const auto payload = current_fence->GetPayload();
+                std::memcpy(address, &payload, sizeof(payload));
            } else {
                gpu.IncrementSyncPoint(current_fence->GetPayload());
            }
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,6 +18,8 @@
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/perf_stats.h"
 #include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/engines/kepler_compute.h"
@@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));

 struct GPU::Impl {
    explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
-        : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
-                                          system)},
-          dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
-          maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
-          fermi_2d{std::make_unique<Engines::Fermi2D>()},
-          kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
-          maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
-          kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+        : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
          shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
-          gpu_thread{system_, is_async_} {}
+          gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}

    ~Impl() = default;

+    std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+        auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+        channels.emplace(channel_id, channel_state);
+        scheduler->DeclareChannel(channel_state);
+        return channel_state;
+    }
+
+    void BindChannel(s32 channel_id) {
+        if (bound_channel == channel_id) {
+            return;
+        }
+        auto it = channels.find(channel_id);
+        ASSERT(it != channels.end());
+        bound_channel = channel_id;
+        current_channel = it->second.get();
+
+        rasterizer->BindChannel(*current_channel);
+    }
+
+    std::shared_ptr<Control::ChannelState> AllocateChannel() {
+        return CreateChannel(new_channel_id++);
+    }
+
+    void InitChannel(Control::ChannelState& to_init) {
+        to_init.Init(system, gpu);
+        to_init.BindRasterizer(rasterizer);
+        rasterizer->InitializeChannel(to_init);
+    }
+
+    void ReleaseChannel(Control::ChannelState& to_release) {
+        UNIMPLEMENTED();
+    }
+
+    void CreateHost1xChannel() {
+        if (host1x_channel) {
+            return;
+        }
+        host1x_channel = CreateChannel(0);
+        host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
+        InitChannel(*host1x_channel);
+    }
+
    /// Binds a renderer to the GPU.
    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
        renderer = std::move(renderer_);
        rasterizer = renderer->ReadRasterizer();
-
-        memory_manager->BindRasterizer(rasterizer);
-        maxwell_3d->BindRasterizer(rasterizer);
-        fermi_2d->BindRasterizer(rasterizer);
-        kepler_compute->BindRasterizer(rasterizer);
-        kepler_memory->BindRasterizer(rasterizer);
-        maxwell_dma->BindRasterizer(rasterizer);
-    }
-
-    /// Calls a GPU method.
-    void CallMethod(const GPU::MethodCall& method_call) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
-                  method_call.subchannel);
-
-        ASSERT(method_call.subchannel < bound_engines.size());
-
-        if (ExecuteMethodOnEngine(method_call.method)) {
-            CallEngineMethod(method_call);
-        } else {
-            CallPullerMethod(method_call);
-        }
-    }
-
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending) {
-        LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
-
-        ASSERT(subchannel < bound_engines.size());
-
-        if (ExecuteMethodOnEngine(method)) {
-            CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
-        } else {
-            for (std::size_t i = 0; i < amount; i++) {
-                CallPullerMethod(GPU::MethodCall{
-                    method,
-                    base_start[i],
-                    subchannel,
-                    methods_pending - static_cast<u32>(i),
-                });
-            }
-        }
    }

    /// Flush all current written commands into the host GPU for execution.
@@ -146,42 +141,44 @@ struct GPU::Impl {

    /// Returns a reference to the Maxwell3D GPU engine.
    [[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
    }

    /// Returns a const reference to the Maxwell3D GPU engine.
    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
-        return *maxwell_3d;
+        ASSERT(current_channel);
+        return *current_channel->maxwell_3d;
    }

    /// Returns a reference to the KeplerCompute GPU engine.
    [[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
-        return *kepler_compute;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
    }

    /// Returns a reference to the KeplerCompute GPU engine.
    [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
-        return *kepler_compute;
+        ASSERT(current_channel);
+        return *current_channel->kepler_compute;
    }

    /// Returns a reference to the GPU memory manager.
    [[nodiscard]] Tegra::MemoryManager& MemoryManager() {
-        return *memory_manager;
-    }
-
-    /// Returns a const reference to the GPU memory manager.
-    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
-        return *memory_manager;
+        CreateHost1xChannel();
+        return *host1x_channel->memory_manager;
    }

    /// Returns a reference to the GPU DMA pusher.
    [[nodiscard]] Tegra::DmaPusher& DmaPusher() {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
    }

    /// Returns a const reference to the GPU DMA pusher.
    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
-        return *dma_pusher;
+        ASSERT(current_channel);
+        return *current_channel->dma_pusher;
    }

    /// Returns a reference to the underlying renderer.
@@ -306,7 +303,7 @@ struct GPU::Impl {
    /// This can be used to launch any necessary threads and register any necessary
    /// core timing events.
    void Start() {
-        gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+        gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
        cpu_context = renderer->GetRenderWindow().CreateSharedContext();
        cpu_context->MakeCurrent();
    }
@@ -328,8 +325,8 @@ struct GPU::Impl {
    }

    /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries) {
-        gpu_thread.SubmitList(std::move(entries));
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+        gpu_thread.SubmitList(channel, std::move(entries));
    }

    /// Push GPU command buffer entries to be processed
@@ -381,303 +378,16 @@ struct GPU::Impl {
        interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
    }

-    void ProcessBindMethod(const GPU::MethodCall& method_call) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
-                  method_call.argument);
-        const auto engine_id = static_cast<EngineID>(method_call.argument);
-        bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
-        switch (engine_id) {
-        case EngineID::FERMI_TWOD_A:
-            dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_B:
-            dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
-        }
-    }
-
-    void ProcessFenceActionMethod() {
-        switch (regs.fence_action.op) {
-        case GPU::FenceOperation::Acquire:
-            WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
-            break;
-        case GPU::FenceOperation::Increment:
-            IncrementSyncPoint(regs.fence_action.syncpoint_id);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
-        }
-    }
-
-    void ProcessWaitForInterruptMethod() {
-        // TODO(bunnei) ImplementMe
-        LOG_WARNING(HW_GPU, "(STUBBED) called");
-    }
-
-    void ProcessSemaphoreTriggerMethod() {
-        const auto semaphoreOperationMask = 0xF;
-        const auto op =
-            static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
-        if (op == GpuSemaphoreOperation::WriteLong) {
-            struct Block {
-                u32 sequence;
-                u32 zeros = 0;
-                u64 timestamp;
-            };
-
-            Block block{};
-            block.sequence = regs.semaphore_sequence;
-            // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
-            // CoreTiming
-            block.timestamp = GetTicks();
-            memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
-                                       sizeof(block));
-        } else {
-            const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
-            if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
-                (op == GpuSemaphoreOperation::AcquireGequal &&
-                 static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
-                (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
-                // Nothing to do in this case
-            } else {
-                regs.acquire_source = true;
-                regs.acquire_value = regs.semaphore_sequence;
-                if (op == GpuSemaphoreOperation::AcquireEqual) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = false;
-                } else if (op == GpuSemaphoreOperation::AcquireGequal) {
-                    regs.acquire_active = true;
-                    regs.acquire_mode = true;
-                } else if (op == GpuSemaphoreOperation::AcquireMask) {
-                    // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
-                    // semaphore_sequence, gives a non-0 result
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
-                } else {
-                    LOG_ERROR(HW_GPU, "Invalid semaphore operation");
-                }
-            }
-        }
-    }
-
-    void ProcessSemaphoreRelease() {
-        memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
-                                   regs.semaphore_release);
-    }
-
-    void ProcessSemaphoreAcquire() {
-        const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
-        const auto value = regs.semaphore_acquire;
-        if (word != value) {
-            regs.acquire_active = true;
-            regs.acquire_value = value;
-            // TODO(kemathe73) figure out how to do the acquire_timeout
-            regs.acquire_mode = false;
-            regs.acquire_source = false;
-        }
-    }
-
-    /// Calls a GPU puller method.
-    void CallPullerMethod(const GPU::MethodCall& method_call) {
-        regs.reg_array[method_call.method] = method_call.argument;
-        const auto method = static_cast<BufferMethods>(method_call.method);
-
-        switch (method) {
-        case BufferMethods::BindObject: {
-            ProcessBindMethod(method_call);
-            break;
-        }
-        case BufferMethods::Nop:
-        case BufferMethods::SemaphoreAddressHigh:
-        case BufferMethods::SemaphoreAddressLow:
-        case BufferMethods::SemaphoreSequence:
-            break;
-        case BufferMethods::UnkCacheFlush:
-            rasterizer->SyncGuestHost();
-            break;
-        case BufferMethods::WrcacheFlush:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceValue:
-            break;
-        case BufferMethods::RefCnt:
-            rasterizer->SignalReference();
-            break;
-        case BufferMethods::FenceAction:
-            ProcessFenceActionMethod();
-            break;
-        case BufferMethods::WaitForInterrupt:
-            rasterizer->WaitForIdle();
-            break;
-        case BufferMethods::SemaphoreTrigger: {
-            ProcessSemaphoreTriggerMethod();
-            break;
-        }
-        case BufferMethods::NotifyIntr: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
-            break;
-        }
-        case BufferMethods::Unk28: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
-            break;
-        }
-        case BufferMethods::SemaphoreAcquire: {
-            ProcessSemaphoreAcquire();
-            break;
-        }
-        case BufferMethods::SemaphoreRelease: {
-            ProcessSemaphoreRelease();
-            break;
-        }
-        case BufferMethods::Yield: {
-            // TODO(Kmather73): Research and implement this method.
-            LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
-            break;
-        }
-        default:
-            LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
-            break;
-        }
-    }
-
-    /// Calls a GPU engine method.
-    void CallEngineMethod(const GPU::MethodCall& method_call) {
-        const EngineID engine = bound_engines[method_call.subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMethod(method_call.method, method_call.argument,
-                                 method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMethod(method_call.method, method_call.argument,
-                                   method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMethod(method_call.method, method_call.argument,
-                                       method_call.IsLastCall());
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMethod(method_call.method, method_call.argument,
-                                    method_call.IsLastCall());
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMethod(method_call.method, method_call.argument,
-                                      method_call.IsLastCall());
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Calls a GPU engine multivalue method.
-    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                               u32 methods_pending) {
-        const EngineID engine = bound_engines[subchannel];
-
-        switch (engine) {
-        case EngineID::FERMI_TWOD_A:
-            fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_B:
-            maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_COMPUTE_B:
-            kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::MAXWELL_DMA_COPY_A:
-            maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        case EngineID::KEPLER_INLINE_TO_MEMORY_B:
-            kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented engine");
-        }
-    }
-
-    /// Determines where the method should be executed.
-    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
-        const auto buffer_method = static_cast<BufferMethods>(method);
-        return buffer_method >= BufferMethods::NonPullerMethods;
-    }
-
-    struct Regs {
-        static constexpr size_t NUM_REGS = 0x40;
-
-        union {
-            struct {
-                INSERT_PADDING_WORDS_NOINIT(0x4);
-                struct {
-                    u32 address_high;
-                    u32 address_low;
-
-                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
-                        return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
-                                                     address_low);
-                    }
-                } semaphore_address;
-
-                u32 semaphore_sequence;
-                u32 semaphore_trigger;
-                INSERT_PADDING_WORDS_NOINIT(0xC);
-
-                // The pusher and the puller share the reference counter, the pusher only has read
-                // access
-                u32 reference_count;
-                INSERT_PADDING_WORDS_NOINIT(0x5);
-
-                u32 semaphore_acquire;
-                u32 semaphore_release;
-                u32 fence_value;
-                GPU::FenceAction fence_action;
-                INSERT_PADDING_WORDS_NOINIT(0xE2);
-
-                // Puller state
-                u32 acquire_mode;
-                u32 acquire_source;
-                u32 acquire_active;
-                u32 acquire_timeout;
-                u32 acquire_value;
-            };
-            std::array<u32, NUM_REGS> reg_array;
-        };
-    } regs{};
-
    GPU& gpu;
    Core::System& system;
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+
    std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
    std::unique_ptr<VideoCore::RendererBase> renderer;
    VideoCore::RasterizerInterface* rasterizer = nullptr;
    const bool use_nvdec;

-    /// Mapping of command subchannels to their bound engine ids
-    std::array<EngineID, 8> bound_engines{};
-    /// 3D engine
-    std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
-    /// 2D engine
-    std::unique_ptr<Engines::Fermi2D> fermi_2d;
-    /// Compute engine
-    std::unique_ptr<Engines::KeplerCompute> kepler_compute;
-    /// DMA engine
-    std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
-    /// Inline memory engine
-    std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    std::shared_ptr<Control::ChannelState> host1x_channel;
+    s32 new_channel_id{1};
    /// Shader build notifier
    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
    /// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -710,33 +420,10 @@ struct GPU::Impl {
    VideoCommon::GPUThread::ThreadManager gpu_thread;
    std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;

-#define ASSERT_REG_POSITION(field_name, position)                                                  \
-    static_assert(offsetof(Regs, field_name) == position * 4,                                      \
-                  "Field " #field_name " has invalid position")
-
-    ASSERT_REG_POSITION(semaphore_address, 0x4);
-    ASSERT_REG_POSITION(semaphore_sequence, 0x6);
-    ASSERT_REG_POSITION(semaphore_trigger, 0x7);
-    ASSERT_REG_POSITION(reference_count, 0x14);
-    ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
-    ASSERT_REG_POSITION(semaphore_release, 0x1B);
-    ASSERT_REG_POSITION(fence_value, 0x1C);
-    ASSERT_REG_POSITION(fence_action, 0x1D);
-
-    ASSERT_REG_POSITION(acquire_mode, 0x100);
-    ASSERT_REG_POSITION(acquire_source, 0x101);
-    ASSERT_REG_POSITION(acquire_active, 0x102);
-    ASSERT_REG_POSITION(acquire_timeout, 0x103);
-    ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
-    enum class GpuSemaphoreOperation {
-        AcquireEqual = 0x1,
-        WriteLong = 0x2,
-        AcquireGequal = 0x4,
-        AcquireMask = 0x8,
-    };
+    std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+    std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+    Tegra::Control::ChannelState* current_channel;
+    s32 bound_channel{-1};
 };

 GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,19 +431,26 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)

 GPU::~GPU() = default;

+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+    return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+    impl->InitChannel(to_init);
+}
+
+void GPU::BindChannel(s32 channel_id) {
+    impl->BindChannel(channel_id);
+}
+
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+    impl->ReleaseChannel(to_release);
+}
+
 void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
    impl->BindRenderer(std::move(renderer));
 }

-void GPU::CallMethod(const MethodCall& method_call) {
-    impl->CallMethod(method_call);
-}
-
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                          u32 methods_pending) {
-    impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
-}
-
 void GPU::FlushCommands() {
    impl->FlushCommands();
 }
@@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
    impl->ReleaseContext();
 }

-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
-    impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+    impl->PushGPUEntries(channel, std::move(entries));
 }

 void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -89,57 +89,20 @@ class Maxwell3D;
 class KeplerCompute;
 } // namespace Engines

-enum class EngineID {
-    FERMI_TWOD_A = 0x902D, // 2D Engine
-    MAXWELL_B = 0xB197,    // 3D Engine
-    KEPLER_COMPUTE_B = 0xB1C0,
-    KEPLER_INLINE_TO_MEMORY_B = 0xA140,
-    MAXWELL_DMA_COPY_A = 0xB0B5,
-};
+namespace Control {
+struct ChannelState;
+}

 class MemoryManager;

 class GPU final {
 public:
-    struct MethodCall {
-        u32 method{};
-        u32 argument{};
-        u32 subchannel{};
-        u32 method_count{};
-
-        explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
-            : method(method_), argument(argument_), subchannel(subchannel_),
-              method_count(method_count_) {}
-
-        [[nodiscard]] bool IsLastCall() const {
-            return method_count <= 1;
-        }
-    };
-
-    enum class FenceOperation : u32 {
-        Acquire = 0,
-        Increment = 1,
-    };
-
-    union FenceAction {
-        u32 raw;
-        BitField<0, 1, FenceOperation> op;
-        BitField<8, 24, u32> syncpoint_id;
-    };
-
    explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
    ~GPU();

    /// Binds a renderer to the GPU.
    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);

-    /// Calls a GPU method.
-    void CallMethod(const MethodCall& method_call);
-
-    /// Calls a GPU multivalue method.
-    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
-                         u32 methods_pending);
-
    /// Flush all current written commands into the host GPU for execution.
    void FlushCommands();
    /// Synchronizes CPU writes with Host GPU memory.
@@ -147,6 +110,14 @@ public:
    /// Signal the ending of command list.
    void OnCommandListEnd();

+    std::shared_ptr<Control::ChannelState> AllocateChannel();
+
+    void InitChannel(Control::ChannelState& to_init);
+
+    void BindChannel(s32 channel_id);
+
+    void ReleaseChannel(Control::ChannelState& to_release);
+
    /// Request a host GPU memory flush from the CPU.
    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);

@@ -226,7 +197,7 @@ public:
    void ReleaseContext();

    /// Push GPU command entries to be processed
-    void PushGPUEntries(Tegra::CommandList&& entries);
+    void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);

    /// Push GPU command buffer entries to be processed
    void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@@ -248,7 +219,7 @@ public:

 private:
    struct Impl;
-    std::unique_ptr<Impl> impl;
+    mutable std::unique_ptr<Impl> impl;
 };

 } // namespace Tegra
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -8,6 +8,7 @@
 #include "common/thread.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "video_core/control/scheduler.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
 #include "video_core/gpu_thread.h"
@@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
 /// Runs the GPU thread
 static void RunThread(std::stop_token stop_token, Core::System& system,
                      VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                      Tegra::DmaPusher& dma_pusher, SynchState& state) {
+                      Tegra::Control::Scheduler& scheduler, SynchState& state) {
    std::string name = "GPU";
    MicroProfileOnThreadCreate(name.c_str());
    SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
            break;
        }
        if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
-            dma_pusher.Push(std::move(submit_list->entries));
-            dma_pusher.DispatchCalls();
+            scheduler.Push(submit_list->channel, std::move(submit_list->entries));
        } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
            renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
        } else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;

 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
                                Core::Frontend::GraphicsContext& context,
-                                Tegra::DmaPusher& dma_pusher) {
+                                Tegra::Control::Scheduler& scheduler) {
    rasterizer = renderer.ReadRasterizer();
    thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
-                          std::ref(dma_pusher), std::ref(state));
+                          std::ref(scheduler), std::ref(state));
 }

-void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    PushCommand(SubmitListCommand(std::move(entries)));
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
+    PushCommand(SubmitListCommand(channel, std::move(entries)));
 }

 void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -15,7 +15,9 @@

 namespace Tegra {
 struct FramebufferConfig;
-class DmaPusher;
+namespace Control {
+class Scheduler;
+}
 } // namespace Tegra

 namespace Core {
@@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {

 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
-    explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
+    explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+        : channel{channel_}, entries{std::move(entries_)} {}

+    s32 channel;
    Tegra::CommandList entries;
 };

@@ -112,10 +116,10 @@ public:

    /// Creates and starts the GPU thread.
    void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
-                     Tegra::DmaPusher& dma_pusher);
+                     Tegra::Control::Scheduler& scheduler);

    /// Push GPU command entries to be processed
-    void SubmitList(Tegra::CommandList&& entries);
+    void SubmitList(s32 channel, Tegra::CommandList&& entries);

    /// Swap buffers (render frame)
    void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
    // TryLockPage(page_entry, size);
    auto& current_page = page_table[PageEntryIndex(gpu_addr)];

-    if ((!current_page.IsValid() && page_entry.IsValid()) ||
-        current_page.ToAddress() != page_entry.ToAddress()) {
-        rasterizer->ModifyGPUMemory(gpu_addr, size);
-    }
-
    current_page = page_entry;
 }

--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -17,6 +17,7 @@

 #include "common/assert.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/rasterizer_interface.h"
@@ -90,13 +91,10 @@ private:
 };

 template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
-class QueryCacheBase {
+class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
-    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
-                            Tegra::Engines::Maxwell3D& maxwell3d_,
-                            Tegra::MemoryManager& gpu_memory_)
-        : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-          gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+    explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
+        : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
                                                          VideoCore::QueryType::SamplesPassed}}} {}

    void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -117,13 +115,13 @@ public:
     */
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
        std::unique_lock lock{mutex};
-        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
        ASSERT(cpu_addr);

        CachedQuery* query = TryGet(*cpu_addr);
        if (!query) {
            ASSERT_OR_EXECUTE(cpu_addr, return;);
-            u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+            u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);

            query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
        }
@@ -137,7 +135,7 @@ public:
    /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
    void UpdateCounters() {
        std::unique_lock lock{mutex};
-        const auto& regs = maxwell3d.regs;
+        const auto& regs = maxwell3d->regs;
        Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
    }

@@ -264,8 +262,6 @@ private:
    static constexpr unsigned YUZU_PAGEBITS = 12;

    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::MemoryManager& gpu_memory;

    std::recursive_mutex mutex;

--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -16,6 +16,9 @@ class MemoryManager;
 namespace Engines {
 class AccelerateDMAInterface;
 }
+namespace Control {
+struct ChannelState;
+}
 } // namespace Tegra

 namespace VideoCore {
@@ -137,5 +140,11 @@ public:
    /// Initialize disk cached resources for the game being emulated
    virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                   const DiskResourceLoadCallback& callback) {}
+
+    virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
+
+    virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
+
+    virtual void ReleaseChannel(s32 channel_id) {}
 };
 } // namespace VideoCore
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -12,7 +12,7 @@ namespace OpenGL {

 GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}

-GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
    : FenceBase{address_, payload_, is_stubbed_} {}

 GLInnerFence::~GLInnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
    return std::make_shared<GLInnerFence>(value, is_stubbed);
 }

-Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
    return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
 }

--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,7 +17,7 @@ namespace OpenGL {
 class GLInnerFence : public VideoCommon::FenceBase {
 public:
    explicit GLInnerFence(u32 payload_, bool is_stubbed_);
-    explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
+    explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
    ~GLInnerFence();

    void Queue();
@@ -41,7 +41,7 @@ public:

 protected:
    Fence CreateFence(u32 value, bool is_stubbed) override;
-    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
    void QueueFence(Fence& fence) override;
    bool IsFenceSignaled(Fence& fence) const override;
    void WaitFence(Fence& fence) override;
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {

 } // Anonymous namespace

-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                       Tegra::MemoryManager& gpu_memory_)
-    : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
+    : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}

 QueryCache::~QueryCache() = default;

--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
 class QueryCache final
    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                        Tegra::MemoryManager& gpu_memory_);
+    explicit QueryCache(RasterizerOpenGL& rasterizer_);
    ~QueryCache();

    OGLQuery AllocateQuery(VideoCore::QueryType type);
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
      kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
      screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
      texture_cache_runtime(device, program_manager, state_tracker),
-      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
-      buffer_cache_runtime(device),
-      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
-      shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
-                   buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
-      query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+      texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+      shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
+                   state_tracker, gpu.ShaderNotify()),
+      query_cache(*this), accelerate_dma(buffer_cache),
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}

 RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
        gpu_memory.Write<u32>(addr, value);
        return;
    }
-    fence_manager.SignalSemaphore(addr, value);
+    auto paddr = gpu_memory.GetPointer(addr);
+    fence_manager.SignalSemaphore(paddr, value);
 }

 void RasterizerOpenGL::SignalSyncPoint(u32 value) {
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
 } // Anonymous namespace

 ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                         ProgramManager& program_manager_, StateTracker& state_tracker_,
-                         VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
-      emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
-      buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
-      shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+                         const Device& device_, TextureCache& texture_cache_,
+                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
+                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
+    : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
+      texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
+      state_tracker{state_tracker_}, shader_notify{shader_notify_},
+      use_asynchronous_shaders{device.UseAsynchronousShaders()},
      profile{
          .supported_spirv = 0x00010000,

@@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
        current_pipeline = nullptr;
        return nullptr;
    }
-    const auto& regs{maxwell3d.regs};
+    const auto& regs{maxwell3d->regs};
    graphics_key.raw = 0;
    graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
    graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
    }
    // If something is using depth, we can assume that games are not rendering anything which
    // will be used one time.
-    if (maxwell3d.regs.zeta_enable) {
+    if (maxwell3d->regs.zeta_enable) {
        return nullptr;
    }
    // If games are using a small index count, we can assume these are full screen quads.
    // Usually these shaders are only used once for building textures so we can assume they
    // can't be built async
-    if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+    if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
        return pipeline;
    }
    return nullptr;
@@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
    if (!shader) {
        return nullptr;
    }
-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
    const ComputePipelineKey key{
        .unique_hash = shader->unique_hash,
        .shared_memory_size = qmd.shared_alloc,
@@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
    }
    auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
    return std::make_unique<GraphicsPipeline>(
-        device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
-        thread_worker, &shader_notify, sources, sources_spirv, infos, key);
+        device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
+        state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);

 } catch (Shader::Exception& exception) {
    LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(

 std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
    const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
    env.SetCachedSize(shader->size_bytes);

    main_pools.ReleaseContents();
@@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
        break;
    }

-    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
-                                             kepler_compute, program_manager, program.info, code,
+    return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
+                                             *kepler_compute, program_manager, program.info, code,
                                             code_spirv);
 } catch (Shader::Exception& exception) {
    LOG_ERROR(Render_OpenGL, "{}", exception.what());
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
 class ShaderCache : public VideoCommon::ShaderCache {
 public:
    explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
-                         Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_,
-                         Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                         TextureCache& texture_cache_, BufferCache& buffer_cache_,
-                         ProgramManager& program_manager_, StateTracker& state_tracker_,
-                         VideoCore::ShaderNotify& shader_notify_);
+                         const Device& device_, TextureCache& texture_cache_,
+                         BufferCache& buffer_cache_, ProgramManager& program_manager_,
+                         StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
    ~ShaderCache();

    void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
                               Core::Frontend::EmuWindow& emu_window,
                               Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
                               std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
-    : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
-      cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
+    : RendererBase(emu_window, std::move(context_)),
+      telemetry_session(telemetry_session_),
+      cpu_memory(cpu_memory_),
+      gpu(gpu_),
+      library(OpenLibrary()),
      instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
                              true, Settings::values.renderer_debug.GetValue())),
      debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
      surface(CreateSurface(instance, render_window)),
-      device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
-      state_tracker(gpu), scheduler(device, state_tracker),
+      device(CreateDevice(instance, dld, *surface)),
+      memory_allocator(device, false),
+      state_tracker(gpu),
+      scheduler(device, state_tracker),
      swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
                render_window.GetFramebufferLayout().height, false),
      blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
                  screen_info),
-      rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
-                 memory_allocator, state_tracker, scheduler) {
+      rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
+                 state_tracker, scheduler) {
    Report();
 } catch (const vk::Exception& exception) {
    LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -14,7 +14,7 @@ namespace Vulkan {
 InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
    : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}

-InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
    : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}

 InnerFence::~InnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
    return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
 }

-Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
    return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
 }

--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -26,7 +26,7 @@ class Scheduler;
 class InnerFence : public VideoCommon::FenceBase {
 public:
    explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
-    explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
+    explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
    ~InnerFence();

    void Queue();
@@ -51,7 +51,7 @@ public:

 protected:
    Fence CreateFence(u32 value, bool is_stubbed) override;
-    Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+    Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
    void QueueFence(Fence& fence) override;
    bool IsFenceSignaled(Fence& fence) const override;
    void WaitFence(Fence& fence) override;
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
    return std::memcmp(&rhs, this, Size()) == 0;
 }

-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                             Tegra::Engines::KeplerCompute& kepler_compute_,
-                             Tegra::MemoryManager& gpu_memory_, const Device& device_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
                             Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
                             UpdateDescriptorQueue& update_descriptor_queue_,
                             RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
                             TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
-    : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
-      device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
-      update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
-      buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+    : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
+      descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
+      render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
+      texture_cache{texture_cache_}, shader_notify{shader_notify_},
      use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
      workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
      serialization_thread(1, "VkPipelineSerialization") {
@@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
        current_pipeline = nullptr;
        return nullptr;
    }
-    graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+    graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
                               device.IsExtVertexInputDynamicStateSupported());

    if (current_pipeline) {
@@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
    if (!shader) {
        return nullptr;
    }
-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
    const ComputePipelineCacheKey key{
        .unique_hash = shader->unique_hash,
        .shared_memory_size = qmd.shared_alloc,
@@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
    }
    // If something is using depth, we can assume that games are not rendering anything which
    // will be used one time.
-    if (maxwell3d.regs.zeta_enable) {
+    if (maxwell3d->regs.zeta_enable) {
        return nullptr;
    }
    // If games are using a small index count, we can assume these are full screen quads.
    // Usually these shaders are only used once for building textures so we can assume they
    // can't be built async
-    if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+    if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
        return pipeline;
    }
    return nullptr;
@@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
    }
    Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
    return std::make_unique<GraphicsPipeline>(
-        maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+        *maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
        descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
        std::move(modules), infos);

@@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {

 std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
    const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
    env.SetCachedSize(shader->size_bytes);

    main_pools.ReleaseContents();
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -100,10 +100,8 @@ struct ShaderPools {

 class PipelineCache : public VideoCommon::ShaderCache {
 public:
-    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
-                           Tegra::Engines::KeplerCompute& kepler_compute,
-                           Tegra::MemoryManager& gpu_memory, const Device& device,
-                           Scheduler& scheduler, DescriptorPool& descriptor_pool,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
+                           DescriptorPool& descriptor_pool,
                           UpdateDescriptorQueue& update_descriptor_queue,
                           RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
                           TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
    usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
 }

-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
-                       Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                       const Device& device_, Scheduler& scheduler_)
-    : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+                           Scheduler& scheduler_)
+    : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
      query_pools{
          QueryPool{device_, scheduler_, QueryType::SamplesPassed},
      } {}
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,9 +52,8 @@ private:
 class QueryCache final
    : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
 public:
-    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
-                        Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
-                        const Device& device_, Scheduler& scheduler_);
+    explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+                        Scheduler& scheduler_);
    ~QueryCache();

    std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -11,6 +11,7 @@
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/blit_image.h"
@@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
 } // Anonymous namespace

 RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                                   Tegra::MemoryManager& gpu_memory_,
                                   Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
                                   const Device& device_, MemoryAllocator& memory_allocator_,
                                   StateTracker& state_tracker_, Scheduler& scheduler_)
-    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
-      gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
-      screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
-      state_tracker{state_tracker_}, scheduler{scheduler_},
+    : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+      memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
      staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
      update_descriptor_queue(device, scheduler),
      blit_image(device, scheduler, state_tracker, descriptor_pool),
@@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
                                                       memory_allocator, staging_pool,
                                                       blit_image,       astc_decoder_pass,
                                                       render_pass_cache},
-      texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+      texture_cache(texture_cache_runtime, *this),
      buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
                           update_descriptor_queue, descriptor_pool),
-      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
-      pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
-                     descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
-                     texture_cache, gpu.ShaderNotify()),
-      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+      buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+      pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
+                     render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
+      query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
      wfi_event(device.GetLogical().CreateEvent()) {
    scheduler.SetQueryCache(query_cache);
@@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {

    UpdateDynamicStates();

-    const auto& regs{maxwell3d.regs};
-    const u32 num_instances{maxwell3d.mme_draw.instance_count};
+    const auto& regs{maxwell3d->regs};
+    const u32 num_instances{maxwell3d->mme_draw.instance_count};
    const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
    scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
        if (draw_params.is_indexed) {
@@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 void RasterizerVulkan::Clear() {
    MICROPROFILE_SCOPE(Vulkan_Clearing);

-    if (!maxwell3d.ShouldExecute()) {
+    if (!maxwell3d->ShouldExecute()) {
        return;
    }
    FlushWork();

    query_cache.UpdateCounters();

-    auto& regs = maxwell3d.regs;
+    auto& regs = maxwell3d->regs;
    const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
                           regs.clear_buffers.A;
    const bool use_depth = regs.clear_buffers.Z;
@@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
        return;
    }
    std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
-    pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
+    pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);

-    const auto& qmd{kepler_compute.launch_description};
+    const auto& qmd{kepler_compute->launch_description};
    const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
    scheduler.RequestOutsideRenderPassOperationContext();
    scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {

 void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
    if (!gpu.IsAsync()) {
-        gpu_memory.Write<u32>(addr, value);
+        gpu_memory->Write<u32>(addr, value);
        return;
    }
-    fence_manager.SignalSemaphore(addr, value);
+    auto paddr = gpu_memory->GetPointer(addr);
+    fence_manager.SignalSemaphore(paddr, value);
 }

 void RasterizerVulkan::SignalSyncPoint(u32 value) {
@@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()

 void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
                                                std::span<u8> memory) {
-    auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+    auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
    if (!cpu_addr) [[unlikely]] {
-        gpu_memory.WriteBlock(address, memory.data(), copy_size);
+        gpu_memory->WriteBlock(address, memory.data(), copy_size);
        return;
    }
-    gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+    gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
    {
        std::unique_lock<std::mutex> lock{buffer_cache.mutex};
        if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
 }

 void RasterizerVulkan::UpdateDynamicStates() {
-    auto& regs = maxwell3d.regs;
+    auto& regs = maxwell3d->regs;
    UpdateViewportsState(regs);
    UpdateScissorsState(regs);
    UpdateDepthBias(regs);
@@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
 }

 void RasterizerVulkan::BeginTransformFeedback() {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
    if (regs.tfb_enabled == 0) {
        return;
    }
@@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
 }

 void RasterizerVulkan::EndTransformFeedback() {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
    if (regs.tfb_enabled == 0) {
        return;
    }
@@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
 }

 void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
-    auto& dirty{maxwell3d.dirty.flags};
+    auto& dirty{maxwell3d->dirty.flags};
    if (!dirty[Dirty::VertexInput]) {
        return;
    }
@@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
    });
 }

+void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
+    CreateChannel(channel);
+    {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.CreateChannel(channel);
+        buffer_cache.CreateChannel(channel);
+    }
+    pipeline_cache.CreateChannel(channel);
+    query_cache.CreateChannel(channel);
+    state_tracker.SetupTables(channel);
+}
+
+void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
+    const s32 channel_id = channel.bind_id;
+    BindToChannel(channel_id);
+    {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.BindToChannel(channel_id);
+        buffer_cache.BindToChannel(channel_id);
+    }
+    pipeline_cache.BindToChannel(channel_id);
+    query_cache.BindToChannel(channel_id);
+    state_tracker.ChangeChannel(channel);
+    scheduler.InvalidateState();
+}
+
+void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
+    EraseChannel(channel_id);
+    {
+        std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+        texture_cache.EraseChannel(channel_id);
+        buffer_cache.EraseChannel(channel_id);
+    }
+    pipeline_cache.EraseChannel(channel_id);
+    query_cache.EraseChannel(channel_id);
+}
+
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -8,6 +8,7 @@
 #include <boost/container/static_vector.hpp>

 #include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/engines/maxwell_dma.h"
 #include "video_core/rasterizer_accelerated.h"
 #include "video_core/rasterizer_interface.h"
@@ -54,13 +55,13 @@ private:
    BufferCache& buffer_cache;
 };

-class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
+                               protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
 public:
    explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
-                              Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
-                              ScreenInfo& screen_info_, const Device& device_,
-                              MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
-                              Scheduler& scheduler_);
+                              Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
+                              const Device& device_, MemoryAllocator& memory_allocator_,
+                              StateTracker& state_tracker_, Scheduler& scheduler_);
    ~RasterizerVulkan() override;

    void Draw(bool is_indexed, bool is_instanced) override;
@@ -99,6 +100,12 @@ public:
    void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                           const VideoCore::DiskResourceLoadCallback& callback) override;

+    void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+
+    void BindChannel(Tegra::Control::ChannelState& channel) override;
+
+    void ReleaseChannel(s32 channel_id) override;
+
 private:
    static constexpr size_t MAX_TEXTURES = 192;
    static constexpr size_t MAX_IMAGES = 48;
@@ -134,9 +141,6 @@ private:
    void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);

    Tegra::GPU& gpu;
-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;

    ScreenInfo& screen_info;
    const Device& device;
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -7,6 +7,7 @@

 #include "common/common_types.h"
 #include "core/core.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/gpu.h"
@@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
 }
 } // Anonymous namespace

-StateTracker::StateTracker(Tegra::GPU& gpu)
-    : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
-    auto& tables{gpu.Maxwell3D().dirty.tables};
+void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
+    auto& tables{channel_state.maxwell_3d->dirty.tables};
    SetupDirtyFlags(tables);
    SetupDirtyViewports(tables);
    SetupDirtyScissors(tables);
@@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
    SetupDirtyVertexBindings(tables);
 }

+void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
+    flags = &channel_state.maxwell_3d->dirty.flags;
+}
+
+StateTracker::StateTracker(Tegra::GPU& gpu)
+    : flags{}, invalidation_flags{MakeInvalidationFlags()} {}
+
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -10,6 +10,12 @@
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/maxwell_3d.h"

+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
 namespace Vulkan {

 namespace Dirty {
@@ -56,16 +62,16 @@ public:
    explicit StateTracker(Tegra::GPU& gpu);

    void InvalidateCommandBufferState() {
-        flags |= invalidation_flags;
+        (*flags) |= invalidation_flags;
        current_topology = INVALID_TOPOLOGY;
    }

    void InvalidateViewports() {
-        flags[Dirty::Viewports] = true;
+        (*flags)[Dirty::Viewports] = true;
    }

    void InvalidateScissors() {
-        flags[Dirty::Scissors] = true;
+        (*flags)[Dirty::Scissors] = true;
    }

    bool TouchViewports() {
@@ -139,16 +145,20 @@ public:
        return has_changed;
    }

+    void SetupTables(Tegra::Control::ChannelState& channel_state);
+
+    void ChangeChannel(Tegra::Control::ChannelState& channel_state);
+
 private:
    static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);

    bool Exchange(std::size_t id, bool new_value) const noexcept {
-        const bool is_dirty = flags[id];
-        flags[id] = new_value;
+        const bool is_dirty = (*flags)[id];
+        (*flags)[id] = new_value;
        return is_dirty;
    }

-    Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+    Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
    Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
    Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
 };
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -8,6 +8,7 @@
 #include "common/assert.h"
 #include "shader_recompiler/frontend/maxwell/control_flow.h"
 #include "shader_recompiler/object_pool.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
    RemovePendingShaders();
 }

-ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_)
-    : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
-      rasterizer{rasterizer_} {}
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}

 bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
-    auto& dirty{maxwell3d.dirty.flags};
+    auto& dirty{maxwell3d->dirty.flags};
    if (!dirty[VideoCommon::Dirty::Shaders]) {
        return last_shaders_valid;
    }
    dirty[VideoCommon::Dirty::Shaders] = false;

-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
    for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
-        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+        if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
            unique_hashes[index] = 0;
            continue;
        }
-        const auto& shader_config{maxwell3d.regs.shader_config[index]};
+        const auto& shader_config{maxwell3d->regs.shader_config[index]};
        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+        const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
        if (!cpu_shader_addr) {
            LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
            last_shaders_valid = false;
@@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
        if (!shader_info) {
            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+            GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
        }
        shader_infos[index] = shader_info;
@@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
 }

 const ShaderInfo* ShaderCache::ComputeShader() {
-    const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
-    const auto& qmd{kepler_compute.launch_description};
+    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+    const auto& qmd{kepler_compute->launch_description};
    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+    const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
    if (!cpu_shader_addr) {
        LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
        return nullptr;
@@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
    if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
        return shader;
    }
-    ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
    return MakeShaderInfo(env, *cpu_shader_addr);
 }

 void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
                                          const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
    size_t env_index{};
-    const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+    const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
    for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
        if (unique_hashes[index] == 0) {
            continue;
        }
        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
        auto& env{result.envs[index]};
-        const u32 start_address{maxwell3d.regs.shader_config[index].offset};
-        env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+        const u32 start_address{maxwell3d->regs.shader_config[index].offset};
+        env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
        env.SetCachedSize(shader_infos[index]->size_bytes);
        result.env_ptrs[env_index++] = &env;
    }
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -12,6 +12,7 @@
 #include <vector>

 #include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/shader_environment.h"

@@ -19,6 +20,10 @@ namespace Tegra {
 class MemoryManager;
 }

+namespace Tegra::Control {
+struct ChannelState;
+}
+
 namespace VideoCommon {

 class GenericEnvironment;
@@ -28,7 +33,7 @@ struct ShaderInfo {
    size_t size_bytes{};
 };

-class ShaderCache {
+class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
    static constexpr u64 YUZU_PAGEBITS = 14;
    static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;

@@ -71,9 +76,7 @@ protected:
        }
    };

-    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
-                         Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
-                         Tegra::Engines::KeplerCompute& kepler_compute_);
+    explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);

    /// @brief Update the hashes and information of shader stages
    /// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -88,10 +91,6 @@ protected:
    void GetGraphicsEnvironments(GraphicsEnvironments& result,
                                 const std::array<u64, NUM_PROGRAMS>& unique_hashes);

-    Tegra::MemoryManager& gpu_memory;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-
    std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
    bool last_shaders_valid = false;

--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
    u32 scale_rating = 0;
    u64 scale_tick = 0;
    bool has_scaled = false;
+
+    size_t channel = 0;
+
    ImageFlagBits flags = ImageFlagBits::CpuModified;

    GPUVAddr gpu_addr = 0;
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@

 #include "common/alignment.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;

 template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
-                              Tegra::Engines::Maxwell3D& maxwell3d_,
-                              Tegra::Engines::KeplerCompute& kepler_compute_,
-                              Tegra::MemoryManager& gpu_memory_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+    : runtime{runtime_}, rasterizer{rasterizer_} {
    // Configure null sampler
    TSCEntry sampler_descriptor{};
    sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
    sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
    sampler_descriptor.cubemap_anisotropy.Assign(1);

+    // Setup channels
+    current_channel_id = UNSET_CHANNEL;
+    state = nullptr;
+    maxwell3d = nullptr;
+    kepler_compute = nullptr;
+    gpu_memory = nullptr;
+
    // Make sure the first index is reserved for the null resources
    // This way the null resource becomes a compile time constant
    void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
            const auto copies = FullDownloadCopies(image.info);
            image.DownloadMemory(map, copies);
            runtime.Finish();
-            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
        }
        if (True(image.flags & ImageFlagBits::Tracked)) {
            UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+    FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
+                                   views);
 }

 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+    FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
 }

 template <class P>
 typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
-    if (index > graphics_sampler_table.Limit()) {
+    if (index > state->graphics_sampler_table.Limit()) {
        LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
        return &slot_samplers[NULL_SAMPLER_ID];
    }
-    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
-    SamplerId& id = graphics_sampler_ids[index];
+    const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
+    SamplerId& id = state->graphics_sampler_ids[index];
    if (is_new) {
        id = FindSampler(descriptor);
    }
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {

 template <class P>
 typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
-    if (index > compute_sampler_table.Limit()) {
+    if (index > state->compute_sampler_table.Limit()) {
        LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
        return &slot_samplers[NULL_SAMPLER_ID];
    }
-    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
-    SamplerId& id = compute_sampler_ids[index];
+    const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
+    SamplerId& id = state->compute_sampler_ids[index];
    if (is_new) {
        id = FindSampler(descriptor);
    }
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
 template <class P>
 void TextureCache<P>::SynchronizeGraphicsDescriptors() {
    using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
-    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
-    const u32 tic_limit = maxwell3d.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
-    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
-        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+    if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
+        state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
    }
-    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
-        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+        state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
    }
 }

 template <class P>
 void TextureCache<P>::SynchronizeComputeDescriptors() {
-    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
-    const u32 tic_limit = kepler_compute.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
-    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
-    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
-        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+    if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
    }
-    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
-        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
+        state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
    }
 }

 template <class P>
 bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    u32 scale_rating = 0;
    bool rescaled = false;
    std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
 template <class P>
 void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
    using namespace VideoCommon::Dirty;
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
    if (!flags[Dirty::RenderTargets]) {
        for (size_t index = 0; index < NUM_RT; ++index) {
            ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
    PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));

    for (size_t index = 0; index < NUM_RT; ++index) {
-        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
    }
    u32 up_scale = 1;
    u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
        down_shift = Settings::values.resolution_info.down_shift;
    }
    render_targets.size = Extent2D{
-        (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
-        (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
    };

    flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
        const auto copies = FullDownloadCopies(image.info);
        image.DownloadMemory(map, copies);
        runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
    }
 }

@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
    for (const ImageId image_id : download_ids) {
        const ImageBase& image = slot_images[image_id];
        const auto copies = FullDownloadCopies(image.info);
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
        download_map.offset += image.unswizzled_size_bytes;
        download_span = download_span.subspan(image.unswizzled_size_bytes);
    }
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
    const GPUVAddr gpu_addr = image.gpu_addr;

    if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
-        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
        const auto uploads = FullUploadSwizzles(image.info);
        runtime.AccelerateImageUpload(image, staging, uploads);
    } else if (True(image.flags & ImageFlagBits::Converted)) {
        std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
-        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
        ConvertImage(unswizzled_data, image.info, mapped_span, copies);
        image.UploadMemory(staging, copies);
    } else {
-        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
        image.UploadMemory(staging, copies);
    }
 }

 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidEntry(gpu_memory, config)) {
+    if (!IsValidEntry(*gpu_memory, config)) {
        return NULL_IMAGE_VIEW_ID;
    }
-    const auto [pair, is_new] = image_views.try_emplace(config);
+    const auto [pair, is_new] = state->image_views.try_emplace(config);
    ImageViewId& image_view_id = pair->second;
    if (is_new) {
        image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                   RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr) {
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
        if (!cpu_addr) {
            return ImageId{};
        }
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
        image.scale_tick = frame_tick + 1;
    }
    const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
    dirty[Dirty::RenderTargets] = true;
    dirty[Dirty::ZetaBuffer] = true;
    for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
    image.image_view_ids.clear();
    image.image_view_infos.clear();
    if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
    }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
+    state->graphics_image_table.Invalidate();
+    state->compute_image_table.Invalidate();
    has_deleted_images = true;
 }

@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                     RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
    if (!cpu_addr) {
        const auto size = CalculateGuestSizeInBytes(info);
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
        if (!cpu_addr) {
            const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
            virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
    const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
    Image& new_image = slot_images[new_image_id];

-    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
        new_image.flags |= ImageFlagBits::Sparse;
    }

@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
    if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
        return NULL_SAMPLER_ID;
    }
-    const auto [pair, is_new] = samplers.try_emplace(config);
+    const auto [pair, is_new] = state->samplers.try_emplace(config);
    if (is_new) {
        pair->second = slot_samplers.insert(runtime, config);
    }
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {

 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
    if (index >= regs.rt_control.count) {
        return ImageViewId{};
    }
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {

 template <class P>
 ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
    if (!regs.zeta_enable) {
        return ImageViewId{};
    }
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    boost::container::small_vector<ImageId, 8> images;
    ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
+        const auto it = state->gpu_page_table.find(page);
+        if (it == state->gpu_page_table.end()) {
            if constexpr (BOOL_BREAK) {
                return false;
            } else {
@@ -1403,9 +1408,9 @@ template <typename Func>
 void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
    using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
    static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
-    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
    for (const auto& [gpu_addr, size] : segments) {
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
        ASSERT(cpu_addr);
        if constexpr (RETURNS_BOOL) {
            if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
    image.lru_index = lru_cache.Insert(image_id, frame_tick);

    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+                   [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
    if (False(image.flags & ImageFlagBits::Sparse)) {
        auto map_id =
            slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
            }
            image_ids.erase(vector_it);
        };
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, state->gpu_page_table);
+    });
    if (False(image.flags & ImageFlagBits::Sparse)) {
        const auto map_id = image.map_view_id;
        ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
    ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");

    // Mark render targets as dirty
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
    dirty[Dirty::RenderTargets] = true;
    dirty[Dirty::ZetaBuffer] = true;
    for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
    if (alloc_images.empty()) {
        image_allocs_table.erase(alloc_it);
    }
-    if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    for (auto& this_state : channel_storage) {
+        if constexpr (ENABLE_VALIDATION) {
+            std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
+            std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
+        }
+        this_state.graphics_image_table.Invalidate();
+        this_state.compute_image_table.Invalidate();
    }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
    has_deleted_images = true;
 }

 template <class P>
 void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
-    auto it = image_views.begin();
-    while (it != image_views.end()) {
+    auto it = state->image_views.begin();
+    while (it != state->image_views.end()) {
        const auto found = std::ranges::find(removed_views, it->second);
        if (found != removed_views.end()) {
-            it = image_views.erase(it);
+            it = state->image_views.erase(it);
        } else {
            ++it;
        }
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
    const ImageViewBase& image_view = slot_image_views[id];
    const ImageBase& image = slot_images[image_view.image_id];
    const Extent3D size = image_view.size;
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
    const auto& scissor = regs.scissor_test[0];
    if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
        // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
           scissor.max_y >= size.height;
 }

+template <class P>
+TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
+    : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
+      gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
+      graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
+                                                                               gpu_memory} {}
+
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            new (&channel_storage[id]) ChannelInfo(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void TextureCache<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    state = &channel_storage[current_channel_id];
+    maxwell3d = &state->maxwell3d;
+    kepler_compute = &state->kepler_compute;
+    gpu_memory = &state->gpu_memory;
+}
+
+/// Erase channel's state.
+template <class P>
+void TextureCache<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id);
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL;
+        state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
 } // namespace VideoCommon
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,8 @@

 #pragma once

+#include <deque>
+#include <limits>
 #include <mutex>
 #include <span>
 #include <type_traits>
@@ -26,6 +28,10 @@
 #include "video_core/texture_cache/types.h"
 #include "video_core/textures/texture.h"

+namespace Tegra::Control {
+struct ChannelState;
+}
+
 namespace VideoCommon {

 using Tegra::Texture::SwizzleSource;
@@ -58,6 +64,8 @@ class TextureCache {
    /// True when the API can provide info about the memory of the device.
    static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;

+    static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
    static constexpr s64 TARGET_THRESHOLD = 4_GiB;
    static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
    static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -85,8 +93,7 @@ class TextureCache {
    };

 public:
-    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
-                          Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+    explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);

    /// Notify the cache that a new frame has been queued
    void TickFrame();
@@ -171,6 +178,15 @@ public:

    [[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;

+    /// Create channel state.
+    void CreateChannel(struct Tegra::Control::ChannelState& channel);
+
+    /// Bind a channel for execution.
+    void BindToChannel(s32 id);
+
+    /// Erase channel's state.
+    void EraseChannel(s32 id);
+
    std::mutex mutex;

 private:
@@ -338,31 +354,52 @@ private:
    u64 GetScaledImageSizeBytes(ImageBase& image);

    Runtime& runtime;
+
+    struct ChannelInfo {
+        ChannelInfo() = delete;
+        ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
+        ChannelInfo(const ChannelInfo& state) = delete;
+        ChannelInfo& operator=(const ChannelInfo&) = delete;
+        ChannelInfo(ChannelInfo&& other) noexcept = default;
+        ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
+
+        Tegra::Engines::Maxwell3D& maxwell3d;
+        Tegra::Engines::KeplerCompute& kepler_compute;
+        Tegra::MemoryManager& gpu_memory;
+
+        DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+        std::vector<SamplerId> graphics_sampler_ids;
+        std::vector<ImageViewId> graphics_image_view_ids;
+
+        DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+        DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+        std::vector<SamplerId> compute_sampler_ids;
+        std::vector<ImageViewId> compute_image_view_ids;
+
+        std::unordered_map<TICEntry, ImageViewId> image_views;
+        std::unordered_map<TSCEntry, SamplerId> samplers;
+
+        std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+    };
+
+    std::deque<ChannelInfo> channel_storage;
+    std::deque<size_t> free_channel_ids;
+    std::unordered_map<s32, size_t> channel_map;
+
+    ChannelInfo* state;
+    size_t current_channel_id{UNSET_CHANNEL};
    VideoCore::RasterizerInterface& rasterizer;
-    Tegra::Engines::Maxwell3D& maxwell3d;
-    Tegra::Engines::KeplerCompute& kepler_compute;
-    Tegra::MemoryManager& gpu_memory;
-
-    DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
-    std::vector<SamplerId> graphics_sampler_ids;
-    std::vector<ImageViewId> graphics_image_view_ids;
-
-    DescriptorTable<TICEntry> compute_image_table{gpu_memory};
-    DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
-    std::vector<SamplerId> compute_sampler_ids;
-    std::vector<ImageViewId> compute_image_view_ids;
+    Tegra::Engines::Maxwell3D* maxwell3d;
+    Tegra::Engines::KeplerCompute* kepler_compute;
+    Tegra::MemoryManager* gpu_memory;

    RenderTargets render_targets;

-    std::unordered_map<TICEntry, ImageViewId> image_views;
-    std::unordered_map<TSCEntry, SamplerId> samplers;
    std::unordered_map<RenderTargets, FramebufferId> framebuffers;

    std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
-    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
    std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
    std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;

    VAddr virtual_invalid_space{};