diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 5d65965fa..06909f288 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -60,6 +60,7 @@ add_library(common STATIC
     detached_tasks.h
     bit_field.h
     bit_set.h
+    blocking_loop.h
     cityhash.cpp
     cityhash.h
     color.h
@@ -69,6 +70,7 @@ add_library(common STATIC
     construct.h
     file_util.cpp
     file_util.h
+    flag.h
     hash.h
     linear_disk_cache.h
     logging/backend.cpp
@@ -92,6 +94,7 @@ add_library(common STATIC
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
+    semaphore.h
     serialization/atomic.h
     serialization/boost_discrete_interval.hpp
     serialization/boost_flat_set.h
diff --git a/src/common/blocking_loop.h b/src/common/blocking_loop.h
new file mode 100644
index 000000000..fcdf6382c
--- /dev/null
+++ b/src/common/blocking_loop.h
@@ -0,0 +1,257 @@
+// Copyright 2015 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <mutex>
+
+#include "common/thread.h"
+#include "common/flag.h"
+
+namespace Common
+{
+// This class provides a synchronized loop.
+// It's a thread-safe way to trigger a new iteration without busy loops.
+// It's optimized for high-usage iterations which usually are already running while it's triggered
+// often.
+// Be careful when using Wait() and Wakeup() at the same time. Wait() may block forever while
+// Wakeup() is called regularly.
+class BlockingLoop
+{
+public:
+  enum StopMode
+  {
+    kNonBlock,
+    kBlock,
+    kBlockAndGiveUp,
+  };
+
+  BlockingLoop() { m_stopped.Set(); }
+  ~BlockingLoop() { Stop(kBlockAndGiveUp); }
+
+  // Triggers the payload of the Run() function to be executed at least once again.
+  // This function will never block and is designed to finish as fast as possible.
+  void Wakeup()
+  {
+    // Already running, so no need for a wakeup.
+    // This is the common case, so try to get this as fast as possible.
+    if (m_running_state.load() >= STATE_NEED_EXECUTION)
+      return;
+
+    // Mark that new data is available. If the old state will rerun the payload
+    // itself, we don't have to set the event to interrupt the worker.
+    if (m_running_state.exchange(STATE_NEED_EXECUTION) != STATE_SLEEPING)
+      return;
+
+    // Else, as the worker thread may sleep now, we have to set the event.
+    m_new_work_event.Set();
+  }
+
+  // Wait for a complete payload run after the last Wakeup() call.
+  // If stopped, this returns immediately.
+  void Wait()
+  {
+    // Already done.
+    if (IsDone())
+      return;
+
+    // Notifying the done event will only wake up one thread, so use a mutex here to
+    // allow only one waiting thread. This way the first waiter still gets an
+    // event-free wakeup.
+    std::lock_guard lk(m_wait_lock);
+
+    // Wait for the worker thread to finish.
+    while (!IsDone())
+    {
+      m_done_event.Wait();
+    }
+
+    // As we wanted to wait for the other thread, there is likely no work remaining.
+    // So there is no need for a busy loop any more.
+    m_may_sleep.Set();
+  }
+
+  // Wait for a complete payload run after the last Wakeup() call.
+  // This version calls a yield function whenever rel_time elapses without completion.
+  // If stopped, this returns immediately.
+  template <class Rep, class Period, class Functor>
+  void WaitYield(const std::chrono::duration<Rep, Period>& rel_time, Functor yield_func)
+  {
+    // Already done.
+    if (IsDone())
+      return;
+
+    // Notifying the done event will only wake up one thread, so use a mutex here to
+    // allow only one waiting thread. This way the first waiter still gets an
+    // event-free wakeup.
+    std::lock_guard lk(m_wait_lock);
+
+    // Wait for the worker thread to finish.
+    while (!IsDone())
+    {
+      if (!m_done_event.WaitFor(rel_time))
+        yield_func();
+    }
+
+    // As we wanted to wait for the other thread, there is likely no work remaining.
+    // So there is no need for a busy loop any more.
+    m_may_sleep.Set();
+  }
+
+  // Half-start the worker.
+  // This puts the object in a running state, so Wait() will block until the worker calls Run().
+  // This may be called from any thread and is supposed to be called at least once before Wait() is
+  // used.
+  void Prepare()
+  {
+    // There is a race condition if other threads call this function while
+    // the loop thread is initializing. Using this lock will ensure a valid state.
+    std::lock_guard lk(m_prepare_lock);
+
+    if (!m_stopped.TestAndClear())
+      return;
+
+    // So the payload will be executed once even without any Wakeup call.
+    m_running_state.store(STATE_LAST_EXECUTION);
+    m_shutdown.Clear();
+    m_may_sleep.Set();
+  }
+
+  // Main loop of this object.
+  // The payload callback is called at least as often as needed to match the Wakeup()
+  // requirements.
+  // The optional timeout parameter bounds how long the loop may sleep between payload calls.
+  // Use timeout = 0 to sleep without a timeout at all.
+  template <class F>
+  void Run(F payload, int64_t timeout = 0)
+  {
+    // Makes sure Prepare() has been called at least once before we enter the loop.
+    // A well-behaved caller should already have done this.
+    Prepare();
+
+    while (!m_shutdown.IsSet())
+    {
+      payload();
+
+      switch (m_running_state.load())
+      {
+      case STATE_NEED_EXECUTION:
+        // We won't get notified while we are in the STATE_NEED_EXECUTION state, so Wakeup may
+        // have been called in the meantime.
+        // So on leaving the STATE_NEED_EXECUTION state, we have to assume there may be some
+        // remaining tasks. To process these tasks, we call the payload again within the
+        // STATE_LAST_EXECUTION state.
+        m_running_state--;
+        break;
+
+      case STATE_LAST_EXECUTION:
+        // If we're still in the STATE_LAST_EXECUTION state, then Wakeup wasn't called within the
+        // last execution of the payload. This means we should be ready now.
+        // But bad luck, Wakeup may have been called right now. So break and rerun the payload
+        // if the state was touched.
+        if (m_running_state-- != STATE_LAST_EXECUTION)
+          break;
+
+        // Else we're likely in the STATE_DONE state now, so wake up the waiting threads right now.
+        // However, if we're not in the STATE_DONE state any more, the event should also be
+        // triggered so that we'll skip the next waiting call quite fast.
+        m_done_event.Set();
+        [[fallthrough]];
+
+      case STATE_DONE:
+        // We're done now. So time to check if we want to sleep or if we want to stay in a busy
+        // loop.
+        if (m_may_sleep.TestAndClear())
+        {
+          // Try to set the sleeping state.
+          if (m_running_state-- != STATE_DONE)
+            break;
+        }
+        else
+        {
+          // Busy loop.
+          break;
+        }
+        [[fallthrough]];
+
+      case STATE_SLEEPING:
+        // Just relax.
+        if (timeout > 0)
+        {
+          m_new_work_event.WaitFor(std::chrono::milliseconds(timeout));
+        }
+        else
+        {
+          m_new_work_event.Wait();
+        }
+        break;
+      }
+    }
+
+    // Shutting down, so get to a safe state.
+    m_running_state.store(STATE_DONE);
+    m_stopped.Set();
+
+    // Wake up the last Wait calls.
+    m_done_event.Set();
+  }
+
+  // Quits the main loop.
+  // By default, it will wait until the main loop quits.
+  // Be careful not to use the blocking modes within the payload of the Run() method.
+  void Stop(StopMode mode = kBlock)
+  {
+    if (m_stopped.IsSet())
+      return;
+
+    m_shutdown.Set();
+
+    // We have to interrupt the sleeping call to let the worker shut down soon.
+    Wakeup();
+
+    switch (mode)
+    {
+    case kNonBlock:
+      break;
+    case kBlock:
+      Wait();
+      break;
+    case kBlockAndGiveUp:
+      WaitYield(std::chrono::milliseconds(100), [&] {
+        // If timed out, assume no one will come along to call Run, so force a break.
+        m_stopped.Set();
+      });
+      break;
+    }
+  }
+
+  bool IsRunning() const { return !m_stopped.IsSet() && !m_shutdown.IsSet(); }
+  bool IsDone() const { return m_stopped.IsSet() || m_running_state.load() <= STATE_DONE; }
+
+  // This function should be triggered regularly over time so
+  // that we will fall back from the busy loop to sleeping.
+  void AllowSleep() { m_may_sleep.Set(); }
+
+private:
+  std::mutex m_wait_lock;
+  std::mutex m_prepare_lock;
+
+  Flag m_stopped;   // If this is set, Wait() shall not block.
+  Flag m_shutdown;  // If this is set, the loop shall end.
+
+  Event m_new_work_event;
+  Event m_done_event;
+
+  enum RUNNING_TYPE
+  {
+    STATE_SLEEPING = 0,
+    STATE_DONE = 1,
+    STATE_LAST_EXECUTION = 2,
+    STATE_NEED_EXECUTION = 3
+  };
+  std::atomic<int> m_running_state;  // Must hold a RUNNING_TYPE value.
+
+  Flag m_may_sleep;  // If this is set, we fall back from the busy loop to an event based
+                     // synchronization.
+};
+}  // namespace Common
diff --git a/src/common/flag.h b/src/common/flag.h
new file mode 100644
index 000000000..d9c750c37
--- /dev/null
+++ b/src/common/flag.h
@@ -0,0 +1,45 @@
+// Copyright 2014 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// Abstraction for a simple flag that can be toggled in a multithreaded way.
+//
+// Simple API:
+// * Set(bool = true): sets the Flag
+// * IsSet(): tests if the flag is set
+// * Clear(): clears the flag (equivalent to Set(false)).
+//
+// More advanced features:
+// * TestAndSet(bool = true): sets the flag to the given value. If a change was
+//                            needed (the flag did not already have this value)
+//                            the function returns true. Else, false.
+// * TestAndClear(): alias for TestAndSet(false).
+
+#pragma once
+
+#include <atomic>
+
+namespace Common
+{
+class Flag final
+{
+public:
+  // Declared as explicit since we do not want "= true" to work on a flag
+  // object - it should be made explicit that a flag is *not* a normal
+  // variable.
+  explicit Flag(bool initial_value = false) : m_val(initial_value) {}
+
+  void Set(bool val = true) { m_val.store(val); }
+  void Clear() { Set(false); }
+  bool IsSet() const { return m_val.load(); }
+
+  bool TestAndSet(bool val = true)
+  {
+    bool expected = !val;
+    return m_val.compare_exchange_strong(expected, val);
+  }
+
+  bool TestAndClear() { return TestAndSet(false); }
+
+private:
+  std::atomic_bool m_val;
+};
+
+}  // namespace Common
diff --git a/src/common/semaphore.h b/src/common/semaphore.h
new file mode 100644
index 000000000..cc8755014
--- /dev/null
+++ b/src/common/semaphore.h
@@ -0,0 +1,72 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count)
+  {
+    m_handle = CreateSemaphoreA(nullptr, initial_count, maximum_count, nullptr);
+  }
+
+  ~Semaphore() { CloseHandle(m_handle); }
+
+  void Wait() { WaitForSingleObject(m_handle, INFINITE); }
+  void Post() { ReleaseSemaphore(m_handle, 1, nullptr); }
+
+private:
+  HANDLE m_handle;
+};
+}  // namespace Common
+
+#elif defined(__APPLE__)
+
+#include <dispatch/dispatch.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count)
+  {
+    m_handle = dispatch_semaphore_create(0);
+    for (int i = 0; i < initial_count; i++)
+      dispatch_semaphore_signal(m_handle);
+  }
+
+  ~Semaphore() { dispatch_release(m_handle); }
+
+  void Wait() { dispatch_semaphore_wait(m_handle, DISPATCH_TIME_FOREVER); }
+  void Post() { dispatch_semaphore_signal(m_handle); }
+
+private:
+  dispatch_semaphore_t m_handle;
+};
+}  // namespace Common
+
+#else
+
+#include <semaphore.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count) { sem_init(&m_handle, 0, initial_count); }
+  ~Semaphore() { sem_destroy(&m_handle); }
+
+  void Wait() { sem_wait(&m_handle); }
+  void Post() { sem_post(&m_handle); }
+
+private:
+  sem_t m_handle;
+};
+}  // namespace Common
+
+#endif  // _WIN32
diff --git a/src/common/thread.h b/src/common/thread.h
index b4881fd44..6354dcce8 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -29,8 +29,8 @@ public:
         is_set = false;
     }
 
-    template <class Rep, class Period>
-    bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
+    template <class Rep, class Period = std::ratio<1>>
+    bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
         std::unique_lock lk{mutex};
         if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
             return false;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e3f905847..a055bf5ac 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -73,6 +73,8 @@ add_library(video_core STATIC
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/vk_buffer.cpp
    renderer_vulkan/vk_buffer.h
+    renderer_vulkan/vk_command_manager.cpp
+    renderer_vulkan/vk_command_manager.h
     renderer_vulkan/vk_instance.cpp
     renderer_vulkan/vk_instance.h
     renderer_vulkan/vk_resource_cache.cpp
diff --git a/src/video_core/renderer_vulkan/vk_command_manager.cpp b/src/video_core/renderer_vulkan/vk_command_manager.cpp
new file mode 100644
index 000000000..119bc9e6c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_manager.cpp
@@ -0,0 +1,329 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/renderer_vulkan/vk_command_manager.h"
+#include "common/assert.h"
+#include "common/thread.h"
+
+namespace Vulkan {
+
+VKCommandManager::VKCommandManager(bool use_threaded_submission)
+    : submit_semaphore(1, 1),
+      use_threaded_submission(use_threaded_submission) {
+}
+
+VKCommandManager::~VKCommandManager() {
+    // If the worker thread is enabled, stop and block until it exits.
+    if (use_threaded_submission) {
+        submit_loop->Stop();
+        submit_thread.join();
+    }
+
+    DestroyCommandBuffers();
+}
+
+bool VKCommandManager::Initialize() {
+    if (!CreateCommandBuffers()) {
+        return false;
+    }
+
+    if (use_threaded_submission && !CreateSubmitThread()) {
+        return false;
+    }
+
+    return true;
+}
+
+bool VKCommandManager::CreateCommandBuffers() {
+    static constexpr vk::SemaphoreCreateInfo semaphore_create_info;
+
+    auto device = g_vk_instace->GetDevice();
+    for (auto& resources : frame_resources) {
+        resources.init_command_buffer_used = false;
+        resources.semaphore_used = false;
+
+        // Create command pool
+        vk::CommandPoolCreateInfo pool_info({}, g_vk_instace->GetGraphicsQueueFamilyIndex());
+        resources.command_pool = device.createCommandPool(pool_info);
+
+        // Create command buffers
+        vk::CommandBufferAllocateInfo buffer_info(
+            resources.command_pool,
+            vk::CommandBufferLevel::ePrimary,
+            resources.command_buffers.size()
+        );
+
+        resources.command_buffers = device.allocateCommandBuffers(buffer_info);
+
+        vk::FenceCreateInfo fence_info(vk::FenceCreateFlagBits::eSignaled);
+        resources.fence = device.createFence(fence_info);
+
+        // TODO: A better way to choose the number of descriptors.
+        const std::array<vk::DescriptorPoolSize, 3> pool_sizes{{
+            { vk::DescriptorType::eUniformBuffer, 32 },
+            { vk::DescriptorType::eCombinedImageSampler, 64 },
+            { vk::DescriptorType::eStorageTexelBuffer, 64 }
+        }};
+
+        const vk::DescriptorPoolCreateInfo pool_create_info({}, 2048, pool_sizes);
+        resources.descriptor_pool = device.createDescriptorPool(pool_create_info);
+    }
+
+    // Create present semaphore
+    present_semaphore = device.createSemaphore(semaphore_create_info);
+
+    // Activate the first command buffer. BeginCommandBuffer() advances to the next buffer,
+    // so start with the last one.
+    current_frame = static_cast<u32>(frame_resources.size()) - 1;
+    BeginCommandBuffer();
+    return true;
+}
+
+void VKCommandManager::DestroyCommandBuffers() {
+    vk::Device device = g_vk_instace->GetDevice();
+
+    for (auto& resources : frame_resources) {
+        // Destroy command pool, which also frees any allocated command buffers
+        if (resources.command_pool) {
+            device.destroyCommandPool(resources.command_pool);
+        }
+
+        // Destroy any pending objects.
+        for (auto& it : resources.cleanup_resources)
+            it();
+
+        // Destroy remaining vulkan objects
+        if (resources.semaphore) {
+            device.destroySemaphore(resources.semaphore);
+        }
+
+        if (resources.fence) {
+            device.destroyFence(resources.fence);
+        }
+
+        if (resources.descriptor_pool) {
+            device.destroyDescriptorPool(resources.descriptor_pool);
+        }
+    }
+
+    device.destroySemaphore(present_semaphore);
+}
+
+vk::DescriptorSet VKCommandManager::AllocateDescriptorSet(vk::DescriptorSetLayout set_layout) {
+    vk::DescriptorSetAllocateInfo allocate_info(frame_resources[current_frame].descriptor_pool,
+                                                set_layout);
+    return g_vk_instace->GetDevice().allocateDescriptorSets(allocate_info)[0];
+}
+
+bool VKCommandManager::CreateSubmitThread() {
+    submit_loop = std::make_unique<Common::BlockingLoop>();
+
+    submit_thread = std::thread([this]() {
+        Common::SetCurrentThreadName("Vulkan CommandBufferManager SubmitThread");
+
+        submit_loop->Run([this]() {
+            PendingCommandBufferSubmit submit;
+            {
+                std::lock_guard guard(pending_submit_lock);
+                if (pending_submits.empty()) {
+                    submit_loop->AllowSleep();
+                    return;
+                }
+
+                submit = pending_submits.front();
+                pending_submits.pop_front();
+            }
+
+            SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
+                                submit.present_image_index);
+        });
+    });
+
+    return true;
+}
+
+void VKCommandManager::WaitForWorkerThreadIdle() {
+    // Drain the semaphore, then allow another request in the future.
+    submit_semaphore.Wait();
+    submit_semaphore.Post();
+}
+
+void VKCommandManager::WaitForFenceCounter(u64 fence_counter) {
+    if (completed_fence_counter >= fence_counter)
+        return;
+
+    // Find the first command buffer which covers this counter value.
+    u32 index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+    while (index != current_frame) {
+        if (frame_resources[index].fence_counter >= fence_counter)
+            break;
+
+        index = (index + 1) % COMMAND_BUFFER_COUNT;
+    }
+
+    ASSERT(index != current_frame);
+    WaitForCommandBufferCompletion(index);
+}
+
+void VKCommandManager::WaitForCommandBufferCompletion(u32 index) {
+    // Ensure this command buffer has been submitted.
+    WaitForWorkerThreadIdle();
+
+    // Wait for this command buffer to be completed.
+    auto result = g_vk_instace->GetDevice().waitForFences(frame_resources[index].fence,
+                                                          VK_TRUE, UINT64_MAX);
+    if (result != vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "vkWaitForFences failed");
+    }
+
+    // Clean up any resources for command buffers between the last known completed buffer and this
+    // now-completed command buffer. If we use more than two buffers, this may cover more than one
+    // buffer.
+    const u64 now_completed_counter = frame_resources[index].fence_counter;
+    u32 cleanup_index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+
+    while (cleanup_index != current_frame) {
+        auto& resources = frame_resources[cleanup_index];
+        if (resources.fence_counter > now_completed_counter) {
+            break;
+        }
+
+        if (resources.fence_counter > completed_fence_counter) {
+            for (auto& it : resources.cleanup_resources)
+                it();
+
+            resources.cleanup_resources.clear();
+        }
+
+        cleanup_index = (cleanup_index + 1) % COMMAND_BUFFER_COUNT;
+    }
+
+    completed_fence_counter = now_completed_counter;
+}
+
+void VKCommandManager::SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
+                                           vk::SwapchainKHR present_swap_chain,
+                                           u32 present_image_index) {
+    // End the current command buffer.
+    auto& resources = frame_resources[current_frame];
+    for (auto& command_buffer : resources.command_buffers) {
+        command_buffer.end();
+    }
+
+    // Grab the semaphore before submitting the command buffer, either on-thread or off-thread.
+    // This prevents a race where a second command buffer is executed
+    // before the worker thread has woken up and executed the first one.
+    submit_semaphore.Wait();
+
+    // Submitting off-thread?
+    if (use_threaded_submission && submit_on_worker_thread && !wait_for_completion) {
+        // Push to the pending submit queue.
+        {
+            std::lock_guard guard(pending_submit_lock);
+            pending_submits.push_back({present_swap_chain, present_image_index, current_frame});
+        }
+
+        // Wake up the worker thread for a single iteration.
+        submit_loop->Wakeup();
+    } else {
+        // Pass through to the normal submission path.
+        SubmitCommandBuffer(current_frame, present_swap_chain, present_image_index);
+
+        if (wait_for_completion) {
+            WaitForCommandBufferCompletion(current_frame);
+        }
+    }
+
+    // Switch to the next command buffer.
+    BeginCommandBuffer();
+}
+
+void VKCommandManager::SubmitCommandBuffer(u32 command_buffer_index,
+                                           vk::SwapchainKHR swapchain,
+                                           u32 present_image_index) {
+    auto& resources = frame_resources[command_buffer_index];
+
+    vk::PipelineStageFlags wait_stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
+    vk::SubmitInfo submit_info({}, wait_stage, resources.command_buffers);
+
+    // If the init command buffer did not have any commands recorded, don't submit it.
+    if (!resources.init_command_buffer_used) {
+        submit_info.setCommandBuffers(resources.command_buffers[1]);
+    }
+
+    if (resources.semaphore_used) {
+        submit_info.setSignalSemaphores(resources.semaphore);
+    }
+
+    submit_info.setSignalSemaphores(present_semaphore);
+    g_vk_instace->GetGraphicsQueue().submit(submit_info, resources.fence);
+
+    // The present must wait on the signal semaphore from the submit above.
+    vk::PresentInfoKHR present_info(present_semaphore, swapchain, present_image_index);
+    last_present_result = g_vk_instace->GetPresentQueue().presentKHR(present_info);
+    if (last_present_result != vk::Result::eSuccess) {
+        // eErrorOutOfDateKHR is not fatal, it just means we need to recreate our swap chain.
+        if (last_present_result != vk::Result::eErrorOutOfDateKHR &&
+            last_present_result != vk::Result::eSuboptimalKHR) {
+            LOG_ERROR(Render_Vulkan, "Present queue returned an error");
+        }
+
+        // Don't treat eSuboptimalKHR as fatal on Android. Android 10+ requires prerotation.
+        // See https://twitter.com/Themaister/status/1207062674011574273
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+        if (last_present_result != vk::Result::eSuboptimalKHR) {
+            last_present_failed.Set();
+        }
+#else
+        last_present_failed.Set();
+#endif
+    }
+
+    // Command buffer has been queued, so permit the next one.
+    submit_semaphore.Post();
+}
+
+void VKCommandManager::BeginCommandBuffer() {
+    // Move to the next command buffer.
+    const u32 next_buffer_index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+    auto& resources = frame_resources[next_buffer_index];
+    auto& device = g_vk_instace->GetDevice();
+
+    // Wait for the GPU to finish with all resources for this command buffer.
+    if (resources.fence_counter > completed_fence_counter) {
+        WaitForCommandBufferCompletion(next_buffer_index);
+    }
+
+    // Reset fence to unsignaled before starting.
+    device.resetFences(resources.fence);
+
+    // Reset the command pool to the beginning since we can re-use the memory now
+    device.resetCommandPool(resources.command_pool);
+
+    vk::CommandBufferBeginInfo begin_info(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
+
+    // Enable commands to be recorded to the two buffers again.
+    for (auto command_buffer : resources.command_buffers) {
+        command_buffer.begin(begin_info);
+    }
+
+    // Also do the same for the descriptor pool
+    device.resetDescriptorPool(resources.descriptor_pool);
+
+    // Reset upload command buffer state
+    resources.init_command_buffer_used = false;
+    resources.semaphore_used = false;
+    resources.fence_counter = next_fence_counter++;
+    current_frame = next_buffer_index;
+}
+
+std::unique_ptr<VKCommandManager> g_command_buffer_mgr;
+
+}  // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_manager.h b/src/video_core/renderer_vulkan/vk_command_manager.h
new file mode 100644
index 000000000..6a619f6d7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_manager.h
@@ -0,0 +1,145 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include <vulkan/vulkan.hpp>
+
+#include "common/common_types.h"
+#include "common/blocking_loop.h"
+#include "common/semaphore.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+
+namespace Vulkan {
+
+constexpr u32 COMMAND_BUFFER_COUNT = 2;
+
+class VKCommandManager {
+public:
+    explicit VKCommandManager(bool use_threaded_submission);
+    ~VKCommandManager();
+
+    bool Initialize();
+
+    // These command buffers are allocated per-frame. They are valid until the command buffer
+    // is submitted; after that you should call these functions again.
+    vk::CommandBuffer GetCurrentInitCommandBuffer() {
+        frame_resources[current_frame].init_command_buffer_used = true;
+        return frame_resources[current_frame].command_buffers[0];
+    }
+
+    vk::CommandBuffer GetCurrentCommandBuffer() const {
+        return frame_resources[current_frame].command_buffers[1];
+    }
+
+    vk::DescriptorPool GetCurrentDescriptorPool() const {
+        return frame_resources[current_frame].descriptor_pool;
+    }
+
+    // Allocates a descriptor set from the pool reserved for the current frame.
+    vk::DescriptorSet AllocateDescriptorSet(vk::DescriptorSetLayout set_layout);
+
+    // Fence "counters" are used to track which commands have been completed by the GPU.
+    // If the last completed fence counter is greater than or equal to N, it means that the work
+    // associated with counter N has been completed by the GPU. The value of N to associate with
+    // commands can be retrieved by calling GetCurrentFenceCounter().
+    u64 GetCompletedFenceCounter() const { return completed_fence_counter; }
+
+    // Gets the fence counter that will be signaled when the currently executing command buffer is
+    // queued and executed. Do not wait for this fence before the buffer is executed.
+    u64 GetCurrentFenceCounter() const { return frame_resources[current_frame].fence_counter; }
+
+    // Returns the semaphore for the current command buffer, which can be used to ensure the
+    // swap chain image is ready before the command buffer executes.
+    vk::Semaphore GetCurrentCommandBufferSemaphore() {
+        frame_resources[current_frame].semaphore_used = true;
+        return frame_resources[current_frame].semaphore;
+    }
+
+    // Ensure that the worker thread has submitted any previous command buffers and is idle.
+    void WaitForWorkerThreadIdle();
+
+    // Wait for a fence to be completed.
+    // Also invokes the deferred-destruction callbacks for completed buffers.
+    void WaitForFenceCounter(u64 fence_counter);
+
+    void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
+                             vk::SwapchainKHR present_swap_chain = VK_NULL_HANDLE,
+                             u32 present_image_index = -1);
+
+    // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.
+    bool CheckLastPresentFail() { return last_present_failed.TestAndClear(); }
+    vk::Result GetLastPresentResult() const { return last_present_result; }
+
+    // Schedule a vulkan resource for destruction later on. This will occur when the command buffer
+    // is next re-used, and the GPU has finished working with the specified resource.
+    template <typename VulkanObject>
+    void DestroyResource(VulkanObject object);
+
+private:
+    void BeginCommandBuffer();
+    bool CreateCommandBuffers();
+    void DestroyCommandBuffers();
+
+    bool CreateSubmitThread();
+
+    void WaitForCommandBufferCompletion(u32 command_buffer_index);
+    void SubmitCommandBuffer(u32 command_buffer_index, vk::SwapchainKHR present_swap_chain,
+                             u32 present_image_index);
+
+private:
+    struct FrameResources {
+        // [0] - Init (upload) command buffer, [1] - draw command buffer
+        std::vector<vk::CommandBuffer> command_buffers = {};
+        std::vector<std::function<void()>> cleanup_resources;
+
+        vk::CommandPool command_pool;
+        vk::DescriptorPool descriptor_pool;
+        vk::Fence fence;
+        vk::Semaphore semaphore;
+        u64 fence_counter = 0;
+        bool init_command_buffer_used = false;
+        bool semaphore_used = false;
+    };
+
+    struct PendingCommandBufferSubmit {
+        vk::SwapchainKHR present_swap_chain;
+        u32 present_image_index;
+        u32 command_buffer_index;
+    };
+
+    u64 next_fence_counter = 1;
+    u64 completed_fence_counter = 0;
+
+    std::array<FrameResources, COMMAND_BUFFER_COUNT> frame_resources;
+    u32 current_frame = 0;
+
+    // Threaded command buffer execution
+    // Semaphore determines when a command buffer can be queued
+    Common::Semaphore submit_semaphore;
+    std::thread submit_thread;
+    std::unique_ptr<Common::BlockingLoop> submit_loop;
+    std::deque<PendingCommandBufferSubmit> pending_submits;
+    std::mutex pending_submit_lock;
+    Common::Flag last_present_failed;
+    vk::Semaphore present_semaphore;
+    vk::Result last_present_result = vk::Result::eSuccess;
+    bool use_threaded_submission = false;
+};
+
+template <typename VulkanObject>
+void VKCommandManager::DestroyResource(VulkanObject object) {
+    auto& resources = frame_resources[current_frame];
+    auto deleter = [object]() { g_vk_instace->GetDevice().destroy(object); };
+    resources.cleanup_resources.push_back(deleter);
+}
+
+extern std::unique_ptr<VKCommandManager> g_command_buffer_mgr;
+
+}  // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index c868279ff..a112772b4 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -12,23 +12,6 @@
 namespace Vulkan {
 
-// Using multiple command buffers prevents stalling
-constexpr u32 COMMAND_BUFFER_COUNT = 3;
-
-struct FrameResources
-{
-    vk::CommandPool command_pool;
-    std::array<vk::CommandBuffer, 2> command_buffers = {};
-    vk::DescriptorPool descriptor_pool;
-    vk::Fence fence;
-    vk::Semaphore semaphore;
-    u64 fence_counter = 0;
-    bool init_command_buffer_used = false;
-    bool semaphore_used = false;
-
-    std::vector<std::function<void()>> cleanup_resources;
-};
-
 /// The global Vulkan instance
 class VKInstance {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 51ea0e5e2..eeb71f1d3 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -62,6 +62,9 @@ bool VKSwapChain::Create(u32 width, u32 height, bool vsync_enabled) {
         swapchain.swap(new_swapchain);
     }
 
+    // Create framebuffer and image views
+    SetupImages();
+
     return true;
 }
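
For reference, here is a minimal sketch of how the BlockingLoop added above is meant to be driven from a producer thread. It is not part of the patch; it only uses the public API declared in src/common/blocking_loop.h (Prepare, Run, Wakeup, Wait, AllowSleep, Stop), and the names main, worker, and work_items are purely illustrative.

```cpp
// Illustrative only -- not part of this patch.
#include <cstdio>
#include <thread>

#include "common/blocking_loop.h"

int main() {
    Common::BlockingLoop loop;
    int work_items = 0;

    // Put the loop into the running state before the worker starts, so that an
    // early Wait() blocks until the first payload run instead of returning.
    loop.Prepare();

    std::thread worker([&] {
        // Run() invokes the payload at least once per Wakeup() and, because the
        // payload calls AllowSleep(), falls back from busy looping to sleeping
        // on the internal event when there is nothing left to do.
        loop.Run([&] {
            ++work_items;
            loop.AllowSleep();
        });
    });

    // Producer side: request one more iteration and wait for it to complete.
    loop.Wakeup();
    loop.Wait();
    std::printf("payload ran %d time(s)\n", work_items);

    // Shut the loop down; the default kBlock mode waits for Run() to return.
    loop.Stop();
    worker.join();
    return 0;
}
```

This mirrors how CreateSubmitThread() in vk_command_manager.cpp uses the class: SubmitCommandBuffer() plays the producer role (queue work, then Wakeup), the submit thread's payload drains the pending queue and calls AllowSleep() when it is empty, and WaitForWorkerThreadIdle() relies on the submit semaphore rather than Wait() to know the queue has drained.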