diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index 5d65965fa..06909f288 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -60,6 +60,7 @@ add_library(common STATIC
     detached_tasks.h
     bit_field.h
     bit_set.h
+    blocking_loop.h
     cityhash.cpp
     cityhash.h
     color.h
@@ -69,6 +70,7 @@ add_library(common STATIC
     construct.h
     file_util.cpp
     file_util.h
+    flag.h
     hash.h
     linear_disk_cache.h
     logging/backend.cpp
@@ -92,6 +94,7 @@ add_library(common STATIC
     scm_rev.cpp
     scm_rev.h
     scope_exit.h
+    semaphore.h
     serialization/atomic.h
     serialization/boost_discrete_interval.hpp
     serialization/boost_flat_set.h
diff --git a/src/common/blocking_loop.h b/src/common/blocking_loop.h
new file mode 100644
index 000000000..fcdf6382c
--- /dev/null
+++ b/src/common/blocking_loop.h
@@ -0,0 +1,257 @@
+// Copyright 2015 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <atomic>
+#include <chrono>
+#include <mutex>
+
+#include "common/thread.h"
+#include "common/flag.h"
+
+namespace Common
+{
+// This class provides a synchronized loop.
+// It's a thread-safe way to trigger a new iteration without busy loops.
+// It's optimized for high-usage iterations which usually are already running while it's triggered
+// often.
+// Be careful when using Wait() and Wakeup() at the same time. Wait() may block forever while
+// Wakeup() is called regularly.
+class BlockingLoop
+{
+public:
+  enum StopMode
+  {
+    kNonBlock,
+    kBlock,
+    kBlockAndGiveUp,
+  };
+
+  BlockingLoop() { m_stopped.Set(); }
+  ~BlockingLoop() { Stop(kBlockAndGiveUp); }
+
+  // Triggers the payload of the Run() function to be executed at least once again.
+  // This function will never block and is designed to finish as fast as possible.
+  void Wakeup()
+  {
+    // Already running, so no need for a wakeup.
+    // This is the common case, so try to get this as fast as possible.
+    if (m_running_state.load() >= STATE_NEED_EXECUTION)
+      return;
+
+    // Mark that new data is available. If the old state will rerun the payload
+    // itself, we don't have to set the event to interrupt the worker.
+    if (m_running_state.exchange(STATE_NEED_EXECUTION) != STATE_SLEEPING)
+      return;
+
+    // Else, as the worker thread may sleep now, we have to set the event.
+    m_new_work_event.Set();
+  }
+
+  // Wait for a complete payload run after the last Wakeup() call.
+  // If stopped, this returns immediately.
+  void Wait()
+  {
+    // Already done.
+    if (IsDone())
+      return;
+
+    // Notifying the done event will only wake up one thread, so use a mutex here to
+    // allow only one waiting thread. This way the first waiter still gets an
+    // event-free wakeup.
+    std::lock_guard lk(m_wait_lock);
+
+    // Wait for the worker thread to finish.
+    while (!IsDone())
+    {
+      m_done_event.Wait();
+    }
+
+    // As we wanted to wait for the other thread, there is likely no work remaining.
+    // So there is no need for a busy loop any more.
+    m_may_sleep.Set();
+  }
+
+  // Wait for a complete payload run after the last Wakeup() call.
+  // This version calls a yield function whenever rel_time elapses without completion.
+  // If stopped, this returns immediately.
+  template <class Rep, class Period, class Functor>
+  void WaitYield(const std::chrono::duration<Rep, Period>& rel_time, Functor yield_func)
+  {
+    // Already done.
+    if (IsDone())
+      return;
+
+    // Notifying the done event will only wake up one thread, so use a mutex here to
+    // allow only one waiting thread. This way the first waiter still gets an
+    // event-free wakeup.
+    std::lock_guard lk(m_wait_lock);
+
+    // Wait for the worker thread to finish.
+    while (!IsDone())
+    {
+      if (!m_done_event.WaitFor(rel_time))
+        yield_func();
+    }
+
+    // As we wanted to wait for the other thread, there is likely no work remaining.
+    // So there is no need for a busy loop any more.
+    m_may_sleep.Set();
+  }
+
+  // Half-start the worker.
+  // This puts the object in a running state, so Wait() will block until the worker calls Run().
+  // This may be called from any thread and is supposed to be called at least once before Wait() is
+  // used.
+  void Prepare()
+  {
+    // There is a race condition if other threads call this function while
+    // the loop thread is initializing. Using this lock will ensure a valid state.
+    std::lock_guard lk(m_prepare_lock);
+
+    if (!m_stopped.TestAndClear())
+      return;
+
+    // So the payload will be executed once even without any Wakeup call.
+    m_running_state.store(STATE_LAST_EXECUTION);
+    m_shutdown.Clear();
+    m_may_sleep.Set();
+  }
+
+  // Main loop of this object.
+  // The payload callback is called at least as often as needed to match the Wakeup()
+  // requirements.
+  // The optional timeout parameter bounds how long the loop may sleep between payload calls.
+  // Use timeout = 0 to sleep without a timeout at all.
+  template <class F>
+  void Run(F payload, int64_t timeout = 0)
+  {
+    // Makes sure Prepare() has been called at least once before we enter the loop.
+    // A well-behaved caller should already have done this.
+    Prepare();
+
+    while (!m_shutdown.IsSet())
+    {
+      payload();
+
+      switch (m_running_state.load())
+      {
+      case STATE_NEED_EXECUTION:
+        // We won't get notified while we are in the STATE_NEED_EXECUTION state, so Wakeup may
+        // have been called in the meantime.
+        // So on leaving the STATE_NEED_EXECUTION state, we have to assume there may be some
+        // remaining tasks. To process these tasks, we call the payload again within the
+        // STATE_LAST_EXECUTION state.
+        m_running_state--;
+        break;
+
+      case STATE_LAST_EXECUTION:
+        // If we're still in the STATE_LAST_EXECUTION state, then Wakeup wasn't called within the
+        // last execution of the payload. This means we should be ready now.
+        // But bad luck, Wakeup may have been called right now. So break and rerun the payload
+        // if the state was touched.
+        if (m_running_state-- != STATE_LAST_EXECUTION)
+          break;
+
+        // Else we're likely in the STATE_DONE state now, so wake up the waiting threads right now.
+        // However, if we're not in the STATE_DONE state any more, the event should also be
+        // triggered so that we'll skip the next waiting call quite fast.
+        m_done_event.Set();
+        [[fallthrough]];
+
+      case STATE_DONE:
+        // We're done now. So time to check if we want to sleep or if we want to stay in a busy
+        // loop.
+        if (m_may_sleep.TestAndClear())
+        {
+          // Try to set the sleeping state.
+          if (m_running_state-- != STATE_DONE)
+            break;
+        }
+        else
+        {
+          // Busy loop.
+          break;
+        }
+        [[fallthrough]];
+
+      case STATE_SLEEPING:
+        // Just relax.
+        if (timeout > 0)
+        {
+          m_new_work_event.WaitFor(std::chrono::milliseconds(timeout));
+        }
+        else
+        {
+          m_new_work_event.Wait();
+        }
+        break;
+      }
+    }
+
+    // Shutting down, so get to a safe state.
+    m_running_state.store(STATE_DONE);
+    m_stopped.Set();
+
+    // Wake up the last Wait calls.
+    m_done_event.Set();
+  }
+
+  // Quits the main loop.
+  // By default, it will wait until the main loop quits.
+  // Be careful not to use the blocking modes within the payload of the Run() method.
+  void Stop(StopMode mode = kBlock)
+  {
+    if (m_stopped.IsSet())
+      return;
+
+    m_shutdown.Set();
+
+    // We have to interrupt the sleeping call to let the worker shut down soon.
+    Wakeup();
+
+    switch (mode)
+    {
+    case kNonBlock:
+      break;
+    case kBlock:
+      Wait();
+      break;
+    case kBlockAndGiveUp:
+      WaitYield(std::chrono::milliseconds(100), [&] {
+        // If timed out, assume no one will come along to call Run, so force a break.
+        m_stopped.Set();
+      });
+      break;
+    }
+  }
+
+  bool IsRunning() const { return !m_stopped.IsSet() && !m_shutdown.IsSet(); }
+  bool IsDone() const { return m_stopped.IsSet() || m_running_state.load() <= STATE_DONE; }
+
+  // This function should be triggered regularly over time so
+  // that we will fall back from the busy loop to sleeping.
+  void AllowSleep() { m_may_sleep.Set(); }
+
+private:
+  std::mutex m_wait_lock;
+  std::mutex m_prepare_lock;
+
+  Flag m_stopped;   // If this is set, Wait() shall not block.
+  Flag m_shutdown;  // If this is set, the loop shall end.
+
+  Event m_new_work_event;
+  Event m_done_event;
+
+  enum RUNNING_TYPE
+  {
+    STATE_SLEEPING = 0,
+    STATE_DONE = 1,
+    STATE_LAST_EXECUTION = 2,
+    STATE_NEED_EXECUTION = 3
+  };
+  std::atomic<int> m_running_state;  // Must hold a RUNNING_TYPE value.
+
+  Flag m_may_sleep;  // If this is set, we fall back from the busy loop to an event based
+                     // synchronization.
+};
+}  // namespace Common
diff --git a/src/common/flag.h b/src/common/flag.h
new file mode 100644
index 000000000..d9c750c37
--- /dev/null
+++ b/src/common/flag.h
@@ -0,0 +1,45 @@
+// Copyright 2014 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+// Abstraction for a simple flag that can be toggled in a multithreaded way.
+//
+// Simple API:
+// * Set(bool = true): sets the Flag
+// * IsSet(): tests if the flag is set
+// * Clear(): clears the flag (equivalent to Set(false)).
+//
+// More advanced features:
+// * TestAndSet(bool = true): sets the flag to the given value. If a change was
+//                            needed (the flag did not already have this value)
+//                            the function returns true. Else, false.
+// * TestAndClear(): alias for TestAndSet(false).
+
+#pragma once
+
+#include <atomic>
+
+namespace Common
+{
+class Flag final
+{
+public:
+  // Declared as explicit since we do not want "= true" to work on a flag
+  // object - it should be made explicit that a flag is *not* a normal
+  // variable.
+  explicit Flag(bool initial_value = false) : m_val(initial_value) {}
+
+  void Set(bool val = true) { m_val.store(val); }
+  void Clear() { Set(false); }
+  bool IsSet() const { return m_val.load(); }
+
+  bool TestAndSet(bool val = true)
+  {
+    bool expected = !val;
+    return m_val.compare_exchange_strong(expected, val);
+  }
+
+  bool TestAndClear() { return TestAndSet(false); }
+
+private:
+  std::atomic_bool m_val;
+};
+
+}  // namespace Common
diff --git a/src/common/semaphore.h b/src/common/semaphore.h
new file mode 100644
index 000000000..cc8755014
--- /dev/null
+++ b/src/common/semaphore.h
@@ -0,0 +1,72 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#ifdef _WIN32
+
+#include <windows.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count)
+  {
+    m_handle = CreateSemaphoreA(nullptr, initial_count, maximum_count, nullptr);
+  }
+
+  ~Semaphore() { CloseHandle(m_handle); }
+
+  void Wait() { WaitForSingleObject(m_handle, INFINITE); }
+  void Post() { ReleaseSemaphore(m_handle, 1, nullptr); }
+
+private:
+  HANDLE m_handle;
+};
+}  // namespace Common
+
+#elif defined(__APPLE__)
+
+#include <dispatch/dispatch.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count)
+  {
+    m_handle = dispatch_semaphore_create(0);
+    for (int i = 0; i < initial_count; i++)
+      dispatch_semaphore_signal(m_handle);
+  }
+
+  ~Semaphore() { dispatch_release(m_handle); }
+
+  void Wait() { dispatch_semaphore_wait(m_handle, DISPATCH_TIME_FOREVER); }
+  void Post() { dispatch_semaphore_signal(m_handle); }
+
+private:
+  dispatch_semaphore_t m_handle;
+};
+}  // namespace Common
+
+#else
+
+#include <semaphore.h>
+
+namespace Common
+{
+class Semaphore
+{
+public:
+  Semaphore(int initial_count, int maximum_count) { sem_init(&m_handle, 0, initial_count); }
+  ~Semaphore() { sem_destroy(&m_handle); }
+
+  void Wait() { sem_wait(&m_handle); }
+  void Post() { sem_post(&m_handle); }
+
+private:
+  sem_t m_handle;
+};
+}  // namespace Common
+
+#endif  // _WIN32
diff --git a/src/common/thread.h b/src/common/thread.h
index b4881fd44..6354dcce8 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -29,8 +29,8 @@ public:
         is_set = false;
     }
 
-    template <class Rep, class Period>
-    bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
+    template <class Rep, class Period = std::ratio<1>>
+    bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
         std::unique_lock lk{mutex};
         if (!condvar.wait_for(lk, time, [this] { return is_set.load(); }))
             return false;
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e3f905847..a055bf5ac 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -73,6 +73,8 @@ add_library(video_core STATIC
     renderer_vulkan/renderer_vulkan.h
     renderer_vulkan/vk_buffer.cpp
    renderer_vulkan/vk_buffer.h
+    renderer_vulkan/vk_command_manager.cpp
+    renderer_vulkan/vk_command_manager.h
     renderer_vulkan/vk_instance.cpp
     renderer_vulkan/vk_instance.h
     renderer_vulkan/vk_resource_cache.cpp
diff --git a/src/video_core/renderer_vulkan/vk_command_manager.cpp b/src/video_core/renderer_vulkan/vk_command_manager.cpp
new file mode 100644
index 000000000..119bc9e6c
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_manager.cpp
@@ -0,0 +1,329 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "video_core/renderer_vulkan/vk_command_manager.h"
+#include "common/assert.h"
+#include "common/thread.h"
+
+namespace Vulkan {
+
+VKCommandManager::VKCommandManager(bool use_threaded_submission)
+    : submit_semaphore(1, 1),
+      use_threaded_submission(use_threaded_submission) {
+}
+
+VKCommandManager::~VKCommandManager() {
+    // If the worker thread is enabled, stop and block until it exits.
+    if (use_threaded_submission) {
+        submit_loop->Stop();
+        submit_thread.join();
+    }
+
+    DestroyCommandBuffers();
+}
+
+bool VKCommandManager::Initialize() {
+    if (!CreateCommandBuffers()) {
+        return false;
+    }
+
+    if (use_threaded_submission && !CreateSubmitThread()) {
+        return false;
+    }
+
+    return true;
+}
+
+bool VKCommandManager::CreateCommandBuffers() {
+    static constexpr vk::SemaphoreCreateInfo semaphore_create_info;
+
+    auto device = g_vk_instace->GetDevice();
+    for (auto& resources : frame_resources) {
+        resources.init_command_buffer_used = false;
+        resources.semaphore_used = false;
+
+        // Create command pool
+        vk::CommandPoolCreateInfo pool_info({}, g_vk_instace->GetGraphicsQueueFamilyIndex());
+        resources.command_pool = device.createCommandPool(pool_info);
+
+        // Create command buffers
+        vk::CommandBufferAllocateInfo buffer_info(
+            resources.command_pool,
+            vk::CommandBufferLevel::ePrimary,
+            resources.command_buffers.size()
+        );
+
+        resources.command_buffers = device.allocateCommandBuffers(buffer_info);
+
+        vk::FenceCreateInfo fence_info(vk::FenceCreateFlagBits::eSignaled);
+        resources.fence = device.createFence(fence_info);
+
+        // TODO: A better way to choose the number of descriptors.
+        const std::array<vk::DescriptorPoolSize, 3> pool_sizes{{
+            { vk::DescriptorType::eUniformBuffer, 32 },
+            { vk::DescriptorType::eCombinedImageSampler, 64 },
+            { vk::DescriptorType::eStorageTexelBuffer, 64 }
+        }};
+
+        const vk::DescriptorPoolCreateInfo pool_create_info({}, 2048, pool_sizes);
+        resources.descriptor_pool = device.createDescriptorPool(pool_create_info);
+    }
+
+    // Create present semaphore
+    present_semaphore = device.createSemaphore(semaphore_create_info);
+
+    // Activate the first command buffer. BeginCommandBuffer() advances to the next buffer,
+    // so start with the last one.
+    current_frame = static_cast<u32>(frame_resources.size()) - 1;
+    BeginCommandBuffer();
+    return true;
+}
+
+void VKCommandManager::DestroyCommandBuffers() {
+    vk::Device device = g_vk_instace->GetDevice();
+
+    for (auto& resources : frame_resources) {
+        // Destroy command pool, which also frees any allocated command buffers
+        if (resources.command_pool) {
+            device.destroyCommandPool(resources.command_pool);
+        }
+
+        // Destroy any pending objects.
+        for (auto& it : resources.cleanup_resources)
+            it();
+
+        // Destroy remaining vulkan objects
+        if (resources.semaphore) {
+            device.destroySemaphore(resources.semaphore);
+        }
+
+        if (resources.fence) {
+            device.destroyFence(resources.fence);
+        }
+
+        if (resources.descriptor_pool) {
+            device.destroyDescriptorPool(resources.descriptor_pool);
+        }
+    }
+
+    device.destroySemaphore(present_semaphore);
+}
+
+vk::DescriptorSet VKCommandManager::AllocateDescriptorSet(vk::DescriptorSetLayout set_layout) {
+    vk::DescriptorSetAllocateInfo allocate_info(frame_resources[current_frame].descriptor_pool,
+                                                set_layout);
+    return g_vk_instace->GetDevice().allocateDescriptorSets(allocate_info)[0];
+}
+
+bool VKCommandManager::CreateSubmitThread() {
+    submit_loop = std::make_unique<Common::BlockingLoop>();
+
+    submit_thread = std::thread([this]() {
+        Common::SetCurrentThreadName("Vulkan CommandBufferManager SubmitThread");
+
+        submit_loop->Run([this]() {
+            PendingCommandBufferSubmit submit;
+            {
+                std::lock_guard guard(pending_submit_lock);
+                if (pending_submits.empty()) {
+                    submit_loop->AllowSleep();
+                    return;
+                }
+
+                submit = pending_submits.front();
+                pending_submits.pop_front();
+            }
+
+            SubmitCommandBuffer(submit.command_buffer_index, submit.present_swap_chain,
+                                submit.present_image_index);
+        });
+    });
+
+    return true;
+}
+
+void VKCommandManager::WaitForWorkerThreadIdle() {
+    // Drain the semaphore, then allow another request in the future.
+    submit_semaphore.Wait();
+    submit_semaphore.Post();
+}
+
+void VKCommandManager::WaitForFenceCounter(u64 fence_counter) {
+    if (completed_fence_counter >= fence_counter)
+        return;
+
+    // Find the first command buffer which covers this counter value.
+    u32 index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+    while (index != current_frame) {
+        if (frame_resources[index].fence_counter >= fence_counter)
+            break;
+
+        index = (index + 1) % COMMAND_BUFFER_COUNT;
+    }
+
+    ASSERT(index != current_frame);
+    WaitForCommandBufferCompletion(index);
+}
+
+void VKCommandManager::WaitForCommandBufferCompletion(u32 index) {
+    // Ensure this command buffer has been submitted.
+    WaitForWorkerThreadIdle();
+
+    // Wait for this command buffer to be completed.
+    auto result = g_vk_instace->GetDevice().waitForFences(frame_resources[index].fence,
+                                                          VK_TRUE, UINT64_MAX);
+    if (result != vk::Result::eSuccess) {
+        LOG_ERROR(Render_Vulkan, "vkWaitForFences failed");
+    }
+
+    // Clean up any resources for command buffers between the last known completed buffer and this
+    // now-completed command buffer. If we use more than two buffers, this may cover more than one
+    // buffer.
+    const u64 now_completed_counter = frame_resources[index].fence_counter;
+    u32 cleanup_index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+
+    while (cleanup_index != current_frame) {
+        auto& resources = frame_resources[cleanup_index];
+        if (resources.fence_counter > now_completed_counter) {
+            break;
+        }
+
+        if (resources.fence_counter > completed_fence_counter) {
+            for (auto& it : resources.cleanup_resources)
+                it();
+
+            resources.cleanup_resources.clear();
+        }
+
+        cleanup_index = (cleanup_index + 1) % COMMAND_BUFFER_COUNT;
+    }
+
+    completed_fence_counter = now_completed_counter;
+}
+
+void VKCommandManager::SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
+                                           vk::SwapchainKHR present_swap_chain,
+                                           u32 present_image_index) {
+    // End the current command buffer.
+    auto& resources = frame_resources[current_frame];
+    for (auto& command_buffer : resources.command_buffers) {
+        command_buffer.end();
+    }
+
+    // Grab the semaphore before submitting the command buffer, either on-thread or off-thread.
+    // This prevents a race where a second command buffer is executed
+    // before the worker thread has woken up and executed the first one.
+    submit_semaphore.Wait();
+
+    // Submitting off-thread?
+    if (use_threaded_submission && submit_on_worker_thread && !wait_for_completion) {
+        // Push to the pending submit queue.
+        {
+            std::lock_guard guard(pending_submit_lock);
+            pending_submits.push_back({present_swap_chain, present_image_index, current_frame});
+        }
+
+        // Wake up the worker thread for a single iteration.
+        submit_loop->Wakeup();
+    } else {
+        // Pass through to the normal submission path.
+        SubmitCommandBuffer(current_frame, present_swap_chain, present_image_index);
+
+        if (wait_for_completion) {
+            WaitForCommandBufferCompletion(current_frame);
+        }
+    }
+
+    // Switch to the next command buffer.
+    BeginCommandBuffer();
+}
+
+void VKCommandManager::SubmitCommandBuffer(u32 command_buffer_index,
+                                           vk::SwapchainKHR swapchain,
+                                           u32 present_image_index) {
+    auto& resources = frame_resources[command_buffer_index];
+
+    vk::PipelineStageFlags wait_stage = vk::PipelineStageFlagBits::eColorAttachmentOutput;
+    vk::SubmitInfo submit_info({}, wait_stage, resources.command_buffers);
+
+    // If the init command buffer did not have any commands recorded, don't submit it.
+    if (!resources.init_command_buffer_used) {
+        submit_info.setCommandBuffers(resources.command_buffers[1]);
+    }
+
+    if (resources.semaphore_used) {
+        submit_info.setSignalSemaphores(resources.semaphore);
+    }
+
+    submit_info.setSignalSemaphores(present_semaphore);
+    g_vk_instace->GetGraphicsQueue().submit(submit_info, resources.fence);
+
+    // The present must wait on the signal semaphore from the submit above.
+    vk::PresentInfoKHR present_info(present_semaphore, swapchain, present_image_index);
+    last_present_result = g_vk_instace->GetPresentQueue().presentKHR(present_info);
+    if (last_present_result != vk::Result::eSuccess) {
+        // eErrorOutOfDateKHR is not fatal, it just means we need to recreate our swap chain.
+        if (last_present_result != vk::Result::eErrorOutOfDateKHR &&
+            last_present_result != vk::Result::eSuboptimalKHR) {
+            LOG_ERROR(Render_Vulkan, "Present queue returned an error");
+        }
+
+        // Don't treat eSuboptimalKHR as fatal on Android. Android 10+ requires prerotation.
+        // See https://twitter.com/Themaister/status/1207062674011574273
+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+        if (last_present_result != vk::Result::eSuboptimalKHR) {
+            last_present_failed.Set();
+        }
+#else
+        last_present_failed.Set();
+#endif
+    }
+
+    // Command buffer has been queued, so permit the next one.
+    submit_semaphore.Post();
+}
+
+void VKCommandManager::BeginCommandBuffer() {
+    // Move to the next command buffer.
+    const u32 next_buffer_index = (current_frame + 1) % COMMAND_BUFFER_COUNT;
+    auto& resources = frame_resources[next_buffer_index];
+    auto& device = g_vk_instace->GetDevice();
+
+    // Wait for the GPU to finish with all resources for this command buffer.
+    if (resources.fence_counter > completed_fence_counter) {
+        WaitForCommandBufferCompletion(next_buffer_index);
+    }
+
+    // Reset fence to unsignaled before starting.
+    device.resetFences(resources.fence);
+
+    // Reset the command pool to the beginning since we can re-use the memory now
+    device.resetCommandPool(resources.command_pool);
+
+    vk::CommandBufferBeginInfo begin_info(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
+
+    // Enable commands to be recorded to the two buffers again.
+    for (auto command_buffer : resources.command_buffers) {
+        command_buffer.begin(begin_info);
+    }
+
+    // Also do the same for the descriptor pool
+    device.resetDescriptorPool(resources.descriptor_pool);
+
+    // Reset upload command buffer state
+    resources.init_command_buffer_used = false;
+    resources.semaphore_used = false;
+    resources.fence_counter = next_fence_counter++;
+    current_frame = next_buffer_index;
+}
+
+std::unique_ptr<VKCommandManager> g_command_buffer_mgr;
+
+}  // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_manager.h b/src/video_core/renderer_vulkan/vk_command_manager.h
new file mode 100644
index 000000000..6a619f6d7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_manager.h
@@ -0,0 +1,145 @@
+// Copyright 2016 Dolphin Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <deque>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include <vulkan/vulkan.hpp>
+
+#include "common/common_types.h"
+#include "common/blocking_loop.h"
+#include "common/semaphore.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+
+namespace Vulkan {
+
+constexpr u32 COMMAND_BUFFER_COUNT = 2;
+
+class VKCommandManager {
+public:
+    explicit VKCommandManager(bool use_threaded_submission);
+    ~VKCommandManager();
+
+    bool Initialize();
+
+    // These command buffers are allocated per-frame. They are valid until the command buffer
+    // is submitted; after that you should call these functions again.
+    vk::CommandBuffer GetCurrentInitCommandBuffer() {
+        frame_resources[current_frame].init_command_buffer_used = true;
+        return frame_resources[current_frame].command_buffers[0];
+    }
+
+    vk::CommandBuffer GetCurrentCommandBuffer() const {
+        return frame_resources[current_frame].command_buffers[1];
+    }
+
+    vk::DescriptorPool GetCurrentDescriptorPool() const {
+        return frame_resources[current_frame].descriptor_pool;
+    }
+
+    // Allocates a descriptor set from the pool reserved for the current frame.
+    vk::DescriptorSet AllocateDescriptorSet(vk::DescriptorSetLayout set_layout);
+
+    // Fence "counters" are used to track which commands have been completed by the GPU.
+    // If the last completed fence counter is greater than or equal to N, it means that the work
+    // associated with counter N has been completed by the GPU. The value of N to associate with
+    // commands can be retrieved by calling GetCurrentFenceCounter().
+    u64 GetCompletedFenceCounter() const { return completed_fence_counter; }
+
+    // Gets the fence counter that will be signaled when the currently executing command buffer is
+    // queued and executed. Do not wait for this fence before the buffer is executed.
+    u64 GetCurrentFenceCounter() const { return frame_resources[current_frame].fence_counter; }
+
+    // Returns the semaphore for the current command buffer, which can be used to ensure the
+    // swap chain image is ready before the command buffer executes.
+    vk::Semaphore GetCurrentCommandBufferSemaphore() {
+        frame_resources[current_frame].semaphore_used = true;
+        return frame_resources[current_frame].semaphore;
+    }
+
+    // Ensure that the worker thread has submitted any previous command buffers and is idle.
+    void WaitForWorkerThreadIdle();
+
+    // Wait for a fence to be completed.
+    // Also invokes the deferred-destruction callbacks for completed buffers.
+    void WaitForFenceCounter(u64 fence_counter);
+
+    void SubmitCommandBuffer(bool submit_on_worker_thread, bool wait_for_completion,
+                             vk::SwapchainKHR present_swap_chain = VK_NULL_HANDLE,
+                             u32 present_image_index = -1);
+
+    // Was the last present submitted to the queue a failure? If so, we must recreate our swapchain.
+    bool CheckLastPresentFail() { return last_present_failed.TestAndClear(); }
+    vk::Result GetLastPresentResult() const { return last_present_result; }
+
+    // Schedule a vulkan resource for destruction later on. This will occur when the command buffer
+    // is next re-used, and the GPU has finished working with the specified resource.
+    template <typename VulkanObject>
+    void DestroyResource(VulkanObject object);
+
+private:
+    void BeginCommandBuffer();
+    bool CreateCommandBuffers();
+    void DestroyCommandBuffers();
+
+    bool CreateSubmitThread();
+
+    void WaitForCommandBufferCompletion(u32 command_buffer_index);
+    void SubmitCommandBuffer(u32 command_buffer_index, vk::SwapchainKHR present_swap_chain,
+                             u32 present_image_index);
+
+private:
+    struct FrameResources {
+        // [0] - Init (upload) command buffer, [1] - draw command buffer
+        std::vector<vk::CommandBuffer> command_buffers = {};
+        std::vector<std::function<void()>> cleanup_resources;
+
+        vk::CommandPool command_pool;
+        vk::DescriptorPool descriptor_pool;
+        vk::Fence fence;
+        vk::Semaphore semaphore;
+        u64 fence_counter = 0;
+        bool init_command_buffer_used = false;
+        bool semaphore_used = false;
+    };
+
+    struct PendingCommandBufferSubmit {
+        vk::SwapchainKHR present_swap_chain;
+        u32 present_image_index;
+        u32 command_buffer_index;
+    };
+
+    u64 next_fence_counter = 1;
+    u64 completed_fence_counter = 0;
+
+    std::array<FrameResources, COMMAND_BUFFER_COUNT> frame_resources;
+    u32 current_frame = 0;
+
+    // Threaded command buffer execution
+    // Semaphore determines when a command buffer can be queued
+    Common::Semaphore submit_semaphore;
+    std::thread submit_thread;
+    std::unique_ptr<Common::BlockingLoop> submit_loop;
+    std::deque<PendingCommandBufferSubmit> pending_submits;
+    std::mutex pending_submit_lock;
+    Common::Flag last_present_failed;
+    vk::Semaphore present_semaphore;
+    vk::Result last_present_result = vk::Result::eSuccess;
+    bool use_threaded_submission = false;
+};
+
+template <typename VulkanObject>
+void VKCommandManager::DestroyResource(VulkanObject object) {
+    auto& resources = frame_resources[current_frame];
+    auto deleter = [object]() { g_vk_instace->GetDevice().destroy(object); };
+    resources.cleanup_resources.push_back(deleter);
+}
+
+extern std::unique_ptr<VKCommandManager> g_command_buffer_mgr;
+
+}  // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h
index c868279ff..a112772b4 100644
--- a/src/video_core/renderer_vulkan/vk_instance.h
+++ b/src/video_core/renderer_vulkan/vk_instance.h
@@ -12,23 +12,6 @@
 namespace Vulkan {
 
-// Using multiple command buffers prevents stalling
-constexpr u32 COMMAND_BUFFER_COUNT = 3;
-
-struct FrameResources
-{
-    vk::CommandPool command_pool;
-    std::array<vk::CommandBuffer, 2> command_buffers = {};
-    vk::DescriptorPool descriptor_pool;
-    vk::Fence fence;
-    vk::Semaphore semaphore;
-    u64 fence_counter = 0;
-    bool init_command_buffer_used = false;
-    bool semaphore_used = false;
-
-    std::vector<std::function<void()>> cleanup_resources;
-};
-
 /// The global Vulkan instance
 class VKInstance {
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 51ea0e5e2..eeb71f1d3 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -62,6 +62,9 @@ bool VKSwapChain::Create(u32 width, u32 height, bool vsync_enabled) {
         swapchain.swap(new_swapchain);
     }
 
+    // Create framebuffer and image views
+    SetupImages();
+
     return true;
 }
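
For reference, here is a minimal sketch of how the BlockingLoop added above is meant to be driven from a producer thread. It is not part of the patch; it only uses the public API declared in src/common/blocking_loop.h (Prepare, Run, Wakeup, Wait, AllowSleep, Stop), and the names main, worker, and work_items are purely illustrative.

```cpp
// Illustrative only -- not part of this patch.
#include <cstdio>
#include <thread>

#include "common/blocking_loop.h"

int main() {
    Common::BlockingLoop loop;
    int work_items = 0;

    // Put the loop into the running state before the worker starts, so that an
    // early Wait() blocks until the first payload run instead of returning.
    loop.Prepare();

    std::thread worker([&] {
        // Run() invokes the payload at least once per Wakeup() and, because the
        // payload calls AllowSleep(), falls back from busy looping to sleeping
        // on the internal event when there is nothing left to do.
        loop.Run([&] {
            ++work_items;
            loop.AllowSleep();
        });
    });

    // Producer side: request one more iteration and wait for it to complete.
    loop.Wakeup();
    loop.Wait();
    std::printf("payload ran %d time(s)\n", work_items);

    // Shut the loop down; the default kBlock mode waits for Run() to return.
    loop.Stop();
    worker.join();
    return 0;
}
```

This mirrors how CreateSubmitThread() in vk_command_manager.cpp uses the class: SubmitCommandBuffer() plays the producer role (queue work, then Wakeup), the submit thread's payload drains the pending queue and calls AllowSleep() when it is empty, and WaitForWorkerThreadIdle() relies on the submit semaphore rather than Wait() to know the queue has drained.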