VideoCore: Refactor fencing system.

This commit is contained in:
Fernando Sahmkow 2022-02-06 01:16:11 +01:00
parent 4d60410dd9
commit bc8b3d225e
20 changed files with 154 additions and 167 deletions

View File

@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
u32 height, u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect) {
const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
stride, format, transform, crop_rect};
system.GetPerfStats().EndSystemFrame();
system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}

View File

@ -38,7 +38,8 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
u32 stride, android::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect);
const Common::Rectangle<int>& crop_rect,
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
Kernel::KEvent* QueryEvent(u32 event_id) override;

View File

@ -269,17 +269,6 @@ void NVFlinger::Compose() {
return; // We are likely shutting down
}
auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
const auto& multi_fence = buffer.fence;
guard->unlock();
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
const auto& fence = multi_fence.fences[fence_id];
syncpoint_manager.WaitGuest(fence.id, fence.value);
}
guard->lock();
MicroProfileFlip();
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
@ -293,8 +282,10 @@ void NVFlinger::Compose() {
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
buffer.fence.fences, buffer.fence.num_fences);
MicroProfileFlip();
guard->lock();
swap_interval = buffer.swap_interval;

View File

@ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
const bool is_accuracy_normal =
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
auto it = committed_ranges.begin();
while (it != committed_ranges.end()) {
auto& current_intervals = *it;
auto next_it = std::next(it);
while (next_it != committed_ranges.end()) {
for (auto& interval : *next_it) {
current_intervals.subtract(interval);
}
next_it++;
}
it++;
}
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
u64 total_size_bytes = 0;
u64 largest_copy = 0;

View File

@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
void DmaPusher::DispatchCalls() {
MICROPROFILE_SCOPE(DispatchCalls);
gpu.SyncGuestHost();
dma_pushbuffer_subindex = 0;
dma_state.is_last_call = true;
@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() {
}
}
gpu.FlushCommands();
gpu.SyncGuestHost();
gpu.OnCommandListEnd();
}

View File

@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
return;
case MAXWELL3D_REG_INDEX(fragment_barrier):
return rasterizer->FragmentBarrier();
case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
rasterizer->InvalidateGPUCache();
return rasterizer->WaitForIdle();
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
return rasterizer->TiledCacheBarrier();
}
@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) {
rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) {
const GPUVAddr sequence_address{regs.query.QueryAddress()};
const u32 payload = regs.query.query_sequence;
std::function<void()> operation([this, sequence_address, payload] {
memory_manager.Write<u32>(sequence_address, payload);
});
rasterizer->SignalFence(std::move(operation));
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
struct LongQueryResult {
u64_le value;
u64_le timestamp;
};
const GPUVAddr sequence_address{regs.query.QueryAddress()};
const u32 payload = regs.query.query_sequence;
std::function<void()> operation([this, sequence_address, payload] {
LongQueryResult query_result{payload, system.GPU().GetTicks()};
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
});
rasterizer->SignalFence(std::move(operation));
}
break;
case Regs::QueryOperation::Acquire:

View File

@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() {
u64 timestamp;
};
Block block{};
block.sequence = regs.semaphore_sequence;
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
// CoreTiming
block.timestamp = gpu.GetTicks();
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_sequence;
std::function<void()> operation([this, sequence_address, payload] {
Block block{};
block.sequence = payload;
block.timestamp = gpu.GetTicks();
memory_manager.WriteBlock(sequence_address, &block, sizeof(block));
});
rasterizer->SignalFence(std::move(operation));
} else {
do {
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true;
regs.acquire_mode = false;
if (word != regs.acquire_value) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() {
regs.acquire_active = true;
regs.acquire_mode = true;
if (word < regs.acquire_value) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
} else if (op == GpuSemaphoreOperation::AcquireMask) {
if (word & regs.semaphore_sequence == 0) {
if (word && regs.semaphore_sequence == 0) {
rasterizer->ReleaseFences();
std::this_thread::sleep_for(std::chrono::milliseconds(1));
continue;
}
@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() {
}
void Puller::ProcessSemaphoreRelease() {
rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
const u32 payload = regs.semaphore_release;
std::function<void()> operation([this, sequence_address, payload] {
memory_manager.Write<u32>(sequence_address, payload);
});
rasterizer->SignalFence(std::move(operation));
}
void Puller::ProcessSemaphoreAcquire() {
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
const auto value = regs.semaphore_acquire;
std::this_thread::sleep_for(std::chrono::milliseconds(5));
if (word != value) {
while (word != value) {
regs.acquire_active = true;
regs.acquire_value = value;
std::this_thread::sleep_for(std::chrono::milliseconds(1));
rasterizer->ReleaseFences();
word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
// TODO(kemathe73) figure out how to do the acquire_timeout
regs.acquire_mode = false;
regs.acquire_source = false;
@ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
case BufferMethods::SemaphoreAddressHigh:
case BufferMethods::SemaphoreAddressLow:
case BufferMethods::SemaphoreSequencePayload:
case BufferMethods::WrcacheFlush:
case BufferMethods::SyncpointPayload:
break;
case BufferMethods::WrcacheFlush:
case BufferMethods::RefCnt:
rasterizer->SignalReference();
break;
@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
}
case BufferMethods::MemOpB: {
// Implement this better.
rasterizer->SyncGuestHost();
rasterizer->InvalidateGPUCache();
break;
}
case BufferMethods::MemOpC:

View File

@ -5,6 +5,8 @@
#include <algorithm>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <queue>
@ -19,28 +21,7 @@ namespace VideoCommon {
class FenceBase {
public:
explicit FenceBase(u32 payload_, bool is_stubbed_)
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
u8* GetAddress() const {
return address;
}
u32 GetPayload() const {
return payload;
}
bool IsSemaphore() const {
return is_semaphore;
}
private:
u8* address;
u32 payload;
bool is_semaphore;
explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
protected:
bool is_stubbed;
@ -60,31 +41,28 @@ public:
buffer_cache.AccumulateFlushes();
}
void SignalSemaphore(u8* addr, u32 value) {
void SyncOperation(std::function<void()>&& func) {
uncommitted_operations.emplace_back(std::move(func));
}
void SignalFence(std::function<void()>&& func) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
TFence new_fence = CreateFence(addr, value, !should_flush);
uncommitted_operations.emplace_back(std::move(func));
CommitOperations();
TFence new_fence = CreateFence(!should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
rasterizer.SyncGuestHost();
}
void SignalSyncPoint(u32 value) {
syncpoint_manager.IncrementGuest(value);
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
TFence new_fence = CreateFence(value, !should_flush);
fences.push(new_fence);
QueueFence(new_fence);
if (should_flush) {
rasterizer.FlushCommands();
}
rasterizer.SyncGuestHost();
std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); });
SignalFence(std::move(func));
}
void WaitPendingFences() {
@ -94,12 +72,10 @@ public:
WaitFence(current_fence);
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
auto payload = current_fence->GetPayload();
std::memcpy(address, &payload, sizeof(payload));
} else {
syncpoint_manager.IncrementHost(current_fence->GetPayload());
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
operation();
}
PopFence();
}
@ -114,11 +90,9 @@ protected:
virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
/// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
virtual TFence CreateFence(bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
@ -141,12 +115,10 @@ private:
return;
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
const auto payload = current_fence->GetPayload();
std::memcpy(address, &payload, sizeof(payload));
} else {
syncpoint_manager.IncrementHost(current_fence->GetPayload());
auto operations = std::move(pending_operations.front());
pending_operations.pop_front();
for (auto& operation : operations) {
operation();
}
PopFence();
}
@ -165,16 +137,20 @@ private:
}
void PopAsyncFlushes() {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.PopAsyncFlushes();
buffer_cache.PopAsyncFlushes();
}
query_cache.PopAsyncFlushes();
}
void CommitAsyncFlushes() {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CommitAsyncFlushes();
buffer_cache.CommitAsyncFlushes();
}
query_cache.CommitAsyncFlushes();
}
@ -183,7 +159,13 @@ private:
fences.pop();
}
void CommitOperations() {
pending_operations.emplace_back(std::move(uncommitted_operations));
}
std::queue<TFence> fences;
std::deque<std::function<void()>> uncommitted_operations;
std::deque<std::deque<std::function<void()>>> pending_operations;
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
};

View File

@ -93,16 +93,13 @@ struct GPU::Impl {
}
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost() {
rasterizer->SyncGuestHost();
void InvalidateGPUCache() {
rasterizer->InvalidateGPUCache();
}
/// Signal the ending of command list.
void OnCommandListEnd() {
if (is_async) {
// This command only applies to asynchronous GPU mode
gpu_thread.OnCommandListEnd();
}
gpu_thread.OnCommandListEnd();
}
/// Request a host GPU memory flush from the CPU.
@ -296,7 +293,7 @@ struct GPU::Impl {
}
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences) {
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
size_t current_request_counter{};
{
std::unique_lock<std::mutex> lk(request_swap_mutex);
@ -412,8 +409,8 @@ void GPU::FlushCommands() {
impl->FlushCommands();
}
void GPU::SyncGuestHost() {
impl->SyncGuestHost();
void GPU::InvalidateGPUCache() {
impl->InvalidateGPUCache();
}
void GPU::OnCommandListEnd() {
@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
}
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences) {
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
impl->RequestSwapBuffers(framebuffer, fences, num_fences);
}

View File

@ -110,7 +110,7 @@ public:
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
void SyncGuestHost();
void InvalidateGPUCache();
/// Signal the ending of command list.
void OnCommandListEnd();
@ -180,7 +180,7 @@ public:
void RendererFrameEndNotify();
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
Service::Nvidia::NvFence* fences, size_t num_fences);
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences);
/// Performs any additional setup necessary in order to begin GPU emulation.
/// This can be used to launch any necessary threads and register any necessary

View File

@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
}
void ThreadManager::TickGPU() {
PushCommand(GPUTickCommand(), true);
PushCommand(GPUTickCommand());
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

View File

@ -62,7 +62,10 @@ public:
virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0;
/// Signal a GPU based semaphore as a fence
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
virtual void SignalFence(std::function<void()>&& func) = 0;
/// Send an operation to be done after a certain amount of flushes.
virtual void SyncOperation(std::function<void()>&& func) = 0;
/// Signal a GPU based syncpoint as a fence
virtual void SignalSyncPoint(u32 value) = 0;
@ -89,7 +92,7 @@ public:
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
/// Sync memory between guest and host.
virtual void SyncGuestHost() = 0;
virtual void InvalidateGPUCache() = 0;
/// Unmap memory range
virtual void UnmapMemory(VAddr addr, u64 size) = 0;

View File

@ -10,10 +10,7 @@
namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize
BufferCache& buffer_cache_, QueryCache& query_cache_)
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
}
Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
return std::make_shared<GLInnerFence>(is_stubbed);
}
void FenceManagerOpenGL::QueueFence(Fence& fence) {

View File

@ -16,8 +16,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
explicit GLInnerFence(bool is_stubbed_);
~GLInnerFence();
void Queue();
@ -40,8 +39,7 @@ public:
QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;

View File

@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
}
}
void RasterizerOpenGL::SyncGuestHost() {
void RasterizerOpenGL::InvalidateGPUCache() {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
shader_cache.SyncGuestHost();
{
@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
}
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory->Write<u32>(addr, value);
return;
}
auto paddr = gpu_memory->GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value);
void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
fence_manager.SignalFence(std::move(func));
}
void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::SignalReference() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.SignalOrdering();
}
void RasterizerOpenGL::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences();
}
@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() {
}
void RasterizerOpenGL::FragmentBarrier() {
glTextureBarrier();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}

View File

@ -80,10 +80,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;

View File

@ -11,11 +11,8 @@
namespace Vulkan {
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
: FenceBase{is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
scheduler{scheduler_} {}
Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
}
Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
Fence FenceManager::CreateFence(bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, is_stubbed);
}
void FenceManager::QueueFence(Fence& fence) {

View File

@ -25,8 +25,7 @@ class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
~InnerFence();
void Queue();
@ -50,8 +49,7 @@ public:
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
Fence CreateFence(bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;

View File

@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
}
}
void RasterizerVulkan::SyncGuestHost() {
void RasterizerVulkan::InvalidateGPUCache() {
pipeline_cache.SyncGuestHost();
{
std::scoped_lock lock{buffer_cache.mutex};
@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
}
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
gpu_memory->Write<u32>(addr, value);
return;
}
auto paddr = gpu_memory->GetPointer(addr);
fence_manager.SignalSemaphore(paddr, value);
void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
fence_manager.SignalFence(std::move(func));
}
void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::SignalReference() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.SignalOrdering();
}
void RasterizerVulkan::ReleaseFences() {
if (!gpu.IsAsync()) {
return;
}
fence_manager.WaitPendingFences();
}

View File

@ -76,10 +76,11 @@ public:
bool MustFlushRegion(VAddr addr, u64 size) override;
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
void InvalidateGPUCache() override;
void UnmapMemory(VAddr addr, u64 size) override;
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
void SignalSemaphore(GPUVAddr addr, u32 value) override;
void SignalFence(std::function<void()>&& func) override;
void SyncOperation(std::function<void()>&& func) override;
void SignalSyncPoint(u32 value) override;
void SignalReference() override;
void ReleaseFences() override;