texture_runtime: Add staging buffer lock mechanism

This commit is contained in:
emufan4568
2022-09-10 12:54:48 +03:00
committed by GPUCode
parent 5d48107dd6
commit 73d6a9d585
7 changed files with 82 additions and 73 deletions

View File

@ -303,7 +303,7 @@ if(UNIX AND NOT APPLE)
install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin") install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
endif() endif()
if (MSVC) if (MSVC OR MINGW)
include(CopyCitraQt5Deps) include(CopyCitraQt5Deps)
include(CopyCitraSDLDeps) include(CopyCitraSDLDeps)
copy_citra_Qt5_deps(citra-qt) copy_citra_Qt5_deps(citra-qt)

View File

@ -3,7 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <span>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <boost/serialization/export.hpp> #include <boost/serialization/export.hpp>
@ -65,8 +65,11 @@ private:
BOOST_CLASS_EXPORT_KEY(BufferMem); BOOST_CLASS_EXPORT_KEY(BufferMem);
/// A managed reference to host-side memory. Fast enough to be used everywhere instead of u8* /**
/// Supports serialization. * A managed reference to host-side memory.
* Fast enough to be used everywhere instead of u8*
* Supports serialization.
*/
class MemoryRef { class MemoryRef {
public: public:
MemoryRef() = default; MemoryRef() = default;
@ -75,41 +78,52 @@ public:
: backing_mem(std::move(backing_mem_)), offset(0) { : backing_mem(std::move(backing_mem_)), offset(0) {
Init(); Init();
} }
MemoryRef(std::shared_ptr<BackingMem> backing_mem_, u64 offset_) MemoryRef(std::shared_ptr<BackingMem> backing_mem_, u64 offset_)
: backing_mem(std::move(backing_mem_)), offset(offset_) { : backing_mem(std::move(backing_mem_)), offset(offset_) {
ASSERT(offset <= backing_mem->GetSize()); ASSERT(offset <= backing_mem->GetSize());
Init(); Init();
} }
explicit operator bool() const { explicit operator bool() const {
return cptr != nullptr; return cptr != nullptr;
} }
operator u8*() { operator u8*() {
return cptr; return cptr;
} }
u8* GetPtr() {
return cptr;
}
std::byte* GetBytes() {
return reinterpret_cast<std::byte*>(cptr);
}
operator const u8*() const { operator const u8*() const {
return cptr; return cptr;
} }
u8* GetPtr() {
return cptr;
}
const u8* GetPtr() const { const u8* GetPtr() const {
return cptr; return cptr;
} }
const std::byte* GetBytes() const {
return reinterpret_cast<const std::byte*>(cptr); std::span<std::byte> GetBytes(std::size_t size) {
return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size};
} }
std::span<const std::byte> GetBytes(std::size_t size) const {
return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size};
}
std::size_t GetSize() const { std::size_t GetSize() const {
return csize; return csize;
} }
MemoryRef& operator+=(u32 offset_by) { MemoryRef& operator+=(u32 offset_by) {
ASSERT(offset_by < csize); ASSERT(offset_by < csize);
offset += offset_by; offset += offset_by;
Init(); Init();
return *this; return *this;
} }
MemoryRef operator+(u32 offset_by) const { MemoryRef operator+(u32 offset_by) const {
ASSERT(offset_by < csize); ASSERT(offset_by < csize);
return MemoryRef(backing_mem, offset + offset_by); return MemoryRef(backing_mem, offset + offset_by);

View File

@ -3,7 +3,6 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#pragma once #pragma once
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>

View File

@ -2,12 +2,9 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <bit>
#include "common/microprofile.h" #include "common/microprofile.h"
#include "common/scope_exit.h" #include "common/scope_exit.h"
#include "common/texture.h" #include "core/memory.h"
#include "core/core.h"
#include "core/settings.h"
#include "video_core/rasterizer_cache/cached_surface.h" #include "video_core/rasterizer_cache/cached_surface.h"
#include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state.h"
@ -29,17 +26,16 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa
void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
DEBUG_ASSERT(load_start >= addr && load_end <= end); DEBUG_ASSERT(load_start >= addr && load_end <= end);
auto texture_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start);
if (!texture_ptr) { if (!source_ptr) {
return; return;
} }
const u32 upload_size = std::clamp(load_end - load_start, 0u, static_cast<u32>(texture_ptr.GetSize())); const auto upload_size = std::clamp<std::size_t>(load_end - load_start, 0u, source_ptr.GetSize());
const u32 texture_size = width * height * GetBytesPerPixel(pixel_format); const auto upload_data = source_ptr.GetBytes(upload_size);
const auto upload_data = std::span{texture_ptr.GetBytes(), upload_size};
if (gl_buffer.empty()) { if (gl_buffer.empty()) {
gl_buffer.resize(texture_size); gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
} }
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
@ -51,7 +47,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
} else if (pixel_format == PixelFormat::RGB8 && GLES) { } else if (pixel_format == PixelFormat::RGB8 && GLES) {
Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer); Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer);
} else { } else {
std::memcpy(gl_buffer.data() + load_start - addr, texture_ptr, upload_size); std::memcpy(gl_buffer.data() + load_start - addr, source_ptr, upload_size);
} }
} else { } else {
UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer); UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer);
@ -60,76 +56,53 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64));
void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
u8* dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr); DEBUG_ASSERT(flush_start >= addr && flush_end <= end);
if (dst_buffer == nullptr) {
auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
if (!dest_ptr) {
return; return;
} }
const u32 byte_size = width * height * GetBytesPerPixel(pixel_format); const auto download_size = std::clamp<std::size_t>(flush_end - flush_start, 0u, dest_ptr.GetSize());
DEBUG_ASSERT(gl_buffer.size() == byte_size); const auto download_loc = dest_ptr.GetBytes(download_size);
// TODO: Should probably be done in ::Memory:: and check for other regions too
// same as loadglbuffer()
if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
flush_end = Memory::VRAM_VADDR_END;
if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
flush_start = Memory::VRAM_VADDR;
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
ASSERT(flush_start >= addr && flush_end <= end);
const u32 start_offset = flush_start - addr; const u32 start_offset = flush_start - addr;
const u32 end_offset = flush_end - addr; const u32 end_offset = flush_end - addr;
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
if (type == SurfaceType::Fill) { if (type == SurfaceType::Fill) {
const u32 coarse_start_offset = start_offset - (start_offset % fill_size); const u32 coarse_start_offset = start_offset - (start_offset % fill_size);
const u32 backup_bytes = start_offset % fill_size; const u32 backup_bytes = start_offset % fill_size;
std::array<u8, 4> backup_data; std::array<u8, 4> backup_data;
if (backup_bytes) { if (backup_bytes) {
std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes); std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes);
} }
for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) { for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
std::memcpy(&dst_buffer[offset], &fill_data[0], std::memcpy(&dest_ptr[offset], &fill_data[0],
std::min(fill_size, end_offset - offset)); std::min(fill_size, end_offset - offset));
} }
if (backup_bytes) if (backup_bytes)
std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes); std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes);
} else if (!is_tiled) { } else if (!is_tiled) {
ASSERT(type == SurfaceType::Color); ASSERT(type == SurfaceType::Color);
if (pixel_format == PixelFormat::RGBA8 && GLES) { if (pixel_format == PixelFormat::RGBA8 && GLES) {
for (std::size_t i = start_offset; i < flush_end - addr; i += 4) { Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_loc);
dst_buffer[i] = (u8)gl_buffer[i + 3];
dst_buffer[i + 1] = (u8)gl_buffer[i + 2];
dst_buffer[i + 2] = (u8)gl_buffer[i + 1];
dst_buffer[i + 3] = (u8)gl_buffer[i];
}
} else if (pixel_format == PixelFormat::RGB8 && GLES) { } else if (pixel_format == PixelFormat::RGB8 && GLES) {
for (std::size_t i = start_offset; i < flush_end - addr; i += 3) { Pica::Texture::ConvertBGRToRGB(gl_buffer, download_loc);
dst_buffer[i] = (u8)gl_buffer[i + 2]; } else {
dst_buffer[i + 1] = (u8)gl_buffer[i + 1]; std::memcpy(download_loc.data() + start_offset, gl_buffer.data() + start_offset, flush_end - flush_start);
dst_buffer[i + 2] = (u8)gl_buffer[i];
} }
} else { } else {
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], SwizzleTexture(*this, flush_start, flush_end, gl_buffer, download_loc);
flush_end - flush_start);
}
} else {
std::span<std::byte> texture_data{(std::byte*)dst_buffer + start_offset, byte_size};
SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data);
} }
} }
MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) { void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(RasterizerCache_TextureUL); MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format));
// Load data from memory to the surface // Load data from memory to the surface
GLint x0 = static_cast<GLint>(rect.left); GLint x0 = static_cast<GLint>(rect.left);
@ -138,15 +111,13 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
GLuint target_tex = texture.handle; GLuint target_tex = texture.handle;
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface
// surface
OGLTexture unscaled_tex; OGLTexture unscaled_tex;
if (res_scale != 1) { if (res_scale != 1) {
x0 = 0; x0 = 0;
y0 = 0; y0 = 0;
unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight()); unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
target_tex = unscaled_tex.handle; target_tex = unscaled_tex.handle;
} }

View File

@ -214,9 +214,12 @@ const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
.size = size .size = size
}; };
if (auto it = search.lower_bound(key); it != search.end()) { for (auto it = search.lower_bound(key); it != search.end(); it++) {
// Attempt to find a free buffer that fits the requested data
if (it->IsFree()) {
return *it; return *it;
} }
}
StagingBuffer staging{}; StagingBuffer staging{};
staging.buffer.Create(); staging.buffer.Create();
@ -243,7 +246,7 @@ const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
// Insert it to the cache and return the memory // Insert it to the cache and return the memory
staging.mapped = std::span{reinterpret_cast<std::byte*>(data), size}; staging.mapped = std::span{reinterpret_cast<std::byte*>(data), size};
const auto& [it, _] = search.insert(std::move(staging)); const auto& it = search.insert(std::move(staging));
return *it; return *it;
} }

View File

@ -16,10 +16,27 @@ struct StagingBuffer {
u32 size = 0; u32 size = 0;
std::span<std::byte> mapped{}; std::span<std::byte> mapped{};
OGLBuffer buffer{}; OGLBuffer buffer{};
mutable OGLSync buffer_lock{};
bool operator<(const StagingBuffer& other) const { bool operator<(const StagingBuffer& other) const {
return size < other.size; return size < other.size;
} }
/// Returns true if the buffer does not take part in pending transfer operations.
/// A buffer that has never been locked holds no sync object and is reported as free.
bool IsFree() const {
    // Without a sync object there is nothing to wait on. Passing a null handle to
    // glGetSynciv is an error and would leave the output value unwritten.
    if (buffer_lock.handle == nullptr) {
        return true;
    }

    // Start from "unsignaled" so a failed query never reports an in-flight buffer as free.
    GLint status = GL_UNSIGNALED;
    glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status);
    return status == GL_SIGNALED;
}
/// Marks the buffer as in use so the runtime will not hand it out again until the
/// transfer operation is complete. Any sync object held from an earlier lock is
/// released first, then a new one is created; IsFree() queries that object's status.
void Lock() const {
    const bool holds_previous_sync = static_cast<bool>(buffer_lock);
    if (holds_previous_sync) {
        buffer_lock.Release();
    }

    buffer_lock.Create();
}
}; };
class Driver; class Driver;
@ -33,6 +50,9 @@ public:
TextureRuntime(Driver& driver); TextureRuntime(Driver& driver);
~TextureRuntime() = default; ~TextureRuntime() = default;
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
const StagingBuffer& FindStaging(u32 size, bool upload);
/// Copies the GPU pixel data to the provided pixels buffer /// Copies the GPU pixel data to the provided pixels buffer
void ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy, void ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy,
PixelFormat format, std::span<std::byte> pixels); PixelFormat format, std::span<std::byte> pixels);
@ -49,14 +69,11 @@ public:
/// Generates mipmaps for all the available levels of the texture /// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(OGLTexture& texture, u32 max_level); void GenerateMipmaps(OGLTexture& texture, u32 max_level);
/// Maps an internal staging buffer of the provided size of pixel uploads/downloads
const StagingBuffer& FindStaging(u32 size, bool upload);
private: private:
Driver& driver; Driver& driver;
OGLFramebuffer read_fbo, draw_fbo; OGLFramebuffer read_fbo, draw_fbo;
std::set<StagingBuffer> upload_buffers; std::multiset<StagingBuffer> upload_buffers;
std::set<StagingBuffer> download_buffers; std::multiset<StagingBuffer> download_buffers;
}; };
} // namespace OpenGL } // namespace OpenGL

View File

@ -152,12 +152,17 @@ public:
~OGLSync() { ~OGLSync() {
Release(); Release();
} }
OGLSync& operator=(OGLSync&& o) noexcept { OGLSync& operator=(OGLSync&& o) noexcept {
Release(); Release();
handle = std::exchange(o.handle, nullptr); handle = std::exchange(o.handle, nullptr);
return *this; return *this;
} }
/// Returns true when a sync object handle is currently held
explicit operator bool() const noexcept {
    // The handle is pointer-like (the move assignment resets it with nullptr),
    // so compare against nullptr rather than the integer literal 0.
    return handle != nullptr;
}
/// Creates a new internal OpenGL resource and stores the handle /// Creates a new internal OpenGL resource and stores the handle
void Create(); void Create();