texture_runtime: Add staging buffer lock mechanism
@@ -303,7 +303,7 @@ if(UNIX AND NOT APPLE)
     install(TARGETS citra-qt RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
 endif()
 
-if (MSVC)
+if (MSVC OR MINGW)
     include(CopyCitraQt5Deps)
     include(CopyCitraSDLDeps)
     copy_citra_Qt5_deps(citra-qt)

@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #pragma once
-
+#include <span>
 #include <memory>
 #include <vector>
 #include <boost/serialization/export.hpp>

@@ -65,8 +65,11 @@ private:
 
 BOOST_CLASS_EXPORT_KEY(BufferMem);
 
-/// A managed reference to host-side memory. Fast enough to be used everywhere instead of u8*
-/// Supports serialization.
+/**
+ * A managed reference to host-side memory.
+ * Fast enough to be used everywhere instead of u8*
+ * Supports serialization.
+ */
 class MemoryRef {
 public:
     MemoryRef() = default;

@@ -75,41 +78,52 @@ public:
         : backing_mem(std::move(backing_mem_)), offset(0) {
         Init();
     }
 
     MemoryRef(std::shared_ptr<BackingMem> backing_mem_, u64 offset_)
         : backing_mem(std::move(backing_mem_)), offset(offset_) {
         ASSERT(offset <= backing_mem->GetSize());
         Init();
     }
 
     explicit operator bool() const {
         return cptr != nullptr;
     }
 
     operator u8*() {
         return cptr;
     }
-    u8* GetPtr() {
-        return cptr;
-    }
-    std::byte* GetBytes() {
-        return reinterpret_cast<std::byte*>(cptr);
-    }
     operator const u8*() const {
         return cptr;
     }
 
+    u8* GetPtr() {
+        return cptr;
+    }
+
     const u8* GetPtr() const {
         return cptr;
     }
-    const std::byte* GetBytes() const {
-        return reinterpret_cast<const std::byte*>(cptr);
+    std::span<std::byte> GetBytes(std::size_t size) {
+        return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size};
     }
 
+    std::span<const std::byte> GetBytes(std::size_t size) const {
+        return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size};
+    }
+
     std::size_t GetSize() const {
         return csize;
     }
+
     MemoryRef& operator+=(u32 offset_by) {
         ASSERT(offset_by < csize);
         offset += offset_by;
         Init();
         return *this;
     }
 
     MemoryRef operator+(u32 offset_by) const {
         ASSERT(offset_by < csize);
         return MemoryRef(backing_mem, offset + offset_by);

@@ -3,7 +3,6 @@
 // Refer to the license.txt file included.
 
 #pragma once
 
 #include <array>
 #include <cstddef>
 #include <memory>

@@ -2,12 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <bit>
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
-#include "common/texture.h"
-#include "core/core.h"
-#include "core/settings.h"
+#include "core/memory.h"
 #include "video_core/rasterizer_cache/cached_surface.h"
 #include "video_core/rasterizer_cache/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_state.h"

@@ -29,17 +26,16 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa
 void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
     DEBUG_ASSERT(load_start >= addr && load_end <= end);
 
-    auto texture_ptr = VideoCore::g_memory->GetPhysicalRef(load_start);
-    if (!texture_ptr) {
+    auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start);
+    if (!source_ptr) {
         return;
     }
 
-    const u32 upload_size = std::clamp(load_end - load_start, 0u, static_cast<u32>(texture_ptr.GetSize()));
-    const u32 texture_size = width * height * GetBytesPerPixel(pixel_format);
-    const auto upload_data = std::span{texture_ptr.GetBytes(), upload_size};
+    const auto upload_size = std::clamp<std::size_t>(load_end - load_start, 0u, source_ptr.GetSize());
+    const auto upload_data = source_ptr.GetBytes(upload_size);
 
     if (gl_buffer.empty()) {
-        gl_buffer.resize(texture_size);
+        gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
     }
 
     MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);

@@ -51,7 +47,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
         } else if (pixel_format == PixelFormat::RGB8 && GLES) {
             Pica::Texture::ConvertBGRToRGB(upload_data, gl_buffer);
         } else {
-            std::memcpy(gl_buffer.data() + load_start - addr, texture_ptr, upload_size);
+            std::memcpy(gl_buffer.data() + load_start - addr, source_ptr, upload_size);
         }
     } else {
         UnswizzleTexture(*this, load_start, load_end, upload_data, gl_buffer);

@@ -60,76 +56,53 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
 
 MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64));
 void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
-    u8* dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
-    if (dst_buffer == nullptr) {
+    DEBUG_ASSERT(flush_start >= addr && flush_end <= end);
+
+    auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
+    if (!dest_ptr) {
         return;
     }
 
-    const u32 byte_size = width * height * GetBytesPerPixel(pixel_format);
-    DEBUG_ASSERT(gl_buffer.size() == byte_size);
+    const auto download_size = std::clamp<std::size_t>(flush_end - flush_start, 0u, dest_ptr.GetSize());
+    const auto download_loc = dest_ptr.GetBytes(download_size);
 
-    // TODO: Should probably be done in ::Memory:: and check for other regions too
-    // same as loadglbuffer()
-    if (flush_start < Memory::VRAM_VADDR_END && flush_end > Memory::VRAM_VADDR_END)
-        flush_end = Memory::VRAM_VADDR_END;
-
-    if (flush_start < Memory::VRAM_VADDR && flush_end > Memory::VRAM_VADDR)
-        flush_start = Memory::VRAM_VADDR;
-
-    MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
-
-    ASSERT(flush_start >= addr && flush_end <= end);
     const u32 start_offset = flush_start - addr;
     const u32 end_offset = flush_end - addr;
 
+    MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
+
     if (type == SurfaceType::Fill) {
         const u32 coarse_start_offset = start_offset - (start_offset % fill_size);
         const u32 backup_bytes = start_offset % fill_size;
         std::array<u8, 4> backup_data;
         if (backup_bytes) {
-            std::memcpy(&backup_data[0], &dst_buffer[coarse_start_offset], backup_bytes);
+            std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes);
         }
 
         for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
-            std::memcpy(&dst_buffer[offset], &fill_data[0],
+            std::memcpy(&dest_ptr[offset], &fill_data[0],
                         std::min(fill_size, end_offset - offset));
         }
 
         if (backup_bytes)
-            std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
+            std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes);
     } else if (!is_tiled) {
         ASSERT(type == SurfaceType::Color);
         if (pixel_format == PixelFormat::RGBA8 && GLES) {
-            for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
-                dst_buffer[i] = (u8)gl_buffer[i + 3];
-                dst_buffer[i + 1] = (u8)gl_buffer[i + 2];
-                dst_buffer[i + 2] = (u8)gl_buffer[i + 1];
-                dst_buffer[i + 3] = (u8)gl_buffer[i];
-            }
+            Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_loc);
         } else if (pixel_format == PixelFormat::RGB8 && GLES) {
-            for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
-                dst_buffer[i] = (u8)gl_buffer[i + 2];
-                dst_buffer[i + 1] = (u8)gl_buffer[i + 1];
-                dst_buffer[i + 2] = (u8)gl_buffer[i];
-            }
+            Pica::Texture::ConvertBGRToRGB(gl_buffer, download_loc);
         } else {
-            std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
-                        flush_end - flush_start);
+            std::memcpy(download_loc.data() + start_offset, gl_buffer.data() + start_offset, flush_end - flush_start);
         }
     } else {
-        std::span<std::byte> texture_data{(std::byte*)dst_buffer + start_offset, byte_size};
-        SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data);
+        SwizzleTexture(*this, flush_start, flush_end, gl_buffer, download_loc);
     }
 }
 
 MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64));
 void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
-    if (type == SurfaceType::Fill) {
-        return;
-    }
-
     MICROPROFILE_SCOPE(RasterizerCache_TextureUL);
-    ASSERT(gl_buffer.size() == width * height * GetBytesPerPixel(pixel_format));
 
     // Load data from memory to the surface
     GLint x0 = static_cast<GLint>(rect.left);

@@ -138,15 +111,13 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
 
     GLuint target_tex = texture.handle;
 
-    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
-    // surface
+    // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface
     OGLTexture unscaled_tex;
     if (res_scale != 1) {
         x0 = 0;
         y0 = 0;
 
         unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
 
         target_tex = unscaled_tex.handle;
     }
 

@@ -214,8 +214,11 @@ const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
         .size = size
     };
 
-    if (auto it = search.lower_bound(key); it != search.end()) {
-        return *it;
+    for (auto it = search.lower_bound(key); it != search.end(); it++) {
+        // Attempt to find a free buffer that fits the requested data
+        if (it->IsFree()) {
+            return *it;
+        }
     }
 
     StagingBuffer staging{};

@@ -243,7 +246,7 @@ const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
 
     // Insert it to the cache and return the memory
     staging.mapped = std::span{reinterpret_cast<std::byte*>(data), size};
-    const auto& [it, _] = search.insert(std::move(staging));
+    const auto& it = search.insert(std::move(staging));
     return *it;
 }
 

@@ -16,10 +16,27 @@ struct StagingBuffer {
     u32 size = 0;
     std::span<std::byte> mapped{};
     OGLBuffer buffer{};
+    mutable OGLSync buffer_lock{};
 
     bool operator<(const StagingBuffer& other) const {
         return size < other.size;
     }
+
+    /// Returns true if the buffer does not take part in pending transfer operations
+    bool IsFree() const {
+        GLint status;
+        glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status);
+        return status == GL_SIGNALED;
+    }
+
+    /// Prevents the runtime from reusing the buffer until the transfer operation is complete
+    void Lock() const {
+        if (buffer_lock) {
+            buffer_lock.Release();
+        }
+
+        buffer_lock.Create();
+    }
 };
 
 class Driver;

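Reviewer note, not part of the commit: taken together with the FindStaging() loop above, the intended flow appears to be that a caller grabs a staging buffer whose fence has already signaled, records the transfer, and then fences it again so the runtime skips it until the GPU is done. A minimal sketch under that assumption (the helper name, the upload call site, and the includes are illustrative, not code from this change):

```cpp
// Illustrative sketch only -- relies on the StagingBuffer/TextureRuntime API added in this commit.
#include <cstring>
#include <span>

void UploadThroughStaging(OpenGL::TextureRuntime& runtime, std::span<const std::byte> pixels) {
    // FindStaging() only returns buffers whose sync object reports GL_SIGNALED.
    const OpenGL::StagingBuffer& staging = runtime.FindStaging(static_cast<u32>(pixels.size()), true);

    // Fill the persistently mapped region with the pixel data.
    std::memcpy(staging.mapped.data(), pixels.data(), pixels.size());

    // ... issue the GL copy that sources from staging.buffer here ...

    // Fence the buffer so the runtime will not hand it out again until the copy completes.
    staging.Lock();
}
```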
@@ -33,6 +50,9 @@ public:
     TextureRuntime(Driver& driver);
     ~TextureRuntime() = default;
 
+    /// Maps an internal staging buffer of the provided size of pixel uploads/downloads
+    const StagingBuffer& FindStaging(u32 size, bool upload);
+
     /// Copies the GPU pixel data to the provided pixels buffer
     void ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy,
                      PixelFormat format, std::span<std::byte> pixels);

@@ -49,14 +69,11 @@ public:
     /// Generates mipmaps for all the available levels of the texture
     void GenerateMipmaps(OGLTexture& texture, u32 max_level);
 
-    /// Maps an internal staging buffer of the provided size of pixel uploads/downloads
-    const StagingBuffer& FindStaging(u32 size, bool upload);
-
 private:
     Driver& driver;
     OGLFramebuffer read_fbo, draw_fbo;
-    std::set<StagingBuffer> upload_buffers;
-    std::set<StagingBuffer> download_buffers;
+    std::multiset<StagingBuffer> upload_buffers;
+    std::multiset<StagingBuffer> download_buffers;
 };
 
 } // namespace OpenGL

@@ -152,12 +152,17 @@ public:
     ~OGLSync() {
         Release();
     }
 
     OGLSync& operator=(OGLSync&& o) noexcept {
         Release();
         handle = std::exchange(o.handle, nullptr);
         return *this;
     }
 
+    explicit operator bool() const noexcept {
+        return handle != 0;
+    }
+
     /// Creates a new internal OpenGL resource and stores the handle
     void Create();