From 78be1e7c17b598c43989223aeaf498c516a35e02 Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Sun, 11 Sep 2022 17:09:48 +0300 Subject: [PATCH] rasterizer_cache: Use PBO staging buffer cache for texture uploads/downloads --- src/citra_qt/bootmanager.cpp | 1 - .../rasterizer_cache/cached_surface.cpp | 114 +++------------- .../rasterizer_cache/cached_surface.h | 10 +- .../rasterizer_cache/morton_swizzle.h | 4 +- .../rasterizer_cache/rasterizer_cache.cpp | 124 +++++++++++++++--- .../rasterizer_cache/rasterizer_cache.h | 12 +- .../rasterizer_cache/texture_runtime.cpp | 47 +++---- .../rasterizer_cache/texture_runtime.h | 14 +- src/video_core/renderer_opengl/gl_driver.cpp | 2 +- src/video_core/texture/texture_decode.cpp | 7 +- 10 files changed, 178 insertions(+), 157 deletions(-) diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp index 43bd98b57..0bd461808 100644 --- a/src/citra_qt/bootmanager.cpp +++ b/src/citra_qt/bootmanager.cpp @@ -116,7 +116,6 @@ public: QSurfaceFormat format; format.setVersion(4, 4); format.setProfile(QSurfaceFormat::CoreProfile); - format.setOption(QSurfaceFormat::DebugContext); // TODO: expose a setting for buffer value (ie default/single/double/triple) format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior); format.setSwapInterval(0); diff --git a/src/video_core/rasterizer_cache/cached_surface.cpp b/src/video_core/rasterizer_cache/cached_surface.cpp index 67edfd930..201fa5a09 100644 --- a/src/video_core/rasterizer_cache/cached_surface.cpp +++ b/src/video_core/rasterizer_cache/cached_surface.cpp @@ -4,14 +4,12 @@ #include "common/microprofile.h" #include "common/scope_exit.h" -#include "core/memory.h" #include "video_core/rasterizer_cache/cached_surface.h" #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/texture_downloader_es.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -22,89 +20,8 @@ CachedSurface::~CachedSurface() { } } -MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) { - DEBUG_ASSERT(load_start >= addr && load_end <= end); - - auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start); - if (!source_ptr) [[unlikely]] { - return; - } - - const auto start_offset = load_start - addr; - const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); - const auto upload_size = static_cast(upload_data.size()); - - if (gl_buffer.empty()) { - gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format)); - } - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - - if (!is_tiled) { - ASSERT(type == SurfaceType::Color); - - const auto dest_buffer = std::span{gl_buffer.begin() + start_offset, upload_size}; - if (pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); - } else if (pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); - } else { - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); - } - } else { - UnswizzleTexture(*this, start_offset, upload_data, gl_buffer); - } -} - -MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) { - DEBUG_ASSERT(flush_start >= addr && flush_end <= end); - - auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); - if (!dest_ptr) [[unlikely]] { - return; - } - - const auto start_offset = flush_start - addr; - const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - const auto download_size = static_cast(download_dest.size()); - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); - - if (type == SurfaceType::Fill) { - const u32 coarse_start_offset = start_offset - (start_offset % fill_size); - const u32 backup_bytes = start_offset % fill_size; - std::array backup_data; - if (backup_bytes) { - std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); - } - - for (u32 offset = coarse_start_offset; offset < download_size; offset += fill_size) { - std::memcpy(&dest_ptr[offset], &fill_data[0], - std::min(fill_size, download_size - offset)); - } - - if (backup_bytes) - std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!is_tiled) { - ASSERT(type == SurfaceType::Color); - - const auto download_data = std::span{gl_buffer.begin() + start_offset, download_size}; - if (pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data); - } else if (pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data); - } else { - std::memcpy(download_dest.data(), download_data.data(), download_size); - } - } else { - SwizzleTexture(*this, start_offset, gl_buffer, download_dest); - } -} - MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(Common::Rectangle rect) { +void CachedSurface::UploadTexture(Common::Rectangle rect, const StagingBuffer& staging) { MICROPROFILE_SCOPE(RasterizerCache_TextureUL); // Load data from memory to the surface @@ -136,17 +53,23 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect) { ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - glActiveTexture(GL_TEXTURE0); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); + glActiveTexture(GL_TEXTURE0); glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), static_cast(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[buffer_offset]); + reinterpret_cast(buffer_offset)); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + staging.Lock(); cur_state.texture_units[0].texture_2d = old_tex; cur_state.Apply(); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + if (res_scale != 1) { auto scaled_rect = rect; scaled_rect.left *= res_scale; @@ -178,22 +101,17 @@ void CachedSurface::UploadGLTexture(Common::Rectangle rect) { } MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect) { +void CachedSurface::DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging) { MICROPROFILE_SCOPE(RasterizerCache_TextureDL); OpenGLState state = OpenGLState::GetCurState(); OpenGLState prev_state = state; SCOPE_EXIT({ prev_state.Apply(); }); - const u32 download_size = width * height * GetBytesPerPixel(pixel_format); - - if (gl_buffer.empty()) { - gl_buffer.resize(download_size); - } - // Ensure no bad interactions with GL_PACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush @@ -232,11 +150,12 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect) { if (GLES) { owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect.GetHeight(), rect.GetWidth(), - &gl_buffer[buffer_offset]); + reinterpret_cast(buffer_offset)); } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, &gl_buffer[buffer_offset]); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); } } else { + const u32 download_size = width * height * GetBytesPerPixel(pixel_format); const BufferTextureCopy texture_download = { .buffer_offset = buffer_offset, .buffer_size = download_size, @@ -248,9 +167,10 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle& rect) { .texture_extent = {rect.GetWidth(), rect.GetHeight()} }; - runtime.ReadTexture(texture, texture_download, pixel_format, gl_buffer); + runtime.ReadTexture(texture, texture_download, pixel_format, staging.mapped); } + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); } diff --git a/src/video_core/rasterizer_cache/cached_surface.h b/src/video_core/rasterizer_cache/cached_surface.h index 092fcef49..66152b52a 100644 --- a/src/video_core/rasterizer_cache/cached_surface.h +++ b/src/video_core/rasterizer_cache/cached_surface.h @@ -44,6 +44,7 @@ private: }; class RasterizerCache; +class StagingBuffer; class CachedSurface : public SurfaceParams, public std::enable_shared_from_this { public: @@ -51,13 +52,9 @@ public: : SurfaceParams(params), owner(owner), runtime(runtime) {} ~CachedSurface(); - /// Read/Write data in 3DS memory to/from gl_buffer - void LoadGLBuffer(PAddr load_start, PAddr load_end); - void FlushGLBuffer(PAddr flush_start, PAddr flush_end); - /// Upload/Download data in gl_buffer in/to this surface's texture - void UploadGLTexture(Common::Rectangle rect); - void DownloadGLTexture(const Common::Rectangle& rect); + void UploadTexture(Common::Rectangle rect, const StagingBuffer& staging); + void DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging); bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; @@ -100,7 +97,6 @@ public: public: bool registered = false; SurfaceRegions invalid_regions; - std::vector gl_buffer; // Number of bytes to read from fill_data u32 fill_size = 0; diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index bb4d72479..01a87317a 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -203,7 +203,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, // the tile affected to a temporary buffer and copy the part we are interested in if (start_offset < aligned_start_offset && !morton_to_linear) { std::array tmp_buf; - auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); + auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset); MortonCopyTile(stride, tmp_buf, linear_data); std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, @@ -215,7 +215,7 @@ static void MortonCopy(u32 stride, u32 height, u32 start_offset, const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset; while (tiled_offset < buffer_end) { - auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size); + auto linear_data = linear_buffer.last(linear_buffer.size_bytes() - linear_offset); auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); MortonCopyTile(stride, tiled_data, linear_data); tiled_offset += tile_size; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.cpp b/src/video_core/rasterizer_cache/rasterizer_cache.cpp index 4dc6f662a..6af6e672e 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.cpp +++ b/src/video_core/rasterizer_cache/rasterizer_cache.cpp @@ -11,6 +11,7 @@ #include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_format_reinterpreter.h" +#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/texture_downloader_es.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" @@ -730,11 +731,11 @@ void RasterizerCache::DuplicateSurface(const Surface& src_surface, } void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { - if (size == 0) + if (size == 0) [[unlikely]] { return; + } const SurfaceInterval validate_interval(addr, addr + size); - if (surface->type == SurfaceType::Fill) { // Sanity check, fill surfaces will always be valid when used ASSERT(surface->IsRegionValid(validate_interval)); @@ -742,15 +743,16 @@ void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 si } auto validate_regions = surface->invalid_regions & validate_interval; - auto notify_validated = [&](SurfaceInterval interval) { + auto NotifyValidated = [&](SurfaceInterval interval) { surface->invalid_regions.erase(interval); validate_regions.erase(interval); }; while (true) { const auto it = validate_regions.begin(); - if (it == validate_regions.end()) + if (it == validate_regions.end()) { break; + } const auto interval = *it & validate_interval; // Look for a valid surface to copy from @@ -761,14 +763,14 @@ void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 si if (copy_surface != nullptr) { SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); CopySurface(copy_surface, surface, copy_interval); - notify_validated(copy_interval); + NotifyValidated(copy_interval); continue; } // Try to find surface in cache with different format // that can can be reinterpreted to the requested format. if (ValidateByReinterpretation(surface, params, interval)) { - notify_validated(interval); + NotifyValidated(interval); continue; } // Could not find a matching reinterpreter, check if we need to implement a @@ -788,12 +790,105 @@ void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 si // Load data from 3DS memory FlushRegion(params.addr, params.size); - surface->LoadGLBuffer(params.addr, params.end); - surface->UploadGLTexture(surface->GetSubRect(params)); - notify_validated(params.GetInterval()); + UploadSurface(surface, interval); + NotifyValidated(params.GetInterval()); } } +MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Load", MP_RGB(128, 192, 64)); +void RasterizerCache::UploadSurface(const Surface& surface, const SurfaceInterval& interval) { + const SurfaceParams info = surface->FromInterval(interval); + const u32 load_start = info.addr; + const u32 load_end = info.end; + ASSERT(load_start >= surface->addr && load_end <= surface->end); + + const StagingBuffer& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); + auto source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); + if (!source_ptr) [[unlikely]] { + return; + } + + const auto start_offset = load_start - surface->addr; + const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); + const auto upload_size = static_cast(upload_data.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); + + if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); + if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); + } else { + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + } + } else { + UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); + } + + surface->UploadTexture(surface->GetSubRect(info), staging); +} + +MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); +void RasterizerCache::DownloadSurface(const Surface& surface, const SurfaceInterval& interval) { + const u32 flush_start = boost::icl::first(interval); + const u32 flush_end = boost::icl::last_next(interval); + ASSERT(flush_start >= surface->addr && flush_end <= surface->end); + + const StagingBuffer& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadTexture(surface->GetSubRect(params), staging); + } + + auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); + if (!dest_ptr) [[unlikely]] { + return; + } + + const auto start_offset = flush_start - surface->addr; + const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); + const auto download_size = static_cast(download_dest.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); + + if (surface->type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); + const u32 backup_bytes = start_offset % surface->fill_size; + std::array backup_data; + if (backup_bytes) { + std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); + } + + for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { + std::memcpy(&dest_ptr[offset], &surface->fill_data[0], + std::min(surface->fill_size, download_size - offset)); + } + + if (backup_bytes) + std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto download_data = staging.mapped.subspan(start_offset, download_size); + if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(download_data, download_dest); + } else { + std::memcpy(download_dest.data(), download_data.data(), download_size); + } + } else { + SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); + } +} + + bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, const SurfaceInterval& interval) { @@ -903,8 +998,9 @@ bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { std::lock_guard lock{mutex}; - if (size == 0) + if (size == 0) [[unlikely]] { return; + } const SurfaceInterval flush_interval(addr, addr + size); SurfaceRegions flushed_intervals; @@ -922,14 +1018,10 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { // Sanity check, this surface is the last one that marked this region dirty ASSERT(surface->IsRegionValid(interval)); - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadGLTexture(surface->GetSubRect(params)); - } - - surface->FlushGLBuffer(boost::icl::first(interval), boost::icl::last_next(interval)); + DownloadSurface(surface, interval); flushed_intervals += interval; } + // Reset dirty regions dirty_regions -= flushed_intervals; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 152f2529b..273f4e1a9 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -102,15 +102,21 @@ private: /// Update surface's texture for given region when necessary void ValidateSurface(const Surface& surface, PAddr addr, u32 size); - // Returns false if there is a surface in the cache at the interval with the same bit-width, + /// Copies pixel data in interval from the guest VRAM to the host GPU surface + void UploadSurface(const Surface& surface, const SurfaceInterval& interval); + + /// Copies pixel data in interval from the host GPU surface to the guest VRAM + void DownloadSurface(const Surface& surface, const SurfaceInterval& interval); + + /// Returns false if there is a surface in the cache at the interval with the same bit-width, bool NoUnimplementedReinterpretations(const OpenGL::Surface& surface, OpenGL::SurfaceParams& params, const OpenGL::SurfaceInterval& interval); - // Return true if a surface with an invalid pixel format exists at the interval + /// Return true if a surface with an invalid pixel format exists at the interval bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); - // Attempt to find a reinterpretable surface in the cache and use it to copy for validation + /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, const SurfaceInterval& interval); diff --git a/src/video_core/rasterizer_cache/texture_runtime.cpp b/src/video_core/rasterizer_cache/texture_runtime.cpp index 50b8f8de0..588515a0e 100644 --- a/src/video_core/rasterizer_cache/texture_runtime.cpp +++ b/src/video_core/rasterizer_cache/texture_runtime.cpp @@ -212,46 +212,49 @@ void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) { } const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { + const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; auto& search = upload ? upload_buffers : download_buffers; - const StagingBuffer key = { - .size = size - }; - - for (auto it = search.lower_bound(key); it != search.end(); it++) { - // Attempt to find a free buffer that fits the requested data - if (it->IsFree()) { + // Attempt to find a free buffer that fits the requested data + for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { + if (!upload || it->IsFree()) { return *it; } } - StagingBuffer staging{}; - staging.buffer.Create(); + OGLBuffer buffer{}; + buffer.Create(); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); + glBindBuffer(target, buffer.handle); // Allocate a new buffer and map the data to the host - void* data = nullptr; + std::byte* data = nullptr; if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; - glBufferStorageEXT(GL_PIXEL_UNPACK_BUFFER, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT); - data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT); + glBufferStorageEXT(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT)); } else if (driver.HasArbBufferStorage()) { const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; - glBufferStorage(GL_PIXEL_UNPACK_BUFFER, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT); - data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size, access | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT); + glBufferStorage(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT)); } else { UNIMPLEMENTED(); } - // Insert it to the cache and return the memory - staging.mapped = std::span{reinterpret_cast(data), size}; - const auto& it = search.insert(std::move(staging)); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + StagingBuffer staging = { + .buffer = std::move(buffer), + .mapped = std::span{data, size}, + .size = size + }; + + const auto& it = search.emplace(std::move(staging)); return *it; } diff --git a/src/video_core/rasterizer_cache/texture_runtime.h b/src/video_core/rasterizer_cache/texture_runtime.h index ab8dfca84..d7758cef1 100644 --- a/src/video_core/rasterizer_cache/texture_runtime.h +++ b/src/video_core/rasterizer_cache/texture_runtime.h @@ -13,10 +13,10 @@ namespace OpenGL { struct FormatTuple; struct StagingBuffer { - u32 size = 0; - std::span mapped{}; OGLBuffer buffer{}; mutable OGLSync buffer_lock{}; + std::span mapped{}; + u32 size{}; bool operator<(const StagingBuffer& other) const { return size < other.size; @@ -24,9 +24,13 @@ struct StagingBuffer { /// Returns true if the buffer does not take part in pending transfer operations bool IsFree() const { - GLint status; - glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status); - return status == GL_SIGNALED; + if (buffer_lock) { + GLint status; + glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status); + return status == GL_SIGNALED; + } + + return true; } /// Prevents the runtime from reusing the buffer until the transfer operation is complete diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 6d87916d5..9dd6cbb44 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -79,7 +79,7 @@ Driver::Driver(bool gles) : is_gles{gles} { * Qualcomm has some spammy info messages that are marked as errors but not important * https://developer.qualcomm.com/comment/11845 */ - glEnable(GL_DEBUG_OUTPUT); + glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDebugMessageCallback(DebugHandler, nullptr); #endif diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index bce17939b..940747b4d 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -224,9 +224,10 @@ TextureInfo TextureInfo::FromPicaRegister(const TexturingRegs::TextureConfig& co void ConvertBGRToRGB(std::span source, std::span dest) { for (std::size_t i = 0; i < source.size(); i += 3) { - dest[i] = source[i + 2]; - dest[i + 1] = source[i + 1]; - dest[i + 2] = source[i]; + u32 bgr{}; + std::memcpy(&bgr, source.data() + i, 3); + const u32 rgb = std::byteswap(bgr << 8); + std::memcpy(dest.data(), &rgb, 3); } }