diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index a09c8a453..8f82f4662 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -908,7 +908,7 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - const auto& staging = runtime.FindStaging( + const auto staging = runtime.FindStaging( load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true); MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(load_info.addr); if (!source_ptr) [[unlikely]] { @@ -939,7 +939,7 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval const u32 flush_end = boost::icl::last_next(interval); ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - const auto& staging = runtime.FindStaging( + const auto staging = runtime.FindStaging( flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false); const BufferTextureCopy download = {.buffer_offset = 0, .buffer_size = staging.size, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 328b93dea..e496e0874 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -33,11 +33,11 @@ static bool IsVendorIntel() { RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver) : driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()), - vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), - uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), - index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), - texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), - texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false) { + vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), + uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), + index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE), + texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE), + texture_lf_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) { // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 state.clip_distance[0] = true; diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 1ff5ba62d..096d085d5 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -12,32 +12,21 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd, - bool prefer_coherent) +OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback, bool prefer_coherent) : gl_target(target), buffer_size(size) { gl_buffer.Create(); glBindBuffer(gl_target, gl_buffer.handle); - GLsizeiptr allocate_size = size; - if (array_buffer_for_amd) { - // On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer - // read position is near the end and is an out-of-bound access to the vertex buffer. This is - // probably a bug in the driver and is related to the usage of vec3 attributes in the - // vertex array. Doubling the allocation size for the vertex buffer seems to avoid the - // crash. - allocate_size *= 2; - } - if (GLAD_GL_ARB_buffer_storage) { persistent = true; coherent = prefer_coherent; GLbitfield flags = - GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); - glBufferStorage(gl_target, allocate_size, nullptr, flags); + (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0); + glBufferStorage(gl_target, size, nullptr, flags); mapped_ptr = static_cast(glMapBufferRange( - gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT))); + gl_target, 0, buffer_size, flags | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0))); } else { - glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW); + glBufferData(gl_target, size, nullptr, GL_STREAM_DRAW); } } @@ -78,8 +67,8 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a if (invalidate || !persistent) { MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) | + GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | (persistent ? GL_MAP_PERSISTENT_BIT : 0) | + (coherent ? GL_MAP_COHERENT_BIT : 0) | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0) | (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); mapped_ptr = static_cast( glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); @@ -92,7 +81,7 @@ std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a void OGLStreamBuffer::Unmap(GLsizeiptr size) { ASSERT(size <= mapped_size); - if (!coherent && size > 0) { + if (!coherent && !readback && size > 0) { glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); } diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 1a2853198..53d259a04 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -10,8 +10,7 @@ namespace OpenGL { class OGLStreamBuffer : private NonCopyable { public: - explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool array_buffer_for_amd, - bool prefer_coherent = false); + explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback = false, bool prefer_coherent = false); ~OGLStreamBuffer(); GLuint GetHandle() const; @@ -33,6 +32,7 @@ private: OGLBuffer gl_buffer; GLenum gl_target; + bool readback = false; bool coherent = false; bool persistent = false; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 4d28a2015..a49f82f76 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -36,7 +36,7 @@ static constexpr std::array COLOR_TUPLES_OES = { FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 }; -GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { +[[nodiscard]] GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { switch (type) { case VideoCore::SurfaceType::Color: case VideoCore::SurfaceType::Texture: @@ -53,9 +53,13 @@ GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { return GL_COLOR_BUFFER_BIT; } +constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; + TextureRuntime::TextureRuntime(Driver& driver) - : driver{driver}, downloader_es{false}, filterer{Settings::values.texture_filter_name, - VideoCore::GetResolutionScaleFactor()} { + : driver{driver}, filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()}, + downloader_es{false}, upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE}, + download_buffer{GL_PIXEL_PACK_BUFFER, DOWNLOAD_BUFFER_SIZE, true} { read_fbo.Create(); draw_fbo.Create(); @@ -70,51 +74,14 @@ TextureRuntime::TextureRuntime(Driver& driver) Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } -const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { - const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; - const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; - auto& search = upload ? upload_buffers : download_buffers; +StagingData TextureRuntime::FindStaging(u32 size, bool upload) { + auto& buffer = upload ? upload_buffer : download_buffer; + auto [data, offset, invalidate] = buffer.Map(size, 4); - // Attempt to find a free buffer that fits the requested data - for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { - if (!upload || it->IsFree()) { - it->mapped = std::span{it->mapped.data(), size}; - return *it; - } - } - - OGLBuffer buffer{}; - buffer.Create(); - - glBindBuffer(target, buffer.handle); - - // Allocate a new buffer and map the data to the host - std::byte* data = nullptr; - if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { - const GLbitfield storage = - upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; - glBufferStorageEXT(target, size, nullptr, - storage | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT); - data = reinterpret_cast(glMapBufferRange( - target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT)); - } else if (driver.HasArbBufferStorage()) { - const GLbitfield storage = - upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; - glBufferStorage(target, size, nullptr, - storage | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); - data = reinterpret_cast(glMapBufferRange( - target, 0, size, access | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT)); - } else { - UNIMPLEMENTED(); - } - - glBindBuffer(target, 0); - - StagingBuffer staging = { - .buffer = std::move(buffer), .mapped = std::span{data, size}, .size = size}; - - const auto& it = search.emplace(std::move(staging)); - return *it; + return StagingData{.buffer = buffer.GetHandle(), + .size = size, + .mapped = std::span{reinterpret_cast(data), size}, + .buffer_offset = offset}; } const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) { @@ -371,41 +338,40 @@ Surface::~Surface() { } MICROPROFILE_DEFINE(OpenGL_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging) { +void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging) { MICROPROFILE_SCOPE(OpenGL_Upload); // Ensure no bad interactions with GL_UNPACK_ALIGNMENT ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); - const bool is_scaled = res_scale != 1; if (is_scaled) { ScaledUpload(upload, staging); } else { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer); + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, texture.handle); const auto& tuple = runtime.GetFormatTuple(pixel_format); glTexSubImage2D(GL_TEXTURE_2D, upload.texture_level, upload.texture_rect.left, upload.texture_rect.bottom, upload.texture_rect.GetWidth(), - upload.texture_rect.GetHeight(), tuple.format, tuple.type, 0); + upload.texture_rect.GetHeight(), tuple.format, tuple.type, + reinterpret_cast(staging.buffer_offset)); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + runtime.upload_buffer.Unmap(staging.size); } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - // Lock the staging buffer until glTexSubImage completes - staging.Lock(); InvalidateAllWatcher(); } MICROPROFILE_DEFINE(OpenGL_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 64)); -void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging) { +void Surface::Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { MICROPROFILE_SCOPE(OpenGL_Download); // Ensure no bad interactions with GL_PACK_ALIGNMENT @@ -415,11 +381,11 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi SCOPE_EXIT({ prev_state.Apply(); }); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); - glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); + glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer); const bool is_scaled = res_scale != 1; if (is_scaled) { - ScaledDownload(download); + ScaledDownload(download, staging); } else { runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, download.texture_level, GL_TEXTURE_2D, type, texture); @@ -427,15 +393,17 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi const auto& tuple = runtime.GetFormatTuple(pixel_format); glReadPixels(download.texture_rect.left, download.texture_rect.bottom, download.texture_rect.GetWidth(), download.texture_rect.GetHeight(), - tuple.format, tuple.type, 0); + tuple.format, tuple.type, + reinterpret_cast(staging.buffer_offset)); + + runtime.download_buffer.Unmap(staging.size); } - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); } void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, - const StagingBuffer& staging) { + const StagingData& staging) { const u32 rect_width = upload.texture_rect.GetWidth(); const u32 rect_height = upload.texture_rect.GetHeight(); const auto scaled_rect = upload.texture_rect * res_scale; @@ -468,7 +436,7 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, } } -void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { +void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging) { const u32 rect_width = download.texture_rect.GetWidth(); const u32 rect_height = download.texture_rect.GetHeight(); const VideoCore::Rect2D scaled_rect = download.texture_rect * res_scale; @@ -498,11 +466,14 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download) { const auto& tuple = runtime.GetFormatTuple(pixel_format); if (driver.IsOpenGLES()) { const auto& downloader_es = runtime.GetDownloaderES(); - downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height, - rect_width, 0); + downloader_es.GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, rect_height, rect_width, + reinterpret_cast(staging.buffer_offset)); } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, 0); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + reinterpret_cast(staging.buffer_offset)); } + + runtime.download_buffer.Unmap(staging.size); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index ea46cb4b3..eb89c164e 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -8,6 +8,7 @@ #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_base.h" #include "video_core/renderer_opengl/gl_format_reinterpreter.h" +#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/texture_downloader_es.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" @@ -19,35 +20,11 @@ struct FormatTuple { GLenum type; }; -struct StagingBuffer { - OGLBuffer buffer{}; - mutable OGLSync buffer_lock{}; - mutable std::span mapped{}; - u32 size{}; - - bool operator<(const StagingBuffer& other) const { - return size < other.size; - } - - /// Returns true if the buffer does not take part in pending transfer operations - bool IsFree() const { - if (buffer_lock) { - GLint status; - glGetSynciv(buffer_lock.handle, GL_SYNC_STATUS, 1, nullptr, &status); - return status == GL_SIGNALED; - } - - return true; - } - - /// Prevents the runtime from reusing the buffer until the transfer operation is complete - void Lock() const { - if (buffer_lock) { - buffer_lock.Release(); - } - - buffer_lock.Create(); - } +struct StagingData { + GLuint buffer; + u32 size = 0; + std::span mapped{}; + GLintptr buffer_offset = 0; }; class Driver; @@ -65,7 +42,7 @@ public: ~TextureRuntime() = default; /// Maps an internal staging buffer of the provided size of pixel uploads/downloads - const StagingBuffer& FindStaging(u32 size, bool upload); + StagingData FindStaging(u32 size, bool upload); /// Returns the OpenGL format tuple associated with the provided pixel format const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); @@ -122,17 +99,12 @@ private: private: Driver& driver; - TextureDownloaderES downloader_es; TextureFilterer filterer; + TextureDownloaderES downloader_es; std::array reinterpreters; - - // Staging buffers stored in increasing size - std::multiset upload_buffers; - std::multiset download_buffers; - OGLFramebuffer read_fbo, draw_fbo; - - // Recycled textures to reduce driver allocation overhead std::unordered_multimap texture_recycler; + OGLStreamBuffer upload_buffer, download_buffer; + OGLFramebuffer read_fbo, draw_fbo; }; class Surface : public VideoCore::SurfaceBase { @@ -141,10 +113,10 @@ public: ~Surface() override; /// Uploads pixel data in staging to a rectangle region of the surface texture - void Upload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging); + void Upload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); /// Downloads pixel data to staging from a rectangle region of the surface texture - void Download(const VideoCore::BufferTextureCopy& download, const StagingBuffer& staging); + void Download(const VideoCore::BufferTextureCopy& download, const StagingData& staging); /// Returns the bpp of the internal surface format u32 GetInternalBytesPerPixel() const { @@ -153,10 +125,10 @@ public: private: /// Uploads pixel data to scaled texture - void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingBuffer& staging); + void ScaledUpload(const VideoCore::BufferTextureCopy& upload, const StagingData& staging); /// Downloads scaled image by downscaling the requested rectangle - void ScaledDownload(const VideoCore::BufferTextureCopy& download); + void ScaledDownload(const VideoCore::BufferTextureCopy& download, const StagingData& staging); private: TextureRuntime& runtime;