diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index d54bcf519..ebfb6b7ac 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -174,15 +174,13 @@ private: SurfaceMap dirty_regions; SurfaceSet remove_surfaces; u16 resolution_scale_factor; - std::vector> download_queue; std::unordered_map texture_cube_cache; }; template RasterizerCache::RasterizerCache(Memory::MemorySystem& memory_, TextureRuntime& runtime_) - : memory{memory_}, runtime{runtime_} { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); -} + : memory{memory_}, runtime{runtime_}, resolution_scale_factor{ + VideoCore::GetResolutionScaleFactor()} {} template template @@ -597,13 +595,15 @@ template auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> const Surface& { auto [it, new_surface] = texture_cube_cache.try_emplace(config); if (new_surface) { - SurfaceParams cube_params = {.addr = config.px, - .width = config.width, - .height = config.width, - .stride = config.width, - .texture_type = TextureType::CubeMap, - .pixel_format = PixelFormatFromTextureFormat(config.format), - .type = SurfaceType::Texture}; + SurfaceParams cube_params = { + .addr = config.px, + .width = config.width, + .height = config.width, + .stride = config.width, + .texture_type = TextureType::CubeMap, + .pixel_format = PixelFormatFromTextureFormat(config.format), + .type = SurfaceType::Texture, + }; it->second = CreateSurface(cube_params); } @@ -915,6 +915,7 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i const auto staging = runtime.FindStaging( load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true); + MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr); if (!source_ptr) [[unlikely]] { return; @@ -924,11 +925,12 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, runtime.NeedsConvertion(surface->pixel_format)); - const BufferTextureCopy upload = {.buffer_offset = 0, - .buffer_size = staging.size, - .texture_rect = surface->GetSubRect(load_info), - .texture_level = 0}; - + const BufferTextureCopy upload = { + .buffer_offset = 0, + .buffer_size = staging.size, + .texture_rect = surface->GetSubRect(load_info), + .texture_level = 0, + }; surface->Upload(upload, staging); } @@ -942,25 +944,25 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval const auto staging = runtime.FindStaging( flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false); - const BufferTextureCopy download = {.buffer_offset = 0, - .buffer_size = staging.size, - .texture_rect = surface->GetSubRect(flush_info), - .texture_level = 0}; + const BufferTextureCopy download = { + .buffer_offset = 0, + .buffer_size = staging.size, + .texture_rect = surface->GetSubRect(flush_info), + .texture_level = 0, + }; surface->Download(download, staging); + runtime.Finish(); + MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start); if (!dest_ptr) [[unlikely]] { return; } const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - - download_queue.push_back([this, surface, flush_start, flush_end, flush_info, - mapped = staging.mapped, download_dest]() { - EncodeTexture(flush_info, flush_start, flush_end, mapped, download_dest, - runtime.NeedsConvertion(surface->pixel_format)); - }); + EncodeTexture(flush_info, flush_start, flush_end, staging.mapped, download_dest, + runtime.NeedsConvertion(surface->pixel_format)); } template @@ -1122,17 +1124,6 @@ void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface flushed_intervals += interval; } - // Batch execute all requested downloads. This gives more time for them to complete - // before we issue the CPU to GPU flush and reduces scheduler slot switches in Vulkan - if (!download_queue.empty()) { - runtime.Finish(); - for (const auto& download_func : download_queue) { - download_func(); - } - - download_queue.clear(); - } - // Reset dirty regions dirty_regions -= flushed_intervals; } diff --git a/src/video_core/renderer_opengl/gl_driver.cpp b/src/video_core/renderer_opengl/gl_driver.cpp index 1db851455..423977640 100644 --- a/src/video_core/renderer_opengl/gl_driver.cpp +++ b/src/video_core/renderer_opengl/gl_driver.cpp @@ -74,16 +74,16 @@ Driver::Driver(bool gles, bool enable_debug) : is_gles{gles} { if (!gladLoadGL()) { return; } +#endif /* * Qualcomm has some spammy info messages that are marked as errors but not important * https://developer.qualcomm.com/comment/11845 */ - if (!gles) { + if (enable_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); } -#endif ReportDriverInfo(); DeduceVendor(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e1dfed021..03766cf09 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -15,7 +15,6 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/pica_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -66,7 +65,7 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory_, Frontend::EmuW // Set vertex attributes for software shader path state.draw.vertex_array = sw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); + state.draw.vertex_buffer = vertex_buffer.Handle(); state.Apply(); glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), @@ -111,16 +110,16 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory_, Frontend::EmuW state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle; state.Apply(); glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.Handle()); glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.Handle()); glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum()); - glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.Handle()); // Bind index buffer for hardware shader path state.draw.vertex_array = hw_vao.handle; state.Apply(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.Handle()); glEnable(GL_BLEND); @@ -166,7 +165,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset, PAddr base_address = vertex_attributes.GetPhysicalBaseAddress(); state.draw.vertex_array = hw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); + state.draw.vertex_buffer = vertex_buffer.Handle(); state.Apply(); std::array enable_attributes{}; @@ -305,7 +304,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) { return false; } - state.draw.vertex_buffer = vertex_buffer.GetHandle(); + state.draw.vertex_buffer = vertex_buffer.Handle(); state.Apply(); u8* buffer_ptr; @@ -625,7 +624,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { succeeded = AccelerateDrawBatchInternal(is_indexed); } else { state.draw.vertex_array = sw_vao.handle; - state.draw.vertex_buffer = vertex_buffer.GetHandle(); + state.draw.vertex_buffer = vertex_buffer.Handle(); shader_program_manager.UseTrivialVertexShader(); shader_program_manager.UseTrivialGeometryShader(); shader_program_manager.ApplyTo(state); @@ -1184,7 +1183,7 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() { GLintptr offset; bool invalidate; std::size_t bytes_used = 0; - glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle()); + glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.Handle()); std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f)); // Sync the lighting luts @@ -1254,7 +1253,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { GLintptr offset; bool invalidate; std::size_t bytes_used = 0; - glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle()); + glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.Handle()); std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(Common::Vec4f)); // helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap @@ -1349,7 +1348,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() { void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { // glBindBufferRange below also changes the generic buffer binding point, so we sync the state // first - state.draw.uniform_buffer = uniform_buffer.GetHandle(); + state.draw.uniform_buffer = uniform_buffer.Handle(); state.Apply(); bool sync_vs = accelerate_draw; @@ -1371,7 +1370,7 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs); std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms)); glBindBufferRange(GL_UNIFORM_BUFFER, static_cast(Pica::Shader::UniformBindings::VS), - uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms)); + uniform_buffer.Handle(), offset + used_bytes, sizeof(vs_uniforms)); used_bytes += uniform_size_aligned_vs; } @@ -1380,7 +1379,7 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) { sizeof(Pica::Shader::UniformData)); glBindBufferRange( GL_UNIFORM_BUFFER, static_cast(Pica::Shader::UniformBindings::Common), - uniform_buffer.GetHandle(), offset + used_bytes, sizeof(Pica::Shader::UniformData)); + uniform_buffer.Handle(), offset + used_bytes, sizeof(Pica::Shader::UniformData)); uniform_block_data.dirty = false; used_bytes += uniform_size_aligned_fs; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6ff4254af..142accca8 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -151,11 +151,11 @@ private: OGLTexture default_texture; std::array texture_samplers; - OGLStreamBuffer vertex_buffer; - OGLStreamBuffer uniform_buffer; - OGLStreamBuffer index_buffer; - OGLStreamBuffer texture_buffer; - OGLStreamBuffer texture_lf_buffer; + StreamBuffer vertex_buffer; + StreamBuffer uniform_buffer; + StreamBuffer index_buffer; + StreamBuffer texture_buffer; + StreamBuffer texture_lf_buffer; OGLFramebuffer framebuffer; GLint uniform_buffer_alignment; std::size_t uniform_size_aligned_vs; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 89944f80e..519519d97 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -2,15 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include -#include "common/common_funcs.h" -#include "common/logging/log.h" +#include "common/common_types.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_vars.h" namespace OpenGL { -OpenGLState OpenGLState::cur_state; +OpenGLState OpenGLState::cur_state{}; OpenGLState::OpenGLState() { // These all match default OpenGL values diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index d973e0dd2..3dde7ea2b 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -4,96 +4,107 @@ #include "common/alignment.h" #include "common/assert.h" -#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" -MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", - MP_RGB(128, 128, 192)); - namespace OpenGL { -OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback_, - bool prefer_coherent) - : gl_target(target), readback(readback_), buffer_size(size) { +StreamBuffer::StreamBuffer(GLenum target, size_t size_) + : gl_target{target}, buffer_size{size_}, slot_size{buffer_size / SYNC_POINTS}, + buffer_storage{bool(GLAD_GL_ARB_buffer_storage)} { + for (int i = 0; i < SYNC_POINTS; i++) { + fences[i].Create(); + } + gl_buffer.Create(); glBindBuffer(gl_target, gl_buffer.handle); - if (GLAD_GL_ARB_buffer_storage) { - persistent = true; - coherent = prefer_coherent; - GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | GL_MAP_PERSISTENT_BIT | - (coherent ? GL_MAP_COHERENT_BIT : 0); - glBufferStorage(gl_target, size, nullptr, flags); - mapped_ptr = static_cast( - glMapBufferRange(gl_target, 0, buffer_size, - flags | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0))); + if (buffer_storage) { + glBufferStorage(gl_target, buffer_size, nullptr, + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); + mapped_ptr = + (u8*)glMapBufferRange(gl_target, 0, buffer_size, + GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); } else { - glBufferData(gl_target, size, nullptr, GL_STREAM_DRAW); + glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW); } } -OGLStreamBuffer::~OGLStreamBuffer() { - if (persistent) { +StreamBuffer::~StreamBuffer() { + if (buffer_storage) { glBindBuffer(gl_target, gl_buffer.handle); glUnmapBuffer(gl_target); } - gl_buffer.Release(); } -GLuint OGLStreamBuffer::GetHandle() const { - return gl_buffer.handle; -} - -GLsizeiptr OGLStreamBuffer::GetSize() const { - return buffer_size; -} - -std::tuple OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) { - ASSERT(size <= buffer_size); - ASSERT(alignment <= buffer_size); +std::tuple StreamBuffer::Map(u64 size, u64 alignment) { mapped_size = size; if (alignment > 0) { - buffer_pos = Common::AlignUp(buffer_pos, alignment); + iterator = Common::AlignUp(iterator, alignment); } + // Insert waiting slots for used memory + for (u32 i = Slot(used_iterator); i < Slot(iterator); i++) { + fences[i].Create(); + } + used_iterator = iterator; + + // Wait for new slots to end of buffer + for (u32 i = Slot(free_iterator) + 1; i <= Slot(iterator + size) && i < SYNC_POINTS; i++) { + glClientWaitSync(fences[i].handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + fences[i].Release(); + } + + // If we allocate a large amount of memory (A), commit a smaller amount, then allocate memory + // smaller than allocation A, we will have already waited for these fences in A, but not used + // the space. In this case, don't set m_free_iterator to a position before that which we know + // is safe to use, which would result in waiting on the same fence(s) next time. + if ((iterator + size) > free_iterator) { + free_iterator = iterator + size; + } + + // If buffer is full bool invalidate = false; - if (buffer_pos + size > buffer_size) { - buffer_pos = 0; + if (iterator + size >= buffer_size) { invalidate = true; - if (persistent) { - glUnmapBuffer(gl_target); + // Insert waiting slots in unused space at the end of the buffer + for (int i = Slot(used_iterator); i < SYNC_POINTS; i++) { + fences[i].Create(); } + + // Move to the start + used_iterator = iterator = 0; // offset 0 is always aligned + + // Wait for space at the start + for (int i = 0; i <= Slot(iterator + size); i++) { + glClientWaitSync(fences[i].handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); + fences[i].Release(); + } + free_iterator = iterator + size; } - if (invalidate || !persistent) { - MICROPROFILE_SCOPE(OpenGL_StreamBuffer); - GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | - (persistent ? GL_MAP_PERSISTENT_BIT : 0) | - (coherent ? GL_MAP_COHERENT_BIT : 0) | - (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0) | - (invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT); - mapped_ptr = static_cast( - glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags)); - mapped_offset = buffer_pos; + u8* pointer{}; + if (buffer_storage) { + pointer = mapped_ptr + iterator; + } else { + pointer = (u8*)glMapBufferRange(gl_target, iterator, size, + GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT | + GL_MAP_UNSYNCHRONIZED_BIT); } - return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate); + return std::make_tuple(pointer, iterator, invalidate); } -void OGLStreamBuffer::Unmap(GLsizeiptr size) { - ASSERT(size <= mapped_size); +void StreamBuffer::Unmap(u64 used_size) { + ASSERT_MSG(used_size <= mapped_size, "Reserved size {} is too small compared to {}", + mapped_size, used_size); - if (!coherent && !readback && size > 0) { - glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size); - } - - if (!persistent) { + if (!buffer_storage) { + glFlushMappedBufferRange(gl_target, 0, used_size); glUnmapBuffer(gl_target); } - - buffer_pos += size; + iterator += used_size; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h index 5ca27dbd7..8dc2ebefb 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.h +++ b/src/video_core/renderer_opengl/gl_stream_buffer.h @@ -8,40 +8,50 @@ namespace OpenGL { -class OGLStreamBuffer : private NonCopyable { +class StreamBuffer { + static constexpr std::size_t SYNC_POINTS = 16; + public: - explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback = false, - bool prefer_coherent = false); - ~OGLStreamBuffer(); + StreamBuffer(GLenum target, size_t size); + ~StreamBuffer(); - GLuint GetHandle() const; - GLsizeiptr GetSize() const; + [[nodiscard]] GLuint Handle() const noexcept { + return gl_buffer.handle; + } - /* - * Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes - * and the optional alignment requirement. - * If the buffer is full, the whole buffer is reallocated which invalidates old chunks. - * The return values are the pointer to the new chunk, the offset within the buffer, - * and the invalidation flag for previous chunks. - * The actual used size must be specified on unmapping the chunk. + [[nodiscard]] size_t Size() const noexcept { + return buffer_size; + } + + /* This mapping function will return a pair of: + * - the pointer to the mapped buffer + * - the offset into the real GPU buffer (always multiple of stride) + * On mapping, the maximum of size for allocation has to be set. + * The size really pushed into this fifo only has to be known on Unmapping. + * Mapping invalidates the current buffer content, + * so it isn't allowed to access the old content any more. */ - std::tuple Map(GLsizeiptr size, GLintptr alignment = 0); - - void Unmap(GLsizeiptr size); + std::tuple Map(u64 size, u64 alignment = 0); + void Unmap(u64 used_size); private: - OGLBuffer gl_buffer; + [[nodiscard]] u64 Slot(u64 offset) noexcept { + return offset / slot_size; + } + GLenum gl_target; + size_t buffer_size; + size_t slot_size; + bool buffer_storage{}; + u8* mapped_ptr{}; + u64 mapped_size; - bool readback = false; - bool coherent = false; - bool persistent = false; + u64 iterator = 0; + u64 used_iterator = 0; + u64 free_iterator = 0; - GLintptr buffer_pos = 0; - GLsizeiptr buffer_size = 0; - GLintptr mapped_offset = 0; - GLsizeiptr mapped_size = 0; - u8* mapped_ptr = nullptr; + OGLBuffer gl_buffer; + std::array fences{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 6d9e974db..6b2b7fc8a 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -53,16 +53,15 @@ static constexpr std::array COLOR_TUPLES_OES = { return GL_COLOR_BUFFER_BIT; } -constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; -constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr std::size_t UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024; +constexpr std::size_t DOWNLOAD_BUFFER_SIZE = 4 * 1024 * 1024; TextureRuntime::TextureRuntime(Driver& driver) : driver{driver}, filterer{Settings::values.texture_filter_name.GetValue(), VideoCore::GetResolutionScaleFactor()}, - upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE}, download_buffer{ - GL_PIXEL_PACK_BUFFER, - DOWNLOAD_BUFFER_SIZE, true} { + upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE} { + download_buffer.resize(DOWNLOAD_BUFFER_SIZE); read_fbo.Create(); draw_fbo.Create(); @@ -77,13 +76,22 @@ TextureRuntime::TextureRuntime(Driver& driver) } StagingData TextureRuntime::FindStaging(u32 size, bool upload) { - auto& buffer = upload ? upload_buffer : download_buffer; - auto [data, offset, invalidate] = buffer.Map(size, 4); + if (!upload) { + ASSERT_MSG(download_buffer.size() <= size, "Download buffer to small"); + return StagingData{ + .size = size, + .mapped = std::span{download_buffer.data(), size}, + .buffer_offset = 0, + }; + } - return StagingData{.buffer = buffer.GetHandle(), - .size = size, - .mapped = std::span{data, size}, - .buffer_offset = offset}; + auto [data, offset, invalidate] = upload_buffer.Map(size, 4); + return StagingData{ + .buffer = upload_buffer.Handle(), + .size = size, + .mapped = std::span{data, size}, + .buffer_offset = offset, + }; } const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) { @@ -333,6 +341,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer); + // Unmap the buffer FindStaging mapped beforehand + runtime.upload_buffer.Unmap(staging.size); + glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, texture.handle); @@ -343,7 +354,6 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa reinterpret_cast(staging.buffer_offset)); glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - runtime.upload_buffer.Unmap(staging.size); } InvalidateAllWatcher(); @@ -360,7 +370,6 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi SCOPE_EXIT({ prev_state.Apply(); }); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); - glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer); const bool is_scaled = res_scale != 1; if (is_scaled) { @@ -372,9 +381,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi const auto& tuple = runtime.GetFormatTuple(pixel_format); glReadPixels(download.texture_rect.left, download.texture_rect.bottom, download.texture_rect.GetWidth(), download.texture_rect.GetHeight(), - tuple.format, tuple.type, reinterpret_cast(staging.buffer_offset)); - - runtime.download_buffer.Unmap(staging.size); + tuple.format, tuple.type, staging.mapped.data()); } glPixelStorei(GL_PACK_ROW_LENGTH, 0); @@ -393,20 +400,24 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const Sta unscaled_params.res_scale = 1; Surface unscaled_surface{unscaled_params, runtime}; - const VideoCore::BufferTextureCopy unscaled_upload = {.buffer_offset = upload.buffer_offset, - .buffer_size = upload.buffer_size, - .texture_rect = unscaled_rect}; + const VideoCore::BufferTextureCopy unscaled_upload = { + .buffer_offset = upload.buffer_offset, + .buffer_size = upload.buffer_size, + .texture_rect = unscaled_rect, + }; unscaled_surface.Upload(unscaled_upload, staging); const auto& filterer = runtime.GetFilterer(); if (!filterer.Filter(unscaled_surface.texture, unscaled_rect, texture, scaled_rect, type)) { - const VideoCore::TextureBlit blit = {.src_level = 0, - .dst_level = upload.texture_level, - .src_layer = 0, - .dst_layer = 0, - .src_rect = unscaled_rect, - .dst_rect = scaled_rect}; + const VideoCore::TextureBlit blit = { + .src_level = 0, + .dst_level = upload.texture_level, + .src_layer = 0, + .dst_layer = 0, + .src_rect = unscaled_rect, + .dst_rect = scaled_rect, + }; // If filtering fails, resort to normal blitting runtime.BlitTextures(unscaled_surface, *this, blit); @@ -428,14 +439,15 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, unscaled_params.res_scale = 1; Surface unscaled_surface{unscaled_params, runtime}; - const VideoCore::TextureBlit blit = {.src_level = download.texture_level, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_rect = scaled_rect, - .dst_rect = unscaled_rect}; - // Blit the scaled rectangle to the unscaled texture + const VideoCore::TextureBlit blit = { + .src_level = download.texture_level, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_rect = scaled_rect, + .dst_rect = unscaled_rect, + }; runtime.BlitTextures(*this, unscaled_surface, blit); glActiveTexture(GL_TEXTURE0); @@ -446,13 +458,10 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download, runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type, unscaled_surface.texture); glReadPixels(0, 0, rect_width, rect_height, tuple.format, tuple.type, - reinterpret_cast(staging.buffer_offset)); + staging.mapped.data()); } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - reinterpret_cast(staging.buffer_offset)); + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, staging.mapped.data()); } - - runtime.download_buffer.Unmap(staging.size); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 6e76316ed..ed2a423b7 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -23,7 +23,7 @@ struct StagingData { GLuint buffer; u32 size = 0; std::span mapped{}; - GLintptr buffer_offset = 0; + u64 buffer_offset = 0; }; class Driver; @@ -46,6 +46,7 @@ public: /// Returns the OpenGL format tuple associated with the provided pixel format const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + /// Causes a GPU command flush void Finish() const {} /// Allocates an OpenGL texture with the specified dimentions and format @@ -92,7 +93,8 @@ private: TextureFilterer filterer; std::array reinterpreters; std::unordered_multimap texture_recycler; - OGLStreamBuffer upload_buffer, download_buffer; + StreamBuffer upload_buffer; + std::vector download_buffer; OGLFramebuffer read_fbo, draw_fbo; };