renderer_opengl: Rewrite stream buffer
* The new implementation is based on Dolphin's MapAndSync and BufferStorage buffers. It replaces orphaning with synchronization, which should make it much faster than before. * Texture downloads don't use PBOs anymore since they didn't offer any speed benefits. Finally, a bug was fixed that only affected the glBufferData fallback path, which should fix android/gles
This commit is contained in:
@ -174,15 +174,13 @@ private:
|
||||
SurfaceMap dirty_regions;
|
||||
SurfaceSet remove_surfaces;
|
||||
u16 resolution_scale_factor;
|
||||
std::vector<std::function<void()>> download_queue;
|
||||
std::unordered_map<TextureCubeConfig, Surface> texture_cube_cache;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
RasterizerCache<T>::RasterizerCache(Memory::MemorySystem& memory_, TextureRuntime& runtime_)
|
||||
: memory{memory_}, runtime{runtime_} {
|
||||
resolution_scale_factor = VideoCore::GetResolutionScaleFactor();
|
||||
}
|
||||
: memory{memory_}, runtime{runtime_}, resolution_scale_factor{
|
||||
VideoCore::GetResolutionScaleFactor()} {}
|
||||
|
||||
template <class T>
|
||||
template <MatchFlags find_flags>
|
||||
@ -597,13 +595,15 @@ template <class T>
|
||||
auto RasterizerCache<T>::GetTextureCube(const TextureCubeConfig& config) -> const Surface& {
|
||||
auto [it, new_surface] = texture_cube_cache.try_emplace(config);
|
||||
if (new_surface) {
|
||||
SurfaceParams cube_params = {.addr = config.px,
|
||||
.width = config.width,
|
||||
.height = config.width,
|
||||
.stride = config.width,
|
||||
.texture_type = TextureType::CubeMap,
|
||||
.pixel_format = PixelFormatFromTextureFormat(config.format),
|
||||
.type = SurfaceType::Texture};
|
||||
SurfaceParams cube_params = {
|
||||
.addr = config.px,
|
||||
.width = config.width,
|
||||
.height = config.width,
|
||||
.stride = config.width,
|
||||
.texture_type = TextureType::CubeMap,
|
||||
.pixel_format = PixelFormatFromTextureFormat(config.format),
|
||||
.type = SurfaceType::Texture,
|
||||
};
|
||||
|
||||
it->second = CreateSurface(cube_params);
|
||||
}
|
||||
@ -915,6 +915,7 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
|
||||
|
||||
const auto staging = runtime.FindStaging(
|
||||
load_info.width * load_info.height * surface->GetInternalBytesPerPixel(), true);
|
||||
|
||||
MemoryRef source_ptr = memory.GetPhysicalRef(load_info.addr);
|
||||
if (!source_ptr) [[unlikely]] {
|
||||
return;
|
||||
@ -924,11 +925,12 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
|
||||
DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
|
||||
const BufferTextureCopy upload = {.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
.texture_rect = surface->GetSubRect(load_info),
|
||||
.texture_level = 0};
|
||||
|
||||
const BufferTextureCopy upload = {
|
||||
.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
.texture_rect = surface->GetSubRect(load_info),
|
||||
.texture_level = 0,
|
||||
};
|
||||
surface->Upload(upload, staging);
|
||||
}
|
||||
|
||||
@ -942,25 +944,25 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
||||
|
||||
const auto staging = runtime.FindStaging(
|
||||
flush_info.width * flush_info.height * surface->GetInternalBytesPerPixel(), false);
|
||||
const BufferTextureCopy download = {.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
.texture_rect = surface->GetSubRect(flush_info),
|
||||
.texture_level = 0};
|
||||
|
||||
const BufferTextureCopy download = {
|
||||
.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
.texture_rect = surface->GetSubRect(flush_info),
|
||||
.texture_level = 0,
|
||||
};
|
||||
surface->Download(download, staging);
|
||||
|
||||
runtime.Finish();
|
||||
|
||||
MemoryRef dest_ptr = memory.GetPhysicalRef(flush_start);
|
||||
if (!dest_ptr) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
|
||||
|
||||
download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
|
||||
mapped = staging.mapped, download_dest]() {
|
||||
EncodeTexture(flush_info, flush_start, flush_end, mapped, download_dest,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
});
|
||||
EncodeTexture(flush_info, flush_start, flush_end, staging.mapped, download_dest,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
}
|
||||
|
||||
template <class T>
|
||||
@ -1122,17 +1124,6 @@ void RasterizerCache<T>::FlushRegion(PAddr addr, u32 size, Surface flush_surface
|
||||
flushed_intervals += interval;
|
||||
}
|
||||
|
||||
// Batch execute all requested downloads. This gives more time for them to complete
|
||||
// before we issue the CPU to GPU flush and reduces scheduler slot switches in Vulkan
|
||||
if (!download_queue.empty()) {
|
||||
runtime.Finish();
|
||||
for (const auto& download_func : download_queue) {
|
||||
download_func();
|
||||
}
|
||||
|
||||
download_queue.clear();
|
||||
}
|
||||
|
||||
// Reset dirty regions
|
||||
dirty_regions -= flushed_intervals;
|
||||
}
|
||||
|
@ -74,16 +74,16 @@ Driver::Driver(bool gles, bool enable_debug) : is_gles{gles} {
|
||||
if (!gladLoadGL()) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Qualcomm has some spammy info messages that are marked as errors but not important
|
||||
* https://developer.qualcomm.com/comment/11845
|
||||
*/
|
||||
if (!gles) {
|
||||
if (enable_debug) {
|
||||
glEnable(GL_DEBUG_OUTPUT);
|
||||
glDebugMessageCallback(DebugHandler, nullptr);
|
||||
}
|
||||
#endif
|
||||
|
||||
ReportDriverInfo();
|
||||
DeduceVendor();
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||
#include "video_core/renderer_opengl/pica_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@ -66,7 +65,7 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory_, Frontend::EmuW
|
||||
|
||||
// Set vertex attributes for software shader path
|
||||
state.draw.vertex_array = sw_vao.handle;
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
state.draw.vertex_buffer = vertex_buffer.Handle();
|
||||
state.Apply();
|
||||
|
||||
glVertexAttribPointer(ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex),
|
||||
@ -111,16 +110,16 @@ RasterizerOpenGL::RasterizerOpenGL(Memory::MemorySystem& memory_, Frontend::EmuW
|
||||
state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(TextureUnits::TextureBufferLUT_LF.Enum());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.GetHandle());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_lf_buffer.Handle());
|
||||
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.Handle());
|
||||
glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle());
|
||||
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.Handle());
|
||||
|
||||
// Bind index buffer for hardware shader path
|
||||
state.draw.vertex_array = hw_vao.handle;
|
||||
state.Apply();
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.Handle());
|
||||
|
||||
glEnable(GL_BLEND);
|
||||
|
||||
@ -166,7 +165,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
|
||||
PAddr base_address = vertex_attributes.GetPhysicalBaseAddress();
|
||||
|
||||
state.draw.vertex_array = hw_vao.handle;
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
state.draw.vertex_buffer = vertex_buffer.Handle();
|
||||
state.Apply();
|
||||
|
||||
std::array<bool, 16> enable_attributes{};
|
||||
@ -305,7 +304,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
|
||||
return false;
|
||||
}
|
||||
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
state.draw.vertex_buffer = vertex_buffer.Handle();
|
||||
state.Apply();
|
||||
|
||||
u8* buffer_ptr;
|
||||
@ -625,7 +624,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
|
||||
succeeded = AccelerateDrawBatchInternal(is_indexed);
|
||||
} else {
|
||||
state.draw.vertex_array = sw_vao.handle;
|
||||
state.draw.vertex_buffer = vertex_buffer.GetHandle();
|
||||
state.draw.vertex_buffer = vertex_buffer.Handle();
|
||||
shader_program_manager.UseTrivialVertexShader();
|
||||
shader_program_manager.UseTrivialGeometryShader();
|
||||
shader_program_manager.ApplyTo(state);
|
||||
@ -1184,7 +1183,7 @@ void RasterizerOpenGL::SyncAndUploadLUTsLF() {
|
||||
GLintptr offset;
|
||||
bool invalidate;
|
||||
std::size_t bytes_used = 0;
|
||||
glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.GetHandle());
|
||||
glBindBuffer(GL_TEXTURE_BUFFER, texture_lf_buffer.Handle());
|
||||
std::tie(buffer, offset, invalidate) = texture_lf_buffer.Map(max_size, sizeof(Common::Vec4f));
|
||||
|
||||
// Sync the lighting luts
|
||||
@ -1254,7 +1253,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
||||
GLintptr offset;
|
||||
bool invalidate;
|
||||
std::size_t bytes_used = 0;
|
||||
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
|
||||
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.Handle());
|
||||
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(Common::Vec4f));
|
||||
|
||||
// helper function for SyncProcTexNoiseLUT/ColorMap/AlphaMap
|
||||
@ -1349,7 +1348,7 @@ void RasterizerOpenGL::SyncAndUploadLUTs() {
|
||||
void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
|
||||
// glBindBufferRange below also changes the generic buffer binding point, so we sync the state
|
||||
// first
|
||||
state.draw.uniform_buffer = uniform_buffer.GetHandle();
|
||||
state.draw.uniform_buffer = uniform_buffer.Handle();
|
||||
state.Apply();
|
||||
|
||||
bool sync_vs = accelerate_draw;
|
||||
@ -1371,7 +1370,7 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
|
||||
vs_uniforms.uniforms.SetFromRegs(Pica::g_state.regs.vs, Pica::g_state.vs);
|
||||
std::memcpy(uniforms + used_bytes, &vs_uniforms, sizeof(vs_uniforms));
|
||||
glBindBufferRange(GL_UNIFORM_BUFFER, static_cast<GLuint>(Pica::Shader::UniformBindings::VS),
|
||||
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(vs_uniforms));
|
||||
uniform_buffer.Handle(), offset + used_bytes, sizeof(vs_uniforms));
|
||||
used_bytes += uniform_size_aligned_vs;
|
||||
}
|
||||
|
||||
@ -1380,7 +1379,7 @@ void RasterizerOpenGL::UploadUniforms(bool accelerate_draw) {
|
||||
sizeof(Pica::Shader::UniformData));
|
||||
glBindBufferRange(
|
||||
GL_UNIFORM_BUFFER, static_cast<GLuint>(Pica::Shader::UniformBindings::Common),
|
||||
uniform_buffer.GetHandle(), offset + used_bytes, sizeof(Pica::Shader::UniformData));
|
||||
uniform_buffer.Handle(), offset + used_bytes, sizeof(Pica::Shader::UniformData));
|
||||
uniform_block_data.dirty = false;
|
||||
used_bytes += uniform_size_aligned_fs;
|
||||
}
|
||||
|
@ -151,11 +151,11 @@ private:
|
||||
|
||||
OGLTexture default_texture;
|
||||
std::array<SamplerInfo, 3> texture_samplers;
|
||||
OGLStreamBuffer vertex_buffer;
|
||||
OGLStreamBuffer uniform_buffer;
|
||||
OGLStreamBuffer index_buffer;
|
||||
OGLStreamBuffer texture_buffer;
|
||||
OGLStreamBuffer texture_lf_buffer;
|
||||
StreamBuffer vertex_buffer;
|
||||
StreamBuffer uniform_buffer;
|
||||
StreamBuffer index_buffer;
|
||||
StreamBuffer texture_buffer;
|
||||
StreamBuffer texture_lf_buffer;
|
||||
OGLFramebuffer framebuffer;
|
||||
GLint uniform_buffer_alignment;
|
||||
std::size_t uniform_size_aligned_vs;
|
||||
|
@ -2,15 +2,13 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OpenGLState OpenGLState::cur_state;
|
||||
OpenGLState OpenGLState::cur_state{};
|
||||
|
||||
OpenGLState::OpenGLState() {
|
||||
// These all match default OpenGL values
|
||||
|
@ -4,96 +4,107 @@
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
MP_RGB(128, 128, 192));
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback_,
|
||||
bool prefer_coherent)
|
||||
: gl_target(target), readback(readback_), buffer_size(size) {
|
||||
StreamBuffer::StreamBuffer(GLenum target, size_t size_)
|
||||
: gl_target{target}, buffer_size{size_}, slot_size{buffer_size / SYNC_POINTS},
|
||||
buffer_storage{bool(GLAD_GL_ARB_buffer_storage)} {
|
||||
for (int i = 0; i < SYNC_POINTS; i++) {
|
||||
fences[i].Create();
|
||||
}
|
||||
|
||||
gl_buffer.Create();
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
|
||||
if (GLAD_GL_ARB_buffer_storage) {
|
||||
persistent = true;
|
||||
coherent = prefer_coherent;
|
||||
GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) | GL_MAP_PERSISTENT_BIT |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : 0);
|
||||
glBufferStorage(gl_target, size, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapBufferRange(gl_target, 0, buffer_size,
|
||||
flags | (!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0)));
|
||||
if (buffer_storage) {
|
||||
glBufferStorage(gl_target, buffer_size, nullptr,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
||||
mapped_ptr =
|
||||
(u8*)glMapBufferRange(gl_target, 0, buffer_size,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
||||
} else {
|
||||
glBufferData(gl_target, size, nullptr, GL_STREAM_DRAW);
|
||||
glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW);
|
||||
}
|
||||
}
|
||||
|
||||
OGLStreamBuffer::~OGLStreamBuffer() {
|
||||
if (persistent) {
|
||||
StreamBuffer::~StreamBuffer() {
|
||||
if (buffer_storage) {
|
||||
glBindBuffer(gl_target, gl_buffer.handle);
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
GLuint OGLStreamBuffer::GetHandle() const {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
GLsizeiptr OGLStreamBuffer::GetSize() const {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
std::tuple<u8*, u64, bool> StreamBuffer::Map(u64 size, u64 alignment) {
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
iterator = Common::AlignUp(iterator, alignment);
|
||||
}
|
||||
|
||||
// Insert waiting slots for used memory
|
||||
for (u32 i = Slot(used_iterator); i < Slot(iterator); i++) {
|
||||
fences[i].Create();
|
||||
}
|
||||
used_iterator = iterator;
|
||||
|
||||
// Wait for new slots to end of buffer
|
||||
for (u32 i = Slot(free_iterator) + 1; i <= Slot(iterator + size) && i < SYNC_POINTS; i++) {
|
||||
glClientWaitSync(fences[i].handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
fences[i].Release();
|
||||
}
|
||||
|
||||
// If we allocate a large amount of memory (A), commit a smaller amount, then allocate memory
|
||||
// smaller than allocation A, we will have already waited for these fences in A, but not used
|
||||
// the space. In this case, don't set free_iterator to a position before that which we know
|
||||
// is safe to use, which would result in waiting on the same fence(s) next time.
|
||||
if ((iterator + size) > free_iterator) {
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
|
||||
// If buffer is full
|
||||
bool invalidate = false;
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
buffer_pos = 0;
|
||||
if (iterator + size >= buffer_size) {
|
||||
invalidate = true;
|
||||
|
||||
if (persistent) {
|
||||
glUnmapBuffer(gl_target);
|
||||
// Insert waiting slots in unused space at the end of the buffer
|
||||
for (int i = Slot(used_iterator); i < SYNC_POINTS; i++) {
|
||||
fences[i].Create();
|
||||
}
|
||||
|
||||
// Move to the start
|
||||
used_iterator = iterator = 0; // offset 0 is always aligned
|
||||
|
||||
// Wait for space at the start
|
||||
for (int i = 0; i <= Slot(iterator + size); i++) {
|
||||
glClientWaitSync(fences[i].handle, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
fences[i].Release();
|
||||
}
|
||||
free_iterator = iterator + size;
|
||||
}
|
||||
|
||||
if (invalidate || !persistent) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
GLbitfield flags = (readback ? GL_MAP_READ_BIT : GL_MAP_WRITE_BIT) |
|
||||
(persistent ? GL_MAP_PERSISTENT_BIT : 0) |
|
||||
(coherent ? GL_MAP_COHERENT_BIT : 0) |
|
||||
(!coherent && !readback ? GL_MAP_FLUSH_EXPLICIT_BIT : 0) |
|
||||
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
|
||||
mapped_offset = buffer_pos;
|
||||
u8* pointer{};
|
||||
if (buffer_storage) {
|
||||
pointer = mapped_ptr + iterator;
|
||||
} else {
|
||||
pointer = (u8*)glMapBufferRange(gl_target, iterator, size,
|
||||
GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
|
||||
GL_MAP_UNSYNCHRONIZED_BIT);
|
||||
}
|
||||
|
||||
return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
|
||||
return std::make_tuple(pointer, iterator, invalidate);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
ASSERT(size <= mapped_size);
|
||||
void StreamBuffer::Unmap(u64 used_size) {
|
||||
ASSERT_MSG(used_size <= mapped_size, "Reserved size {} is too small compared to {}",
|
||||
mapped_size, used_size);
|
||||
|
||||
if (!coherent && !readback && size > 0) {
|
||||
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
|
||||
}
|
||||
|
||||
if (!persistent) {
|
||||
if (!buffer_storage) {
|
||||
glFlushMappedBufferRange(gl_target, 0, used_size);
|
||||
glUnmapBuffer(gl_target);
|
||||
}
|
||||
|
||||
buffer_pos += size;
|
||||
iterator += used_size;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -8,40 +8,50 @@
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
class StreamBuffer {
|
||||
static constexpr std::size_t SYNC_POINTS = 16;
|
||||
|
||||
public:
|
||||
explicit OGLStreamBuffer(GLenum target, GLsizeiptr size, bool readback = false,
|
||||
bool prefer_coherent = false);
|
||||
~OGLStreamBuffer();
|
||||
StreamBuffer(GLenum target, size_t size);
|
||||
~StreamBuffer();
|
||||
|
||||
GLuint GetHandle() const;
|
||||
GLsizeiptr GetSize() const;
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return gl_buffer.handle;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, the offset within the buffer,
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
return buffer_size;
|
||||
}
|
||||
|
||||
/* This mapping function will return a pair of:
|
||||
* - the pointer to the mapped buffer
|
||||
* - the offset into the real GPU buffer (always multiple of stride)
|
||||
* On mapping, the maximum of size for allocation has to be set.
|
||||
* The size really pushed into this fifo only has to be known on Unmapping.
|
||||
* Mapping invalidates the current buffer content,
|
||||
* so it isn't allowed to access the old content any more.
|
||||
*/
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
std::tuple<u8*, u64, bool> Map(u64 size, u64 alignment = 0);
|
||||
void Unmap(u64 used_size);
|
||||
|
||||
private:
|
||||
OGLBuffer gl_buffer;
|
||||
[[nodiscard]] u64 Slot(u64 offset) noexcept {
|
||||
return offset / slot_size;
|
||||
}
|
||||
|
||||
GLenum gl_target;
|
||||
size_t buffer_size;
|
||||
size_t slot_size;
|
||||
bool buffer_storage{};
|
||||
u8* mapped_ptr{};
|
||||
u64 mapped_size;
|
||||
|
||||
bool readback = false;
|
||||
bool coherent = false;
|
||||
bool persistent = false;
|
||||
u64 iterator = 0;
|
||||
u64 used_iterator = 0;
|
||||
u64 free_iterator = 0;
|
||||
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr buffer_size = 0;
|
||||
GLintptr mapped_offset = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
OGLBuffer gl_buffer;
|
||||
std::array<OGLSync, SYNC_POINTS> fences{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -53,16 +53,15 @@ static constexpr std::array COLOR_TUPLES_OES = {
|
||||
return GL_COLOR_BUFFER_BIT;
|
||||
}
|
||||
|
||||
constexpr u32 UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
constexpr u32 DOWNLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
constexpr std::size_t UPLOAD_BUFFER_SIZE = 32 * 1024 * 1024;
|
||||
constexpr std::size_t DOWNLOAD_BUFFER_SIZE = 4 * 1024 * 1024;
|
||||
|
||||
TextureRuntime::TextureRuntime(Driver& driver)
|
||||
: driver{driver}, filterer{Settings::values.texture_filter_name.GetValue(),
|
||||
VideoCore::GetResolutionScaleFactor()},
|
||||
upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE}, download_buffer{
|
||||
GL_PIXEL_PACK_BUFFER,
|
||||
DOWNLOAD_BUFFER_SIZE, true} {
|
||||
upload_buffer{GL_PIXEL_UNPACK_BUFFER, UPLOAD_BUFFER_SIZE} {
|
||||
|
||||
download_buffer.resize(DOWNLOAD_BUFFER_SIZE);
|
||||
read_fbo.Create();
|
||||
draw_fbo.Create();
|
||||
|
||||
@ -77,13 +76,22 @@ TextureRuntime::TextureRuntime(Driver& driver)
|
||||
}
|
||||
|
||||
StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
auto& buffer = upload ? upload_buffer : download_buffer;
|
||||
auto [data, offset, invalidate] = buffer.Map(size, 4);
|
||||
if (!upload) {
|
||||
ASSERT_MSG(download_buffer.size() <= size, "Download buffer to small");
|
||||
return StagingData{
|
||||
.size = size,
|
||||
.mapped = std::span{download_buffer.data(), size},
|
||||
.buffer_offset = 0,
|
||||
};
|
||||
}
|
||||
|
||||
return StagingData{.buffer = buffer.GetHandle(),
|
||||
.size = size,
|
||||
.mapped = std::span<u8>{data, size},
|
||||
.buffer_offset = offset};
|
||||
auto [data, offset, invalidate] = upload_buffer.Map(size, 4);
|
||||
return StagingData{
|
||||
.buffer = upload_buffer.Handle(),
|
||||
.size = size,
|
||||
.mapped = std::span{data, size},
|
||||
.buffer_offset = offset,
|
||||
};
|
||||
}
|
||||
|
||||
const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) {
|
||||
@ -333,6 +341,9 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(stride));
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer);
|
||||
|
||||
// Unmap the buffer FindStaging mapped beforehand
|
||||
runtime.upload_buffer.Unmap(staging.size);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, texture.handle);
|
||||
|
||||
@ -343,7 +354,6 @@ void Surface::Upload(const VideoCore::BufferTextureCopy& upload, const StagingDa
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
|
||||
runtime.upload_buffer.Unmap(staging.size);
|
||||
}
|
||||
|
||||
InvalidateAllWatcher();
|
||||
@ -360,7 +370,6 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer);
|
||||
|
||||
const bool is_scaled = res_scale != 1;
|
||||
if (is_scaled) {
|
||||
@ -372,9 +381,7 @@ void Surface::Download(const VideoCore::BufferTextureCopy& download, const Stagi
|
||||
const auto& tuple = runtime.GetFormatTuple(pixel_format);
|
||||
glReadPixels(download.texture_rect.left, download.texture_rect.bottom,
|
||||
download.texture_rect.GetWidth(), download.texture_rect.GetHeight(),
|
||||
tuple.format, tuple.type, reinterpret_cast<void*>(staging.buffer_offset));
|
||||
|
||||
runtime.download_buffer.Unmap(staging.size);
|
||||
tuple.format, tuple.type, staging.mapped.data());
|
||||
}
|
||||
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
@ -393,20 +400,24 @@ void Surface::ScaledUpload(const VideoCore::BufferTextureCopy& upload, const Sta
|
||||
unscaled_params.res_scale = 1;
|
||||
Surface unscaled_surface{unscaled_params, runtime};
|
||||
|
||||
const VideoCore::BufferTextureCopy unscaled_upload = {.buffer_offset = upload.buffer_offset,
|
||||
.buffer_size = upload.buffer_size,
|
||||
.texture_rect = unscaled_rect};
|
||||
const VideoCore::BufferTextureCopy unscaled_upload = {
|
||||
.buffer_offset = upload.buffer_offset,
|
||||
.buffer_size = upload.buffer_size,
|
||||
.texture_rect = unscaled_rect,
|
||||
};
|
||||
|
||||
unscaled_surface.Upload(unscaled_upload, staging);
|
||||
|
||||
const auto& filterer = runtime.GetFilterer();
|
||||
if (!filterer.Filter(unscaled_surface.texture, unscaled_rect, texture, scaled_rect, type)) {
|
||||
const VideoCore::TextureBlit blit = {.src_level = 0,
|
||||
.dst_level = upload.texture_level,
|
||||
.src_layer = 0,
|
||||
.dst_layer = 0,
|
||||
.src_rect = unscaled_rect,
|
||||
.dst_rect = scaled_rect};
|
||||
const VideoCore::TextureBlit blit = {
|
||||
.src_level = 0,
|
||||
.dst_level = upload.texture_level,
|
||||
.src_layer = 0,
|
||||
.dst_layer = 0,
|
||||
.src_rect = unscaled_rect,
|
||||
.dst_rect = scaled_rect,
|
||||
};
|
||||
|
||||
// If filtering fails, resort to normal blitting
|
||||
runtime.BlitTextures(unscaled_surface, *this, blit);
|
||||
@ -428,14 +439,15 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download,
|
||||
unscaled_params.res_scale = 1;
|
||||
Surface unscaled_surface{unscaled_params, runtime};
|
||||
|
||||
const VideoCore::TextureBlit blit = {.src_level = download.texture_level,
|
||||
.dst_level = 0,
|
||||
.src_layer = 0,
|
||||
.dst_layer = 0,
|
||||
.src_rect = scaled_rect,
|
||||
.dst_rect = unscaled_rect};
|
||||
|
||||
// Blit the scaled rectangle to the unscaled texture
|
||||
const VideoCore::TextureBlit blit = {
|
||||
.src_level = download.texture_level,
|
||||
.dst_level = 0,
|
||||
.src_layer = 0,
|
||||
.dst_layer = 0,
|
||||
.src_rect = scaled_rect,
|
||||
.dst_rect = unscaled_rect,
|
||||
};
|
||||
runtime.BlitTextures(*this, unscaled_surface, blit);
|
||||
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
@ -446,13 +458,10 @@ void Surface::ScaledDownload(const VideoCore::BufferTextureCopy& download,
|
||||
runtime.BindFramebuffer(GL_READ_FRAMEBUFFER, 0, GL_TEXTURE_2D, type,
|
||||
unscaled_surface.texture);
|
||||
glReadPixels(0, 0, rect_width, rect_height, tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
staging.mapped.data());
|
||||
} else {
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type,
|
||||
reinterpret_cast<void*>(staging.buffer_offset));
|
||||
glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, staging.mapped.data());
|
||||
}
|
||||
|
||||
runtime.download_buffer.Unmap(staging.size);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -23,7 +23,7 @@ struct StagingData {
|
||||
GLuint buffer;
|
||||
u32 size = 0;
|
||||
std::span<u8> mapped{};
|
||||
GLintptr buffer_offset = 0;
|
||||
u64 buffer_offset = 0;
|
||||
};
|
||||
|
||||
class Driver;
|
||||
@ -46,6 +46,7 @@ public:
|
||||
/// Returns the OpenGL format tuple associated with the provided pixel format
|
||||
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format);
|
||||
|
||||
/// Causes a GPU command flush
|
||||
void Finish() const {}
|
||||
|
||||
/// Allocates an OpenGL texture with the specified dimensions and format
|
||||
@ -92,7 +93,8 @@ private:
|
||||
TextureFilterer filterer;
|
||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||
std::unordered_multimap<VideoCore::HostTextureTag, OGLTexture> texture_recycler;
|
||||
OGLStreamBuffer upload_buffer, download_buffer;
|
||||
StreamBuffer upload_buffer;
|
||||
std::vector<u8> download_buffer;
|
||||
OGLFramebuffer read_fbo, draw_fbo;
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user