rasterizer_cache: Add staging buffer cache for uploads/downloads

* In addition, bump the context version to 4.4 to enforce ARB_buffer_storage, and use EXT_buffer_storage for GLES, which is supported on many mobile devices
This commit is contained in:
emufan4568
2022-09-09 11:14:51 +03:00
committed by GPUCode
parent 68ca206d53
commit 553c85456e
15 changed files with 224 additions and 3127 deletions

View File

@@ -116,7 +116,7 @@ public:
/// Create the original context that should be shared from
explicit OpenGLSharedContext(QSurface* surface) : surface(surface) {
QSurfaceFormat format;
format.setVersion(4, 3);
format.setVersion(4, 4);
format.setProfile(QSurfaceFormat::CoreProfile);
// TODO: expose a setting for buffer value (ie default/single/double/triple)
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);

View File

@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <bit>
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/texture.h"
@@ -152,8 +153,9 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
}
bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
Core::System& system = Core::System::GetInstance();
auto& custom_tex_cache = system.CustomTexCache();
const auto& image_interface = system.GetImageInterface();
if (custom_tex_cache.IsTextureCached(tex_hash)) {
custom_tex_info = custom_tex_cache.LookupTexture(tex_hash);
@@ -171,9 +173,7 @@ bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
return false;
}
const std::bitset<32> width_bits(custom_tex_info.width);
const std::bitset<32> height_bits(custom_tex_info.height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
if (std::popcount(custom_tex_info.width) != 1 || std::popcount(custom_tex_info.height) != 1) {
LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path);
return false;
}
@@ -188,20 +188,19 @@ bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
// Make sure the texture size is a power of 2
// If not, the surface is actually a framebuffer
std::bitset<32> width_bits(width);
std::bitset<32> height_bits(height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
if (std::popcount(width) != 1 || std::popcount(height) != 1) {
LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})",
tex_hash, width, height);
return;
}
// Dump texture to RGBA8 and encode as PNG
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
Core::System& system = Core::System::GetInstance();
const auto& image_interface = system.GetImageInterface();
auto& custom_tex_cache = system.CustomTexCache();
std::string dump_path =
fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir),
Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id);
system.Kernel().GetCurrentProcess()->codeset->program_id);
if (!FileUtil::CreateFullPath(dump_path)) {
LOG_ERROR(Render, "Unable to create {}", dump_path);
return;
@@ -269,7 +268,7 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
// surface
OGLTexture unscaled_tex;
if (res_scale != 1) {
if (IsScaled()) {
x0 = 0;
y0 = 0;
@@ -361,37 +360,32 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download",
MP_RGB(128, 192, 64));
void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
const u32 download_size = width * height * GetBytesPerPixel(pixel_format);
if (gl_buffer.empty()) {
gl_buffer.resize(download_size);
}
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
// If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
if (res_scale != 1) {
if (IsScaled()) {
auto scaled_rect = rect;
scaled_rect.left *= res_scale;
scaled_rect.top *= res_scale;
scaled_rect.right *= res_scale;
scaled_rect.bottom *= res_scale;
const Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
auto unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
OGLTexture unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
const TextureBlit texture_blit = {
.surface_type = type,
@@ -402,8 +396,8 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
.end = {scaled_rect.right, scaled_rect.top}
},
.dst_region = Region2D{
.start = {unscaled_tex_rect.left, unscaled_tex_rect.bottom},
.end = {unscaled_tex_rect.right, unscaled_tex_rect.top}
.start = {0, 0},
.end = {rect.GetWidth(), rect.GetHeight()}
}
};

View File

@@ -64,8 +64,9 @@ void RasterizerCache::CopySurface(const Surface& src_surface, const Surface& dst
const ClearValue clear_value =
MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data());
const ClearRect clear_rect = {
const TextureClear clear_rect = {
.surface_type = dst_surface->type,
.texture_format = dst_surface->pixel_format,
.texture_level = 0,
.rect = Rect2D{
.offset = {dst_rect.left, dst_rect.bottom},

View File

@@ -48,6 +48,10 @@ public:
end = addr + size;
}
/// Returns true when the resolution scale factor of this surface is greater than 1
bool IsScaled() const {
    const bool upscaled = res_scale > 1;
    return upscaled;
}
/// Builds the interval spanning from addr to end
SurfaceInterval GetInterval() const {
    SurfaceInterval interval(addr, end);
    return interval;
}

View File

@@ -6,6 +6,7 @@
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/rasterizer_cache/texture_runtime.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
namespace OpenGL {
@@ -74,26 +75,26 @@ void TextureRuntime::ReadTexture(OGLTexture& texture, const BufferTextureCopy& c
tuple.format, tuple.type, pixels.data() + copy.buffer_offset);
}
bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, ClearValue value) {
bool TextureRuntime::ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
// Setup scissor rectangle according to the clear rectangle
OpenGLState state{};
state.scissor.enabled = true;
state.scissor.x = rect.rect.offset.x;
state.scissor.y = rect.rect.offset.y;
state.scissor.width = rect.rect.extent.width;
state.scissor.height = rect.rect.extent.height;
state.scissor.x = clear.rect.offset.x;
state.scissor.y = clear.rect.offset.y;
state.scissor.width = clear.rect.extent.width;
state.scissor.height = clear.rect.extent.height;
state.draw.draw_framebuffer = draw_fbo.handle;
state.Apply();
switch (rect.surface_type) {
switch (clear.surface_type) {
case SurfaceType::Color:
case SurfaceType::Texture:
case SurfaceType::Fill:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle,
rect.texture_level);
clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
@@ -108,7 +109,7 @@ bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, Cl
case SurfaceType::Depth:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle,
rect.texture_level);
clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
state.depth.write_mask = GL_TRUE;
@@ -119,7 +120,7 @@ bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, Cl
case SurfaceType::DepthStencil:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
texture.handle, rect.texture_level);
texture.handle, clear.texture_level);
state.depth.write_mask = GL_TRUE;
state.stencil.write_mask = -1;
@@ -205,4 +206,43 @@ void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) {
glGenerateMipmap(GL_TEXTURE_2D);
}
/**
 * Returns a persistently mapped staging buffer able to hold at least `size` bytes.
 *
 * Upload and download buffers are cached in separate sets ordered by size. The smallest
 * cached buffer whose size is >= `size` is reused; when none exists a new buffer is
 * allocated with glBufferStorage(EXT), mapped, and inserted into the cache.
 *
 * @param size   Minimum number of bytes the returned buffer must hold
 * @param upload True to use the upload cache (write mapping), false to use the download
 *               cache (read mapping)
 * @return Reference to the cached staging buffer; ownership stays with the cache
 */
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
    const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT;
    auto& search = upload ? upload_buffers : download_buffers;

    // The set is ordered by StagingBuffer::size, so lower_bound yields the smallest
    // cached buffer that is large enough for the request
    const StagingBuffer key = {
        .size = size
    };
    if (auto it = search.lower_bound(key); it != search.end()) {
        return *it;
    }

    StagingBuffer staging{};
    // The size is the ordering key of the cache. It must be set before insertion,
    // otherwise every entry would carry the default key 0: lookups for a non-zero size
    // would never hit and a second insert would collide with the first entry.
    staging.size = size;
    staging.buffer.Create();
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle);

    // Allocate a new buffer and map the data to the host
    // NOTE(review): the result of glMapBufferRange is not checked for nullptr here
    void* data = nullptr;
    if (GLES) {
        // Client storage is only requested for download buffers
        const GLbitfield storage =
            upload ? GL_MAP_WRITE_BIT : (GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT);
        glBufferStorageEXT(GL_PIXEL_UNPACK_BUFFER, size, nullptr,
                           storage | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
        data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size,
                                access | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
    } else {
        // Client storage is only requested for download buffers
        const GLbitfield storage =
            upload ? GL_MAP_WRITE_BIT : (GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT);
        glBufferStorage(GL_PIXEL_UNPACK_BUFFER, size, nullptr,
                        storage | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
        data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size,
                                access | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
    }

    // Insert it to the cache and return the memory
    staging.mapped = std::span{reinterpret_cast<std::byte*>(data), size};
    const auto& [it, _] = search.insert(std::move(staging));
    return *it;
}
} // namespace OpenGL

View File

@@ -4,6 +4,7 @@
#pragma once
#include <span>
#include <set>
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -11,6 +12,16 @@ namespace OpenGL {
struct FormatTuple;
/// Entry of the staging buffer cache: a GL buffer object plus a span over its mapping
struct StagingBuffer {
    /// Value the cache orders its entries by (see operator<)
    u32 size = 0;
    /// Byte span over the mapped contents of the buffer
    std::span<std::byte> mapped{};
    /// GL buffer object
    OGLBuffer buffer{};

    /// Orders staging buffers by ascending size
    bool operator<(const StagingBuffer& rhs) const {
        return rhs.size > size;
    }
};
/**
* Provides texture manipulation functions to the rasterizer cache
* Separating this into a class makes it easier to abstract graphics API code
@@ -25,7 +36,7 @@ public:
PixelFormat format, std::span<std::byte> pixels);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(OGLTexture& texture, const ClearRect& rect, ClearValue value);
bool ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value);
/// Copies a rectangle of src_tex to another rectangle of dst_rect
bool CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy);
@@ -36,8 +47,13 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(OGLTexture& texture, u32 max_level);
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
const StagingBuffer& FindStaging(u32 size, bool upload);
private:
OGLFramebuffer read_fbo, draw_fbo;
std::set<StagingBuffer> upload_buffers;
std::set<StagingBuffer> download_buffers;
};
} // namespace OpenGL

View File

@@ -45,8 +45,9 @@ union ClearValue {
};
};
struct ClearRect {
/// Describes a clear operation on a rectangle of one level of a texture
struct TextureClear {
/// Kind of surface being cleared
SurfaceType surface_type;
/// Pixel format of the texture being cleared
PixelFormat texture_format;
/// Level of the texture to clear
u32 texture_level;
/// Region of the texture to clear
Rect2D rect;
};

View File

@@ -53,7 +53,7 @@ void FrameDumperOpenGL::PresentLoop() {
mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
}
frame->render_fence.Wait();
glWaitSync(frame->render_fence.handle, 0, GL_TIMEOUT_IGNORED);
glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbos[current_pbo].handle);

View File

@@ -215,22 +215,6 @@ void OGLSync::Release() {
handle = 0;
}
/// Makes the host wait on the sync object via glClientWaitSync; does nothing without a handle
void OGLSync::WaitHost() {
    if (handle) {
        glClientWaitSync(handle, 0, GL_TIMEOUT_IGNORED);
    }
}
/// Makes the GPU wait on the sync object via glWaitSync; does nothing without a handle
void OGLSync::Wait() {
    if (handle) {
        glWaitSync(handle, 0, GL_TIMEOUT_IGNORED);
    }
}
void OGLVertexArray::Create() {
if (handle != 0)
return;

View File

@@ -164,12 +164,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
/// Causes the host to wait for the OpenGL semaphore
void WaitHost();
/// Causes the GPU to wait for the OpenGL semaphore
void Wait();
GLsync handle = 0;
};

View File

@@ -499,14 +499,18 @@ void RendererOpenGL::RenderToMailbox(const Layout::FramebufferLayout& layout,
// INTEL driver workaround. We can't delete the previous render sync object until we are
// sure that the presentation is done
frame->present_fence.WaitHost();
if (frame->present_fence.handle) {
glClientWaitSync(frame->present_fence.handle, 0, GL_TIMEOUT_IGNORED);
}
// Delete the draw fence if the frame wasn't presented
// delete the draw fence if the frame wasn't presented
frame->render_fence.Release();
// wait for the presentation to be done
frame->present_fence.Wait();
frame->present_fence.Release();
if (frame->present_fence.handle) {
glWaitSync(frame->present_fence.handle, 0, GL_TIMEOUT_IGNORED);
frame->present_fence.Release();
}
}
{
@@ -1145,7 +1149,7 @@ void RendererOpenGL::TryPresent(int timeout_ms, bool is_secondary) {
window.mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
}
frame->render_fence.Wait();
glWaitSync(frame->render_fence.handle, 0, GL_TIMEOUT_IGNORED);
// INTEL workaround.
// Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
@@ -1256,10 +1260,8 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
// Qualcomm has some spammy info messages that are marked as errors but not important
// https://developer.qualcomm.com/comment/11845
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
}
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
#endif
const std::string_view gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
@@ -1272,20 +1274,24 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
telemetry_session.AddField(user_system, "GPU_Vendor", std::string{gpu_vendor});
telemetry_session.AddField(user_system, "GPU_Model", std::string{gpu_model});
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string{gl_version});
if (gpu_vendor == "GDI Generic") {
return VideoCore::ResultStatus::ErrorGenericDrivers;
}
if (!(GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1)) {
if (!(GLAD_GL_VERSION_4_4 || GLAD_GL_ES_VERSION_3_2)) {
return VideoCore::ResultStatus::ErrorRendererInit;
}
// We require glBufferStorage(EXT)
if (GLES && !GLAD_GL_EXT_buffer_storage) {
return VideoCore::ResultStatus::ErrorRendererInit;
}
InitOpenGLObjects();
RefreshRasterizerSetting();
return VideoCore::ResultStatus::Success;

View File

@@ -39,8 +39,8 @@ void TextureDownloaderES::Test() {
}
glActiveTexture(GL_TEXTURE0);
const auto test = [this, &state](FormatTuple tuple, auto original_data, std::size_t tex_size,
auto data_generator) {
const auto test = [this, &state]<typename T>(FormatTuple tuple, std::vector<T> original_data,
std::size_t tex_size, auto data_generator) {
OGLTexture texture;
texture.Create();
state.texture_units[0].texture_2d = texture.handle;
@@ -55,7 +55,7 @@ void TextureDownloaderES::Test() {
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_sizei, tex_sizei, tuple.format, tuple.type,
original_data.data());
decltype(original_data) new_data(original_data.size());
std::vector<T> new_data(original_data.size());
glFinish();
auto start = std::chrono::high_resolution_clock::now();
GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_sizei, tex_sizei,
@@ -131,7 +131,7 @@ void main(){
state.draw.draw_framebuffer = depth32_fbo.handle;
state.renderbuffer = r32ui_renderbuffer.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size);
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, MAX_SIZE, MAX_SIZE);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
r32ui_renderbuffer.handle);
glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1);
@@ -139,7 +139,7 @@ void main(){
state.draw.draw_framebuffer = depth16_fbo.handle;
state.renderbuffer = r16_renderbuffer.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size);
glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, MAX_SIZE, MAX_SIZE);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
r16_renderbuffer.handle);
@@ -152,7 +152,7 @@ void main(){
*/
GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
GLint height, GLint width) {
ASSERT(width <= max_size && height <= max_size);
ASSERT(width <= MAX_SIZE && height <= MAX_SIZE);
const OpenGLState cur_state = OpenGLState::GetCurState();
OpenGLState state;
state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle};

View File

@@ -11,26 +11,33 @@ namespace OpenGL {
class OpenGLState;
class TextureDownloaderES {
static constexpr u16 max_size = 1024;
public:
TextureDownloaderES(bool enable_depth_stencil);
void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type,
GLint height, GLint width, void* pixels);
private:
void Test();
GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
GLint height, GLint width);
private:
static constexpr u16 MAX_SIZE = 1024;
struct ConversionShader {
OGLProgram program;
GLint lod_location{-1};
};
OGLVertexArray vao;
OGLFramebuffer read_fbo_generic;
OGLFramebuffer depth32_fbo, depth16_fbo;
OGLRenderbuffer r32ui_renderbuffer, r16_renderbuffer;
struct ConversionShader {
OGLProgram program;
GLint lod_location{-1};
} d24_r32ui_conversion_shader, d16_r16_conversion_shader, d24s8_r32ui_conversion_shader;
ConversionShader d24_r32ui_conversion_shader;
ConversionShader d16_r16_conversion_shader;
ConversionShader d24s8_r32ui_conversion_shader;
OGLSampler sampler;
void Test();
GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height,
GLint width);
public:
TextureDownloaderES(bool enable_depth_stencil);
void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type, GLint height,
GLint width, void* pixels);
};
} // namespace OpenGL