rasterizer_cache: Add staging buffer cache for uploads/downloads

*  In addition, bump the context version to 4.4 to enforce ARB_buffer_storage, and use EXT_buffer_storage for GLES, which is supported on many mobile devices
This commit is contained in:
emufan4568
2022-09-09 11:14:51 +03:00
committed by GPUCode
parent 424ed2df04
commit a7cfe99ca1
15 changed files with 224 additions and 3127 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -114,7 +114,7 @@ public:
/// Create the original context that should be shared from
explicit OpenGLSharedContext(QSurface* surface) : surface(surface) {
QSurfaceFormat format;
format.setVersion(4, 3);
format.setVersion(4, 4);
format.setProfile(QSurfaceFormat::CoreProfile);
// TODO: expose a setting for buffer value (ie default/single/double/triple)
format.setSwapBehavior(QSurfaceFormat::DefaultSwapBehavior);

View File

@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <bit>
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/texture.h"
@ -150,8 +151,9 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
}
bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
Core::System& system = Core::System::GetInstance();
auto& custom_tex_cache = system.CustomTexCache();
const auto& image_interface = system.GetImageInterface();
if (custom_tex_cache.IsTextureCached(tex_hash)) {
custom_tex_info = custom_tex_cache.LookupTexture(tex_hash);
@ -169,9 +171,7 @@ bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
return false;
}
const std::bitset<32> width_bits(custom_tex_info.width);
const std::bitset<32> height_bits(custom_tex_info.height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
if (std::popcount(custom_tex_info.width) != 1 || std::popcount(custom_tex_info.height) != 1) {
LOG_ERROR(Render_OpenGL, "Texture {} size is not a power of 2", path_info.path);
return false;
}
@ -186,20 +186,19 @@ bool CachedSurface::LoadCustomTexture(u64 tex_hash) {
void CachedSurface::DumpTexture(GLuint target_tex, u64 tex_hash) {
// Make sure the texture size is a power of 2
// If not, the surface is actually a framebuffer
std::bitset<32> width_bits(width);
std::bitset<32> height_bits(height);
if (width_bits.count() != 1 || height_bits.count() != 1) {
if (std::popcount(width) != 1 || std::popcount(height) != 1) {
LOG_WARNING(Render_OpenGL, "Not dumping {:016X} because size isn't a power of 2 ({}x{})",
tex_hash, width, height);
return;
}
// Dump texture to RGBA8 and encode as PNG
const auto& image_interface = Core::System::GetInstance().GetImageInterface();
auto& custom_tex_cache = Core::System::GetInstance().CustomTexCache();
Core::System& system = Core::System::GetInstance();
const auto& image_interface = system.GetImageInterface();
auto& custom_tex_cache = system.CustomTexCache();
std::string dump_path =
fmt::format("{}textures/{:016X}/", FileUtil::GetUserPath(FileUtil::UserPath::DumpDir),
Core::System::GetInstance().Kernel().GetCurrentProcess()->codeset->program_id);
system.Kernel().GetCurrentProcess()->codeset->program_id);
if (!FileUtil::CreateFullPath(dump_path)) {
LOG_ERROR(Render, "Unable to create {}", dump_path);
return;
@ -266,7 +265,7 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
// If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
// surface
OGLTexture unscaled_tex;
if (res_scale != 1) {
if (IsScaled()) {
x0 = 0;
y0 = 0;
@ -357,37 +356,32 @@ void CachedSurface::UploadGLTexture(Common::Rectangle<u32> rect) {
MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64));
void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
if (type == SurfaceType::Fill) {
return;
}
MICROPROFILE_SCOPE(RasterizerCache_TextureDL);
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
const u32 download_size = width * height * GetBytesPerPixel(pixel_format);
if (gl_buffer.empty()) {
gl_buffer.resize(download_size);
}
OpenGLState state = OpenGLState::GetCurState();
OpenGLState prev_state = state;
SCOPE_EXIT({ prev_state.Apply(); });
// Ensure no bad interactions with GL_PACK_ALIGNMENT
ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0);
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(stride));
const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format);
// If not 1x scale, blit scaled texture to a new 1x texture and use that to flush
if (res_scale != 1) {
if (IsScaled()) {
auto scaled_rect = rect;
scaled_rect.left *= res_scale;
scaled_rect.top *= res_scale;
scaled_rect.right *= res_scale;
scaled_rect.bottom *= res_scale;
const Common::Rectangle<u32> unscaled_tex_rect{0, rect.GetHeight(), rect.GetWidth(), 0};
auto unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
OGLTexture unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight());
const TextureBlit texture_blit = {
.surface_type = type,
@ -398,8 +392,8 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
.end = {scaled_rect.right, scaled_rect.top}
},
.dst_region = Region2D{
.start = {unscaled_tex_rect.left, unscaled_tex_rect.bottom},
.end = {unscaled_tex_rect.right, unscaled_tex_rect.top}
.start = {0, 0},
.end = {rect.GetWidth(), rect.GetHeight()}
}
};

View File

@ -63,8 +63,9 @@ void RasterizerCache::CopySurface(const Surface& src_surface, const Surface& dst
const ClearValue clear_value =
MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data());
const ClearRect clear_rect = {
const TextureClear clear_rect = {
.surface_type = dst_surface->type,
.texture_format = dst_surface->pixel_format,
.texture_level = 0,
.rect = Rect2D{
.offset = {dst_rect.left, dst_rect.bottom},

View File

@ -48,6 +48,10 @@ public:
end = addr + size;
}
/// Returns true when res_scale is greater than 1
bool IsScaled() const {
    return 1 < res_scale;
}
/// Builds the SurfaceInterval delimited by addr and end
SurfaceInterval GetInterval() const {
    SurfaceInterval interval(addr, end);
    return interval;
}

View File

@ -6,6 +6,7 @@
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/rasterizer_cache/texture_runtime.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
namespace OpenGL {
@ -74,26 +75,26 @@ void TextureRuntime::ReadTexture(OGLTexture& texture, const BufferTextureCopy& c
tuple.format, tuple.type, pixels.data() + copy.buffer_offset);
}
bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, ClearValue value) {
bool TextureRuntime::ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
// Setup scissor rectangle according to the clear rectangle
OpenGLState state{};
state.scissor.enabled = true;
state.scissor.x = rect.rect.offset.x;
state.scissor.y = rect.rect.offset.y;
state.scissor.width = rect.rect.extent.width;
state.scissor.height = rect.rect.extent.height;
state.scissor.x = clear.rect.offset.x;
state.scissor.y = clear.rect.offset.y;
state.scissor.width = clear.rect.extent.width;
state.scissor.height = clear.rect.extent.height;
state.draw.draw_framebuffer = draw_fbo.handle;
state.Apply();
switch (rect.surface_type) {
switch (clear.surface_type) {
case SurfaceType::Color:
case SurfaceType::Texture:
case SurfaceType::Fill:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle,
rect.texture_level);
clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
@ -108,7 +109,7 @@ bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, Cl
case SurfaceType::Depth:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle,
rect.texture_level);
clear.texture_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
state.depth.write_mask = GL_TRUE;
@ -119,7 +120,7 @@ bool TextureRuntime::ClearTexture(OGLTexture& texture, const ClearRect& rect, Cl
case SurfaceType::DepthStencil:
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
texture.handle, rect.texture_level);
texture.handle, clear.texture_level);
state.depth.write_mask = GL_TRUE;
state.stencil.write_mask = -1;
@ -205,4 +206,43 @@ void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) {
glGenerateMipmap(GL_TEXTURE_2D);
}
/**
 * Returns a mapped staging buffer that can hold at least `size` bytes.
 *
 * Buffers are cached per direction (upload/download) in a set ordered by size. The smallest
 * cached buffer that fits the request is reused; otherwise a new buffer is allocated with
 * glBufferStorage(EXT), mapped with the persistent and coherent flags, and added to the cache.
 *
 * @param size   Minimum capacity of the buffer in bytes
 * @param upload True to use the upload cache (write mapping), false to use the download cache
 *               (read mapping)
 * @returns Reference to the cached staging buffer; `mapped` is the host view of its storage
 */
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
    const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT;
    auto& search = upload ? upload_buffers : download_buffers;

    // Reuse the smallest cached buffer that is large enough for the request
    const StagingBuffer key = {
        .size = size
    };
    if (auto it = search.lower_bound(key); it != search.end()) {
        return *it;
    }

    StagingBuffer staging{};
    // The cache is ordered by `size`, so the key has to be recorded before insertion. Leaving it
    // at zero makes lower_bound miss every later request and makes every insert after the first
    // collide with the existing zero-sized entry.
    staging.size = size;
    staging.buffer.Create();

    // NOTE(review): the buffer stays bound to GL_PIXEL_UNPACK_BUFFER on return, for downloads as
    // well - confirm that callers expect this binding.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle);

    // Allocate a new buffer and map the data to the host
    // NOTE(review): the result of glMapBufferRange is not checked for failure here.
    void* data = nullptr;
    if (GLES) {
        // Only download buffers additionally request client storage
        const GLbitfield storage =
            upload ? GL_MAP_WRITE_BIT : (GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT);
        glBufferStorageEXT(GL_PIXEL_UNPACK_BUFFER, size, nullptr,
                           storage | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
        data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size,
                                access | GL_MAP_PERSISTENT_BIT_EXT | GL_MAP_COHERENT_BIT_EXT);
    } else {
        // Only download buffers additionally request client storage
        const GLbitfield storage =
            upload ? GL_MAP_WRITE_BIT : (GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT);
        glBufferStorage(GL_PIXEL_UNPACK_BUFFER, size, nullptr,
                        storage | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
        data = glMapBufferRange(GL_PIXEL_UNPACK_BUFFER, 0, size,
                                access | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
    }

    // Insert it to the cache and return the memory
    staging.mapped = std::span{reinterpret_cast<std::byte*>(data), size};
    const auto& [it, _] = search.insert(std::move(staging));
    return *it;
}
} // namespace OpenGL

View File

@ -4,6 +4,7 @@
#pragma once
#include <span>
#include <set>
#include "video_core/rasterizer_cache/types.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@ -11,6 +12,16 @@ namespace OpenGL {
struct FormatTuple;
/// Staging buffer entry kept in the upload/download caches of TextureRuntime
struct StagingBuffer {
    /// Size in bytes; this is the only field used for ordering
    u32 size{0};
    /// Host-visible view of the buffer memory
    std::span<std::byte> mapped{};
    /// OpenGL buffer object backing the mapping
    OGLBuffer buffer{};

    /// Orders staging buffers by ascending size so they can be stored in a std::set
    bool operator<(const StagingBuffer& other) const {
        return other.size > size;
    }
};
/**
* Provides texture manipulation functions to the rasterizer cache
* Separating this into a class makes it easier to abstract graphics API code
@ -25,7 +36,7 @@ public:
PixelFormat format, std::span<std::byte> pixels);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(OGLTexture& texture, const ClearRect& rect, ClearValue value);
bool ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value);
/// Copies a rectangle of src_tex to another rectangle of dst_rect
bool CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy);
@ -36,8 +47,13 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(OGLTexture& texture, u32 max_level);
/// Maps an internal staging buffer of the provided size for pixel uploads/downloads
const StagingBuffer& FindStaging(u32 size, bool upload);
private:
OGLFramebuffer read_fbo, draw_fbo;
std::set<StagingBuffer> upload_buffers;
std::set<StagingBuffer> download_buffers;
};
} // namespace OpenGL

View File

@ -45,8 +45,9 @@ union ClearValue {
};
};
struct ClearRect {
struct TextureClear {
SurfaceType surface_type;
PixelFormat texture_format;
u32 texture_level;
Rect2D rect;
};

View File

@ -47,7 +47,7 @@ void FrameDumperOpenGL::PresentLoop(std::stop_token stop_token) {
mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
}
frame->render_fence.Wait();
glWaitSync(frame->render_fence.handle, 0, GL_TIMEOUT_IGNORED);
glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
glBindBuffer(GL_PIXEL_PACK_BUFFER, pbos[current_pbo].handle);

View File

@ -215,22 +215,6 @@ void OGLSync::Release() {
handle = 0;
}
/// Calls glClientWaitSync on the sync object with no timeout; does nothing when no handle is held
void OGLSync::WaitHost() {
    if (handle) {
        glClientWaitSync(handle, 0, GL_TIMEOUT_IGNORED);
    }
}
/// Calls glWaitSync on the sync object with no timeout; does nothing when no handle is held
void OGLSync::Wait() {
    if (handle) {
        glWaitSync(handle, 0, GL_TIMEOUT_IGNORED);
    }
}
void OGLVertexArray::Create() {
if (handle != 0)
return;

View File

@ -164,12 +164,6 @@ public:
/// Deletes the internal OpenGL resource
void Release();
/// Causes the host to wait for the OpenGL semaphore
void WaitHost();
/// Causes the GPU to wait for the OpenGL semaphore
void Wait();
GLsync handle = 0;
};

View File

@ -488,14 +488,18 @@ void RendererOpenGL::RenderToMailbox(const Layout::FramebufferLayout& layout,
// INTEL driver workaround. We can't delete the previous render sync object until we are
// sure that the presentation is done
frame->present_fence.WaitHost();
if (frame->present_fence.handle) {
glClientWaitSync(frame->present_fence.handle, 0, GL_TIMEOUT_IGNORED);
}
// Delete the draw fence if the frame wasn't presented
// delete the draw fence if the frame wasn't presented
frame->render_fence.Release();
// wait for the presentation to be done
frame->present_fence.Wait();
frame->present_fence.Release();
if (frame->present_fence.handle) {
glWaitSync(frame->present_fence.handle, 0, GL_TIMEOUT_IGNORED);
frame->present_fence.Release();
}
}
{
@ -1120,7 +1124,7 @@ void RendererOpenGL::TryPresent(int timeout_ms) {
render_window.mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
}
frame->render_fence.Wait();
glWaitSync(frame->render_fence.handle, 0, GL_TIMEOUT_IGNORED);
// INTEL workaround.
// Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
@ -1231,10 +1235,8 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
// Qualcomm has some spammy info messages that are marked as errors but not important
// https://developer.qualcomm.com/comment/11845
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
}
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
#endif
const std::string_view gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
@ -1247,20 +1249,24 @@ VideoCore::ResultStatus RendererOpenGL::Init() {
auto& telemetry_session = Core::System::GetInstance().TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", std::string(gpu_vendor));
telemetry_session.AddField(user_system, "GPU_Model", std::string(gpu_model));
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version));
telemetry_session.AddField(user_system, "GPU_Vendor", std::string{gpu_vendor});
telemetry_session.AddField(user_system, "GPU_Model", std::string{gpu_model});
telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string{gl_version});
if (gpu_vendor == "GDI Generic") {
return VideoCore::ResultStatus::ErrorGenericDrivers;
}
if (!(GLAD_GL_VERSION_4_3 || GLAD_GL_ES_VERSION_3_1)) {
if (!(GLAD_GL_VERSION_4_4 || GLAD_GL_ES_VERSION_3_2)) {
return VideoCore::ResultStatus::ErrorRendererInit;
}
// We require glBufferStorage(EXT)
if (GLES && !GLAD_GL_EXT_buffer_storage) {
return VideoCore::ResultStatus::ErrorRendererInit;
}
InitOpenGLObjects();
RefreshRasterizerSetting();
return VideoCore::ResultStatus::Success;

View File

@ -39,8 +39,8 @@ void TextureDownloaderES::Test() {
}
glActiveTexture(GL_TEXTURE0);
const auto test = [this, &state](FormatTuple tuple, auto original_data, std::size_t tex_size,
auto data_generator) {
const auto test = [this, &state]<typename T>(FormatTuple tuple, std::vector<T> original_data,
std::size_t tex_size, auto data_generator) {
OGLTexture texture;
texture.Create();
state.texture_units[0].texture_2d = texture.handle;
@ -55,7 +55,7 @@ void TextureDownloaderES::Test() {
glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, tex_sizei, tex_sizei, tuple.format, tuple.type,
original_data.data());
decltype(original_data) new_data(original_data.size());
std::vector<T> new_data(original_data.size());
glFinish();
auto start = std::chrono::high_resolution_clock::now();
GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, tex_sizei, tex_sizei,
@ -131,7 +131,7 @@ void main(){
state.draw.draw_framebuffer = depth32_fbo.handle;
state.renderbuffer = r32ui_renderbuffer.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, max_size, max_size);
glRenderbufferStorage(GL_RENDERBUFFER, GL_R32UI, MAX_SIZE, MAX_SIZE);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
r32ui_renderbuffer.handle);
glUniform1i(glGetUniformLocation(d24s8_r32ui_conversion_shader.program.handle, "depth"), 1);
@ -139,7 +139,7 @@ void main(){
state.draw.draw_framebuffer = depth16_fbo.handle;
state.renderbuffer = r16_renderbuffer.handle;
state.Apply();
glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, max_size, max_size);
glRenderbufferStorage(GL_RENDERBUFFER, GL_R16, MAX_SIZE, MAX_SIZE);
glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
r16_renderbuffer.handle);
@ -152,7 +152,7 @@ void main(){
*/
GLuint TextureDownloaderES::ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
GLint height, GLint width) {
ASSERT(width <= max_size && height <= max_size);
ASSERT(width <= MAX_SIZE && height <= MAX_SIZE);
const OpenGLState cur_state = OpenGLState::GetCurState();
OpenGLState state;
state.texture_units[0] = {cur_state.texture_units[0].texture_2d, sampler.handle};

View File

@ -11,26 +11,33 @@ namespace OpenGL {
class OpenGLState;
class TextureDownloaderES {
static constexpr u16 max_size = 1024;
public:
TextureDownloaderES(bool enable_depth_stencil);
void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type,
GLint height, GLint width, void* pixels);
private:
void Test();
GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type,
GLint height, GLint width);
private:
static constexpr u16 MAX_SIZE = 1024;
struct ConversionShader {
OGLProgram program;
GLint lod_location{-1};
};
OGLVertexArray vao;
OGLFramebuffer read_fbo_generic;
OGLFramebuffer depth32_fbo, depth16_fbo;
OGLRenderbuffer r32ui_renderbuffer, r16_renderbuffer;
struct ConversionShader {
OGLProgram program;
GLint lod_location{-1};
} d24_r32ui_conversion_shader, d16_r16_conversion_shader, d24s8_r32ui_conversion_shader;
ConversionShader d24_r32ui_conversion_shader;
ConversionShader d16_r16_conversion_shader;
ConversionShader d24s8_r32ui_conversion_shader;
OGLSampler sampler;
void Test();
GLuint ConvertDepthToColor(GLuint level, GLenum& format, GLenum& type, GLint height,
GLint width);
public:
TextureDownloaderES(bool enable_depth_stencil);
void GetTexImage(GLenum target, GLuint level, GLenum format, const GLenum type, GLint height,
GLint width, void* pixels);
};
} // namespace OpenGL