gl_rasterizer: Cleanup and fix bugs

This commit is contained in:
GPUCode
2022-10-31 21:58:14 +02:00
parent 939aafed40
commit 3c79360fd3
5 changed files with 34 additions and 71 deletions

View File

@ -10,7 +10,7 @@ namespace OpenGL {
/// Vendor identifiers; every enumerator carries an explicitly pinned numeric value.
enum class Vendor {
    Unknown = 0,
    AMD = 1,
    Nvidia = 2,
    Intel = 3,
    Generic = 4,
};
enum class DriverBug {
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is

View File

@ -20,19 +20,14 @@
namespace OpenGL {
/// Reports whether the GL_VENDOR string returned by glGetString matches one of the
/// two known AMD/ATI vendor names.
///
/// glGetString may return a null pointer when the query fails; building a
/// std::string_view from a null pointer is undefined behaviour, so that case is
/// reported as "not AMD" instead of being dereferenced.
///
/// @return true if the vendor string is "ATI Technologies Inc." or
///         "Advanced Micro Devices, Inc.", false otherwise (including a failed query).
static bool IsVendorAmd() {
    const auto* vendor_cstr = reinterpret_cast<char const*>(glGetString(GL_VENDOR));
    if (vendor_cstr == nullptr) {
        return false;
    }
    const std::string_view gpu_vendor{vendor_cstr};
    return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc.";
}
#ifdef __APPLE__
static bool IsVendorIntel() {
std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
return gpu_vendor == "Intel Inc.";
}
#endif
// Stream buffer capacities in bytes. They shall be big enough for about one frame.
constexpr std::size_t VERTEX_BUFFER_SIZE = std::size_t{16} << 20;  // 16 MiB
constexpr std::size_t INDEX_BUFFER_SIZE = std::size_t{1} << 20;    // 1 MiB
constexpr std::size_t UNIFORM_BUFFER_SIZE = std::size_t{2} << 20;  // 2 MiB
constexpr std::size_t TEXTURE_BUFFER_SIZE = std::size_t{1} << 20;  // 1 MiB
RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver)
: driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()),
: driver{driver}, runtime{driver}, res_cache{*this, runtime},
shader_program_manager{emu_window, driver, !driver.IsOpenGLES()},
vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
@ -44,8 +39,7 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
// Create a 1x1 clear texture to use in the NULL case,
// instead of OpenGL's default of solid black
glGenTextures(1, &default_texture);
glBindTexture(GL_TEXTURE_2D, default_texture);
default_texture.Create();
// For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
u8 framebuffer_data[4] = {0, 0, 0, 1};
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
@ -128,17 +122,6 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
state.Apply();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
#ifdef __APPLE__
if (IsVendorIntel()) {
shader_program_manager = std::make_unique<ShaderProgramManager>(
emu_window, VideoCore::g_separable_shader_enabled, is_amd);
} else {
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
}
#else
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
#endif
glEnable(GL_BLEND);
// Explicitly call the derived version to avoid warnings about calling virtual
@ -150,7 +133,7 @@ RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_program_manager->LoadDiskCache(stop_loading, callback);
shader_program_manager.LoadDiskCache(stop_loading, callback);
}
void RasterizerOpenGL::SyncEntireState() {
@ -285,7 +268,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool RasterizerOpenGL::SetupVertexShader() {
MICROPROFILE_SCOPE(OpenGL_VS);
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs,
return shader_program_manager.UseProgrammableVertexShader(Pica::g_state.regs,
Pica::g_state.vs);
}
@ -299,7 +282,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
return false;
}
shader_program_manager->UseFixedGeometryShader(regs);
shader_program_manager.UseFixedGeometryShader(regs);
return true;
}
@ -360,7 +343,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
vertex_buffer.Unmap(vs_input_size);
shader_program_manager->ApplyTo(state);
shader_program_manager.ApplyTo(state);
state.Apply();
if (is_indexed) {
@ -623,7 +606,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black.
state.texture_units[texture_index].texture_2d = default_texture;
state.texture_units[texture_index].texture_2d = default_texture.handle;
}
} else {
state.texture_units[texture_index].texture_2d = 0;
@ -687,9 +670,9 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
} else {
state.draw.vertex_array = sw_vao.handle;
state.draw.vertex_buffer = vertex_buffer.GetHandle();
shader_program_manager->UseTrivialVertexShader();
shader_program_manager->UseTrivialGeometryShader();
shader_program_manager->ApplyTo(state);
shader_program_manager.UseTrivialVertexShader();
shader_program_manager.UseTrivialGeometryShader();
shader_program_manager.ApplyTo(state);
state.Apply();
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
@ -784,7 +767,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (GLES) {
if (driver.IsOpenGLES()) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
@ -908,7 +891,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (GLES) {
if (driver.IsOpenGLES()) {
// With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true;
}
@ -1519,7 +1502,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
}
void RasterizerOpenGL::SetShader() {
shader_program_manager->UseFragmentShader(Pica::g_state.regs);
shader_program_manager.UseFragmentShader(Pica::g_state.regs);
}
void RasterizerOpenGL::SyncClipEnabled() {
@ -1595,7 +1578,7 @@ void RasterizerOpenGL::SyncLogicOp() {
const auto& regs = Pica::g_state.regs;
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
if (GLES) {
if (driver.IsOpenGLES()) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip
@ -1608,7 +1591,7 @@ void RasterizerOpenGL::SyncLogicOp() {
void RasterizerOpenGL::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs;
if (GLES) {
if (driver.IsOpenGLES()) {
if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip

View File

@ -138,28 +138,15 @@ private:
private:
Driver& driver;
OpenGLState state;
GLuint default_texture;
TextureRuntime runtime;
RasterizerCache res_cache;
std::vector<HardwareVertex> vertex_batch;
bool is_amd;
bool shader_dirty = true;
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
ShaderProgramManager shader_program_manager;
OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{};
OGLTexture default_texture;
std::array<SamplerInfo, 3> texture_samplers;
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;

View File

@ -11,7 +11,7 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/video_core.h"
namespace OpenGL {
@ -328,12 +328,13 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
class ShaderProgramManager::Impl {
public:
explicit Impl(bool separable, bool is_amd)
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable),
explicit Impl(bool separable)
: separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) {
if (separable)
if (separable) {
pipeline.Create();
}
}
struct ShaderTuple {
@ -362,25 +363,19 @@ public:
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
"ShaderTuple layout changed!");
bool is_amd;
bool separable;
ShaderTuple current;
ProgrammableVertexShaders programmable_vertex_shaders;
TrivialVertexShader trivial_vertex_shader;
FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders;
std::unordered_map<u64, OGLProgram> program_cache;
OGLPipeline pipeline;
ShaderDiskCache disk_cache;
};
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable,
bool is_amd)
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable)
: impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver} {}
ShaderProgramManager::~ShaderProgramManager() = default;
@ -442,10 +437,7 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) {
if (impl->is_amd) {
// Without this reseting, AMD sometimes freezes when one stage is changed but not
// for the others. On the other hand, including this reset seems to introduce memory
// leak in Intel Graphics.
if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
glUseProgramStages(
impl->pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);

View File

@ -107,12 +107,13 @@ static_assert(sizeof(VSUniformData) == 1856,
static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec");
class Driver;
class OpenGLState;
/// A class that manage different shader stages and configures them with given config data.
class ShaderProgramManager {
public:
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd);
ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable);
~ShaderProgramManager();
void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -133,7 +134,7 @@ public:
private:
class Impl;
std::unique_ptr<Impl> impl;
Frontend::EmuWindow& emu_window;
Driver& driver;
};
} // namespace OpenGL