gl_rasterizer: Cleanup and fix bugs

This commit is contained in:
GPUCode
2022-10-31 21:58:14 +02:00
parent 939aafed40
commit 3c79360fd3
5 changed files with 34 additions and 71 deletions

View File

@ -10,7 +10,7 @@ namespace OpenGL {
enum class Vendor { Unknown = 0, AMD = 1, Nvidia = 2, Intel = 3, Generic = 4 }; enum class Vendor { Unknown = 0, AMD = 1, Nvidia = 2, Intel = 3, Generic = 4 };
enum class DriverBug { enum class DriverBug {
// AMD drivers sometimes freeze when one shader stage is changed but not the others. // AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0, ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer // On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is // read position is near the end and is an out-of-bound access to the vertex buffer. This is

View File

@ -20,19 +20,14 @@
namespace OpenGL { namespace OpenGL {
static bool IsVendorAmd() { constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
const std::string_view gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
return gpu_vendor == "ATI Technologies Inc." || gpu_vendor == "Advanced Micro Devices, Inc."; constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
} constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
#ifdef __APPLE__
static bool IsVendorIntel() {
std::string gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
return gpu_vendor == "Intel Inc.";
}
#endif
RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver) RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver)
: driver{driver}, runtime{driver}, res_cache{*this, runtime}, is_amd(IsVendorAmd()), : driver{driver}, runtime{driver}, res_cache{*this, runtime},
shader_program_manager{emu_window, driver, !driver.IsOpenGLES()},
vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE}, vertex_buffer{GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE},
uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE}, uniform_buffer{GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE},
index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE}, index_buffer{GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE},
@ -44,8 +39,7 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
// Create a 1x1 clear texture to use in the NULL case, // Create a 1x1 clear texture to use in the NULL case,
// instead of OpenGL's default of solid black // instead of OpenGL's default of solid black
glGenTextures(1, &default_texture); default_texture.Create();
glBindTexture(GL_TEXTURE_2D, default_texture);
// For some reason alpha 0 wraps around to 1.0, so use 1/255 instead // For some reason alpha 0 wraps around to 1.0, so use 1/255 instead
u8 framebuffer_data[4] = {0, 0, 0, 1}; u8 framebuffer_data[4] = {0, 0, 0, 1};
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
@ -128,17 +122,6 @@ RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driv
state.Apply(); state.Apply();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle()); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_buffer.GetHandle());
#ifdef __APPLE__
if (IsVendorIntel()) {
shader_program_manager = std::make_unique<ShaderProgramManager>(
emu_window, VideoCore::g_separable_shader_enabled, is_amd);
} else {
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, true, is_amd);
}
#else
shader_program_manager = std::make_unique<ShaderProgramManager>(emu_window, !GLES, is_amd);
#endif
glEnable(GL_BLEND); glEnable(GL_BLEND);
// Explicitly call the derived version to avoid warnings about calling virtual // Explicitly call the derived version to avoid warnings about calling virtual
@ -150,7 +133,7 @@ RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) { const VideoCore::DiskResourceLoadCallback& callback) {
shader_program_manager->LoadDiskCache(stop_loading, callback); shader_program_manager.LoadDiskCache(stop_loading, callback);
} }
void RasterizerOpenGL::SyncEntireState() { void RasterizerOpenGL::SyncEntireState() {
@ -285,7 +268,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset,
MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128)); MICROPROFILE_DEFINE(OpenGL_VS, "OpenGL", "Vertex Shader Setup", MP_RGB(192, 128, 128));
bool RasterizerOpenGL::SetupVertexShader() { bool RasterizerOpenGL::SetupVertexShader() {
MICROPROFILE_SCOPE(OpenGL_VS); MICROPROFILE_SCOPE(OpenGL_VS);
return shader_program_manager->UseProgrammableVertexShader(Pica::g_state.regs, return shader_program_manager.UseProgrammableVertexShader(Pica::g_state.regs,
Pica::g_state.vs); Pica::g_state.vs);
} }
@ -299,7 +282,7 @@ bool RasterizerOpenGL::SetupGeometryShader() {
return false; return false;
} }
shader_program_manager->UseFixedGeometryShader(regs); shader_program_manager.UseFixedGeometryShader(regs);
return true; return true;
} }
@ -360,7 +343,7 @@ bool RasterizerOpenGL::AccelerateDrawBatchInternal(bool is_indexed) {
SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max); SetupVertexArray(buffer_ptr, buffer_offset, vs_input_index_min, vs_input_index_max);
vertex_buffer.Unmap(vs_input_size); vertex_buffer.Unmap(vs_input_size);
shader_program_manager->ApplyTo(state); shader_program_manager.ApplyTo(state);
state.Apply(); state.Apply();
if (is_indexed) { if (is_indexed) {
@ -623,7 +606,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
// the geometry in question. // the geometry in question.
// For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn // For example: a bug in Pokemon X/Y causes NULL-texture squares to be drawn
// on the male character's face, which in the OpenGL default appear black. // on the male character's face, which in the OpenGL default appear black.
state.texture_units[texture_index].texture_2d = default_texture; state.texture_units[texture_index].texture_2d = default_texture.handle;
} }
} else { } else {
state.texture_units[texture_index].texture_2d = 0; state.texture_units[texture_index].texture_2d = 0;
@ -687,9 +670,9 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
} else { } else {
state.draw.vertex_array = sw_vao.handle; state.draw.vertex_array = sw_vao.handle;
state.draw.vertex_buffer = vertex_buffer.GetHandle(); state.draw.vertex_buffer = vertex_buffer.GetHandle();
shader_program_manager->UseTrivialVertexShader(); shader_program_manager.UseTrivialVertexShader();
shader_program_manager->UseTrivialGeometryShader(); shader_program_manager.UseTrivialGeometryShader();
shader_program_manager->ApplyTo(state); shader_program_manager.ApplyTo(state);
state.Apply(); state.Apply();
std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex))); std::size_t max_vertices = 3 * (VERTEX_BUFFER_SIZE / (3 * sizeof(HardwareVertex)));
@ -784,7 +767,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Blending // Blending
case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable): case PICA_REG_INDEX(framebuffer.output_merger.alphablend_enable):
if (GLES) { if (driver.IsOpenGLES()) {
// With GLES, we need this in the fragment shader to emulate logic operations // With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true; shader_dirty = true;
} }
@ -908,7 +891,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Logic op // Logic op
case PICA_REG_INDEX(framebuffer.output_merger.logic_op): case PICA_REG_INDEX(framebuffer.output_merger.logic_op):
if (GLES) { if (driver.IsOpenGLES()) {
// With GLES, we need this in the fragment shader to emulate logic operations // With GLES, we need this in the fragment shader to emulate logic operations
shader_dirty = true; shader_dirty = true;
} }
@ -1519,7 +1502,7 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(
} }
void RasterizerOpenGL::SetShader() { void RasterizerOpenGL::SetShader() {
shader_program_manager->UseFragmentShader(Pica::g_state.regs); shader_program_manager.UseFragmentShader(Pica::g_state.regs);
} }
void RasterizerOpenGL::SyncClipEnabled() { void RasterizerOpenGL::SyncClipEnabled() {
@ -1595,7 +1578,7 @@ void RasterizerOpenGL::SyncLogicOp() {
const auto& regs = Pica::g_state.regs; const auto& regs = Pica::g_state.regs;
state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op); state.logic_op = PicaToGL::LogicOp(regs.framebuffer.output_merger.logic_op);
if (GLES) { if (driver.IsOpenGLES()) {
if (!regs.framebuffer.output_merger.alphablend_enable) { if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip // Color output is disabled by logic operation. We use color write mask to skip
@ -1608,7 +1591,7 @@ void RasterizerOpenGL::SyncLogicOp() {
void RasterizerOpenGL::SyncColorWriteMask() { void RasterizerOpenGL::SyncColorWriteMask() {
const auto& regs = Pica::g_state.regs; const auto& regs = Pica::g_state.regs;
if (GLES) { if (driver.IsOpenGLES()) {
if (!regs.framebuffer.output_merger.alphablend_enable) { if (!regs.framebuffer.output_merger.alphablend_enable) {
if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) { if (regs.framebuffer.output_merger.logic_op == Pica::FramebufferRegs::LogicOp::NoOp) {
// Color output is disabled by logic operation. We use color write mask to skip // Color output is disabled by logic operation. We use color write mask to skip

View File

@ -138,28 +138,15 @@ private:
private: private:
Driver& driver; Driver& driver;
OpenGLState state; OpenGLState state;
GLuint default_texture;
TextureRuntime runtime; TextureRuntime runtime;
RasterizerCache res_cache; RasterizerCache res_cache;
ShaderProgramManager shader_program_manager;
std::vector<HardwareVertex> vertex_batch;
bool is_amd;
bool shader_dirty = true;
std::unique_ptr<ShaderProgramManager> shader_program_manager;
// They shall be big enough for about one frame.
static constexpr std::size_t VERTEX_BUFFER_SIZE = 16 * 1024 * 1024;
static constexpr std::size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr std::size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr std::size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
OGLVertexArray sw_vao; // VAO for software shader draw OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
std::array<bool, 16> hw_vao_enabled_attributes{}; std::array<bool, 16> hw_vao_enabled_attributes{};
OGLTexture default_texture;
std::array<SamplerInfo, 3> texture_samplers; std::array<SamplerInfo, 3> texture_samplers;
OGLStreamBuffer vertex_buffer; OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer; OGLStreamBuffer uniform_buffer;

View File

@ -11,7 +11,7 @@
#include "video_core/renderer_opengl/gl_shader_disk_cache.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/video_core.h" #include "video_core/video_core.h"
namespace OpenGL { namespace OpenGL {
@ -328,12 +328,13 @@ using FragmentShaders = ShaderCache<PicaFSConfig, &GenerateFragmentShader, GL_FR
class ShaderProgramManager::Impl { class ShaderProgramManager::Impl {
public: public:
explicit Impl(bool separable, bool is_amd) explicit Impl(bool separable)
: is_amd(is_amd), separable(separable), programmable_vertex_shaders(separable), : separable(separable), programmable_vertex_shaders(separable),
trivial_vertex_shader(separable), fixed_geometry_shaders(separable), trivial_vertex_shader(separable), fixed_geometry_shaders(separable),
fragment_shaders(separable), disk_cache(separable) { fragment_shaders(separable), disk_cache(separable) {
if (separable) if (separable) {
pipeline.Create(); pipeline.Create();
}
} }
struct ShaderTuple { struct ShaderTuple {
@ -362,25 +363,19 @@ public:
static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2, static_assert(offsetof(ShaderTuple, fs_hash) == sizeof(std::size_t) * 2,
"ShaderTuple layout changed!"); "ShaderTuple layout changed!");
bool is_amd;
bool separable; bool separable;
ShaderTuple current; ShaderTuple current;
ProgrammableVertexShaders programmable_vertex_shaders; ProgrammableVertexShaders programmable_vertex_shaders;
TrivialVertexShader trivial_vertex_shader; TrivialVertexShader trivial_vertex_shader;
FixedGeometryShaders fixed_geometry_shaders; FixedGeometryShaders fixed_geometry_shaders;
FragmentShaders fragment_shaders; FragmentShaders fragment_shaders;
std::unordered_map<u64, OGLProgram> program_cache; std::unordered_map<u64, OGLProgram> program_cache;
OGLPipeline pipeline; OGLPipeline pipeline;
ShaderDiskCache disk_cache; ShaderDiskCache disk_cache;
}; };
ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, ShaderProgramManager::ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable)
bool is_amd) : impl(std::make_unique<Impl>(separable)), emu_window{emu_window_}, driver{driver} {}
: impl(std::make_unique<Impl>(separable, is_amd)), emu_window{emu_window_} {}
ShaderProgramManager::~ShaderProgramManager() = default; ShaderProgramManager::~ShaderProgramManager() = default;
@ -442,10 +437,7 @@ void ShaderProgramManager::UseFragmentShader(const Pica::Regs& regs) {
void ShaderProgramManager::ApplyTo(OpenGLState& state) { void ShaderProgramManager::ApplyTo(OpenGLState& state) {
if (impl->separable) { if (impl->separable) {
if (impl->is_amd) { if (driver.HasBug(DriverBug::ShaderStageChangeFreeze)) {
// Without this resetting, AMD sometimes freezes when one stage is changed but not
// for the others. On the other hand, including this reset seems to introduce memory
// leak in Intel Graphics.
glUseProgramStages( glUseProgramStages(
impl->pipeline.handle, impl->pipeline.handle,
GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0); GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | GL_FRAGMENT_SHADER_BIT, 0);

View File

@ -107,12 +107,13 @@ static_assert(sizeof(VSUniformData) == 1856,
static_assert(sizeof(VSUniformData) < 16384, static_assert(sizeof(VSUniformData) < 16384,
"VSUniformData structure must be less than 16kb as per the OpenGL spec"); "VSUniformData structure must be less than 16kb as per the OpenGL spec");
class Driver;
class OpenGLState; class OpenGLState;
/// A class that manages different shader stages and configures them with given config data. /// A class that manages different shader stages and configures them with given config data.
class ShaderProgramManager { class ShaderProgramManager {
public: public:
ShaderProgramManager(Frontend::EmuWindow& emu_window_, bool separable, bool is_amd); ShaderProgramManager(Frontend::EmuWindow& emu_window_, Driver& driver, bool separable);
~ShaderProgramManager(); ~ShaderProgramManager();
void LoadDiskCache(const std::atomic_bool& stop_loading, void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -133,7 +134,7 @@ public:
private: private:
class Impl; class Impl;
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;
Frontend::EmuWindow& emu_window; Frontend::EmuWindow& emu_window;
Driver& driver;
}; };
} // namespace OpenGL } // namespace OpenGL