video_core: Re-implement format reinterpretation

* Same as before, but the D24S8 to RGBA8 reinterpretation is switched to a compute shader, which should provide better throughput and is much simpler to implement in Vulkan
This commit is contained in:
GPUCode
2022-10-07 11:35:08 +03:00
parent 2da4c9ca90
commit 7f7408b81e
16 changed files with 546 additions and 269 deletions

View File

@ -86,6 +86,8 @@ add_library(video_core STATIC
renderer_vulkan/renderer_vulkan.h
renderer_vulkan/vk_common.cpp
renderer_vulkan/vk_common.h
renderer_vulkan/vk_format_reinterpreter.cpp
renderer_vulkan/vk_format_reinterpreter.h
renderer_vulkan/vk_rasterizer.cpp
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_instance.cpp

View File

@ -186,7 +186,7 @@ constexpr u32 GetFormatBpp(PixelFormat format) {
}
constexpr u32 GetBytesPerPixel(PixelFormat format) {
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
// Modern GPUs need 4 bpp alignment for D24
if (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) {
return 4;
}

View File

@ -144,7 +144,7 @@ private:
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval);
/// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
bool ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params,
bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
SurfaceInterval interval);
/// Create a new surface
@ -547,7 +547,7 @@ auto RasterizerCache<T>::GetTextureSurface(const Pica::Texture::TextureInfo& inf
// Blit mipmaps that have been invalidated
SurfaceParams surface_params = *surface;
for (u32 level = 1; level <= max_level; ++level) {
for (u32 level = 1; level <= max_level; level++) {
// In PICA all mipmap levels are stored next to each other
surface_params.addr +=
surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8;
@ -1059,28 +1059,24 @@ bool RasterizerCache<T>::IntervalHasInvalidPixelFormat(SurfaceParams& params, Su
}
template <class T>
bool RasterizerCache<T>::ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params,
bool RasterizerCache<T>::ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
SurfaceInterval interval) {
/*const PixelFormat dst_format = surface->pixel_format;
const SurfaceType type = GetFormatType(dst_format);
for (auto& reinterpreter :
format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) {
const PixelFormat dest_format = surface->pixel_format;
for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) {
params.pixel_format = reinterpreter->GetSourceFormat();
Surface reinterpret_surface =
FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
if (reinterpret_surface != nullptr) {
auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface);
if (reinterpret_surface) {
auto reinterpret_interval = reinterpret_surface->GetCopyableInterval(params);
auto reinterpret_params = surface->FromInterval(reinterpret_interval);
auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params);
auto dest_rect = surface->GetScaledSubRect(reinterpret_params);
reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, dest_rect);
reinterpreter->Reinterpret(*reinterpret_surface, src_rect, *surface, dest_rect);
return true;
}
}*/
}
return false;
}

View File

@ -5,27 +5,103 @@
#include "common/scope_exit.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
namespace OpenGL {
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
public:
RGBA4toRGB5A1() {
constexpr std::string_view vs_source = R"(
D24S8toRGBA8::D24S8toRGBA8(bool use_texture_view) : use_texture_view{use_texture_view} {
constexpr std::string_view cs_source = R"(
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(binding = 0) uniform sampler2D depth;
layout(binding = 1) uniform usampler2D stencil;
layout(rgba8, binding = 2) uniform writeonly image2D color;
uniform mediump ivec2 src_offset;
void main() {
ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy);
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
}
)";
program.Create(cs_source);
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
}
void D24S8toRGBA8::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
const Surface& dest, VideoCore::Rect2D dst_rect) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = source.texture.handle;
// Use glTextureView on desktop to avoid intermediate copy
if (use_texture_view) {
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.texture.handle, GL_DEPTH24_STENCIL8, 0, 1,
0, 1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else {
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.shader_program = program.handle;
state.Apply();
glBindImageTexture(2, dest.texture.handle, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
glCopyImageSubData(source.texture.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDispatchCompute(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1);
if (use_texture_view) {
temp_tex.Release();
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
RGBA4toRGB5A1::RGBA4toRGB5A1() {
constexpr std::string_view vs_source = R"(
out vec2 dst_coord;
uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
}
)";
constexpr std::string_view fs_source = R"(
constexpr std::string_view fs_source = R"(
in mediump vec2 dst_coord;
out lowp vec4 frag_color;
@ -36,225 +112,55 @@ uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
lowp ivec3 rgb5 =
((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F;
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
lowp ivec3 rgb5 =
((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F;
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);
}
)";
program.Create(vs_source.data(), fs_source.data());
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
}
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::RGBA4;
}
void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) override {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = src_tex.handle;
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
dst_tex.handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
private:
OGLProgram program;
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
OGLVertexArray vao;
};
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
public:
ShaderD24S8toRGBA8() {
constexpr std::string_view vs_source = R"(
out vec2 dst_coord;
uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
void main() {
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
}
)";
constexpr std::string_view fs_source = R"(
in mediump vec2 dst_coord;
out lowp vec4 frag_color;
uniform highp sampler2D depth;
uniform lowp usampler2D stencil;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
frag_color = vec4(components) / (exp2(8.0) - 1.0);
}
)";
program.Create(vs_source.data(), fs_source.data());
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
auto state = OpenGLState::GetCurState();
auto cur_program = state.draw.shader_program;
state.draw.shader_program = program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
state.draw.shader_program = cur_program;
state.Apply();
// Nvidia seem to be the only one to support D24S8 views, at least on windows
// so for everyone else it will do an intermediate copy before running through the shader
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor.find("NVIDIA") != vendor.npos) {
use_texture_view = true;
} else {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
}
}
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::D24S8;
}
void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) override {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = src_tex.handle;
if (use_texture_view) {
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, src_tex.handle, GL_DEPTH24_STENCIL8, 0, 1,
0, 1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
glCopyImageSubData(src_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
dst_tex.handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (use_texture_view) {
temp_tex.Release();
}
}
private:
bool use_texture_view{};
OGLProgram program{};
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
OGLVertexArray vao{};
OGLTexture temp_tex{};
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
};
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
read_fbo.Create();
draw_fbo.Create();
program.Create(vs_source.data(), fs_source.data());
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
}
auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(VideoCore::PixelFormat dst_format)
-> const ReinterpreterList& {
return reinterpreters[static_cast<u32>(dst_format)];
void RGBA4toRGB5A1::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
const Surface& dest, VideoCore::Rect2D dst_rect) {
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = source.texture.handle;
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
dest.texture.handle, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
} // namespace OpenGL

View File

@ -4,44 +4,60 @@
#pragma once
#include <unordered_map>
#include "common/math_util.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class RasterizerCacheOpenGL;
class Surface;
class FormatReinterpreterBase {
public:
FormatReinterpreterBase() {
read_fbo.Create();
draw_fbo.Create();
}
virtual ~FormatReinterpreterBase() = default;
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
virtual void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) = 0;
protected:
OGLFramebuffer read_fbo;
OGLFramebuffer draw_fbo;
virtual void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
const Surface& dest, VideoCore::Rect2D dst_rect) = 0;
};
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
class FormatReinterpreterOpenGL : NonCopyable {
class D24S8toRGBA8 final : public FormatReinterpreterBase {
public:
FormatReinterpreterOpenGL();
~FormatReinterpreterOpenGL() = default;
D24S8toRGBA8(bool use_texture_view);
const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dst_format);
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::D24S8;
}
void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
const Surface& dest, VideoCore::Rect2D dst_rect) override;
private:
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
bool use_texture_view{};
OGLProgram program{};
GLint src_offset_loc{-1};
OGLTexture temp_tex{};
VideoCore::Rect2D temp_rect{0, 0, 0, 0};
};
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
public:
RGBA4toRGB5A1();
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::RGBA4;
}
void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
const Surface& dest, VideoCore::Rect2D dst_rect) override;
private:
OGLFramebuffer read_fbo;
OGLFramebuffer draw_fbo;
OGLProgram program;
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
OGLVertexArray vao;
};
} // namespace OpenGL

View File

@ -153,6 +153,14 @@ void OGLProgram::Create(const char* vert_shader, const char* frag_shader) {
Create(false, {vert.handle, frag.handle});
}
void OGLProgram::Create(const std::string_view compute_shader) {
OGLShader comp;
comp.Create(compute_shader.data(), GL_COMPUTE_SHADER);
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
Create(false, {comp.handle});
}
void OGLProgram::Release() {
if (handle == 0)
return;

View File

@ -5,6 +5,7 @@
#pragma once
#include <utility>
#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
@ -137,6 +138,9 @@ public:
/// Creates a new program from the given shader source code
void Create(const char* vert_shader, const char* frag_shader);
/// Creates a new compute shader program
void Create(const std::string_view compute_shader);
/// Deletes the internal OpenGL resource
void Release();

View File

@ -14,19 +14,18 @@
namespace OpenGL {
GLuint LoadShader(const char* source, GLenum type) {
const std::string version = GLES ? R"(#version 320 es
const std::string version = GLES ? R"(
#version 320 es
#define CITRA_GLES
#if defined(GL_ANDROID_extension_pack_es31a)
#extension GL_ANDROID_extension_pack_es31a : enable
#endif // defined(GL_ANDROID_extension_pack_es31a)
#endif
#if defined(GL_EXT_clip_cull_distance)
#extension GL_EXT_clip_cull_distance : enable
#endif // defined(GL_EXT_clip_cull_distance)
)"
: "#version 430 core\n";
#endif
)" : "#version 430 core\n";
const char* debug_type;
switch (type) {
@ -39,6 +38,9 @@ GLuint LoadShader(const char* source, GLenum type) {
case GL_FRAGMENT_SHADER:
debug_type = "fragment";
break;
case GL_COMPUTE_SHADER:
debug_type = "compute";
break;
default:
UNREACHABLE();
}

View File

@ -6,6 +6,7 @@
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL {
@ -54,10 +55,18 @@ GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
TextureRuntime::TextureRuntime(Driver& driver)
: driver{driver}, downloader_es{false},
filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()} {
filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()}{
read_fbo.Create();
draw_fbo.Create();
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<D24S8toRGBA8>(!driver.IsOpenGLES()));
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
@ -140,7 +149,6 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type) {
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
const GLenum target =
type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
@ -302,6 +310,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
glGenerateMipmap(GL_TEXTURE_2D);
}
/// Returns the reinterpreters registered for dest_format.
/// The table is indexed by the numeric value of the pixel format.
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
    VideoCore::PixelFormat dest_format) const {
    const auto format_index = static_cast<u32>(dest_format);
    return reinterpreters[format_index];
}
void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarget,
VideoCore::SurfaceType type, OGLTexture& texture) const {
const GLint framebuffer = target == GL_DRAW_FRAMEBUFFER ? draw_fbo.handle : read_fbo.handle;

View File

@ -7,7 +7,7 @@
#include <set>
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
#include "video_core/renderer_opengl/texture_downloader_es.h"
@ -92,6 +92,10 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
/// Returns all source formats that support reinterpretation to the dest format
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;
private:
/// Returns the framebuffer used for texture downloads
void BindFramebuffer(GLenum target, GLint level, GLenum textarget,
@ -116,6 +120,7 @@ private:
Driver& driver;
TextureDownloaderES downloader_es;
TextureFilterer filterer;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
// Staging buffers stored in increasing size
std::multiset<StagingBuffer> upload_buffers;

View File

@ -0,0 +1,191 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#define VULKAN_HPP_NO_CONSTRUCTORS
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
#include "video_core/renderer_vulkan/vk_shader.h"
namespace Vulkan {
D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime)
: FormatReinterpreterBase{instance, scheduler, runtime}, device{instance.GetDevice()} {
constexpr std::string_view cs_source = R"(
#version 450 core
#extension GL_EXT_samplerless_texture_functions : require
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
layout(set = 0, binding = 0) uniform texture2D depth;
layout(set = 0, binding = 1) uniform utexture2D stencil;
layout(set = 0, binding = 2, rgba8) uniform writeonly image2D color;
layout(push_constant, std140) uniform ComputeInfo {
mediump ivec2 src_offset;
};
void main() {
ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy);
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
}
)";
compute_shader = Compile(cs_source, vk::ShaderStageFlagBits::eCompute,
device, ShaderOptimization::High);
const std::array compute_layout_bindings = {
vk::DescriptorSetLayoutBinding{
.binding = 0,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute
},
vk::DescriptorSetLayoutBinding{
.binding = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute
},
vk::DescriptorSetLayoutBinding{
.binding = 2,
.descriptorType = vk::DescriptorType::eStorageImage,
.descriptorCount = 1,
.stageFlags = vk::ShaderStageFlagBits::eCompute
}
};
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
.bindingCount = static_cast<u32>(compute_layout_bindings.size()),
.pBindings = compute_layout_bindings.data()
};
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
const std::array update_template_entries = {
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = 0,
.stride = sizeof(vk::DescriptorImageInfo)
},
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eSampledImage,
.offset = sizeof(vk::DescriptorImageInfo),
.stride = 0
},
vk::DescriptorUpdateTemplateEntry{
.dstBinding = 2,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = vk::DescriptorType::eStorageImage,
.offset = 2 * sizeof(vk::DescriptorImageInfo),
.stride = 0
}
};
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
.pDescriptorUpdateEntries = update_template_entries.data(),
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
.descriptorSetLayout = descriptor_layout
};
update_template = device.createDescriptorUpdateTemplate(template_info);
const vk::PushConstantRange push_range = {
.stageFlags = vk::ShaderStageFlagBits::eCompute,
.offset = 0,
.size = sizeof(Common::Vec2i),
};
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 1,
.pSetLayouts = &descriptor_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_range
};
compute_pipeline_layout = device.createPipelineLayout(layout_info);
const vk::DescriptorSetAllocateInfo alloc_info = {
.descriptorPool = scheduler.GetPersistentDescriptorPool(),
.descriptorSetCount = 1,
.pSetLayouts = &descriptor_layout
};
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
const vk::PipelineShaderStageCreateInfo compute_stage = {
.stage = vk::ShaderStageFlagBits::eCompute,
.module = compute_shader,
.pName = "main"
};
const vk::ComputePipelineCreateInfo compute_info = {
.stage = compute_stage,
.layout = compute_pipeline_layout
};
if (const auto result = device.createComputePipeline({}, compute_info);
result.result == vk::Result::eSuccess) {
compute_pipeline = result.value;
} else {
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
UNREACHABLE();
}
}
D24S8toRGBA8::~D24S8toRGBA8() {
device.destroyPipeline(compute_pipeline);
device.destroyPipelineLayout(compute_pipeline_layout);
device.destroyDescriptorUpdateTemplate(update_template);
device.destroyDescriptorSetLayout(descriptor_layout);
device.destroyShaderModule(compute_shader);
}
void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
Surface& dest, VideoCore::Rect2D dst_rect) {
vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer();
runtime.Transition(command_buffer, source.alloc, vk::ImageLayout::eDepthStencilReadOnlyOptimal,
0, source.alloc.levels);
runtime.Transition(command_buffer, dest.alloc, vk::ImageLayout::eGeneral, 0, dest.alloc.levels);
const std::array textures = {
vk::DescriptorImageInfo{
.imageView = source.GetDepthView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal
},
vk::DescriptorImageInfo{
.imageView = source.GetStencilView(),
.imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal
},
vk::DescriptorImageInfo{
.imageView = dest.GetImageView(),
.imageLayout = vk::ImageLayout::eGeneral
}
};
device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]);
command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
0, 1, &descriptor_set, 0, nullptr);
command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);
const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom);
command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute,
0, sizeof(Common::Vec2i), src_offset.AsArray());
command_buffer.dispatch(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1);
}
} // namespace Vulkan

View File

@ -0,0 +1,58 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_cache/utils.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Surface;
class Instance;
class TaskScheduler;
class TextureRuntime;
/// Interface of an object that converts the contents of one surface into another surface
/// with a different pixel format. Implementations are stored by pointer in ReinterpreterList.
class FormatReinterpreterBase {
public:
/// Stores references to the collaborators; none of them are owned by the reinterpreter
FormatReinterpreterBase(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime)
: instance{instance}, scheduler{scheduler}, runtime{runtime} {}
virtual ~FormatReinterpreterBase() = default;
/// Returns the pixel format this reinterpreter reads from
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
/// Converts src_rect of source into dst_rect of dest
virtual void Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
Surface& dest, VideoCore::Rect2D dst_rect) = 0;
protected:
// Held by reference, so they must outlive the reinterpreter
const Instance& instance;
TaskScheduler& scheduler;
TextureRuntime& runtime;
};
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
class D24S8toRGBA8 final : public FormatReinterpreterBase {
public:
D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime);
~D24S8toRGBA8();
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::D24S8;
}
void Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
Surface& dest, VideoCore::Rect2D dst_rect) override;
private:
vk::Device device;
vk::Pipeline compute_pipeline;
vk::PipelineLayout compute_pipeline_layout;
vk::DescriptorSetLayout descriptor_layout;
vk::DescriptorSet descriptor_set;
vk::DescriptorUpdateTemplate update_template;
vk::ShaderModule compute_shader;
VideoCore::Rect2D temp_rect{0, 0, 0, 0};
};
} // namespace Vulkan

View File

@ -49,6 +49,8 @@ TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer)
.pPoolSizes = pool_sizes.data()
};
persistent_descriptor_pool = device.createDescriptorPool(descriptor_pool_info);
const vk::CommandBufferAllocateInfo buffer_info = {
.commandPool = command_pool,
.level = vk::CommandBufferLevel::ePrimary,
@ -93,6 +95,7 @@ TaskScheduler::~TaskScheduler() {
}
device.destroyCommandPool(command_pool);
device.destroyDescriptorPool(persistent_descriptor_pool);
}
void TaskScheduler::Synchronize(u32 slot) {

View File

@ -52,6 +52,11 @@ public:
return commands[current_command].descriptor_pool;
}
/// Returns the persistent descriptor pool. This is the single pool the scheduler creates
/// in its constructor and destroys in its destructor, as opposed to the descriptor pool
/// that belongs to the current command slot.
vk::DescriptorPool GetPersistentDescriptorPool() const {
return persistent_descriptor_pool;
}
/// Returns the index of the current command slot
u32 GetCurrentSlotIndex() const {
return current_command;
@ -92,6 +97,7 @@ private:
vk::CommandPool command_pool{};
vk::Semaphore timeline{};
vk::DescriptorPool persistent_descriptor_pool;
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
u32 current_command = 0;
};

View File

@ -40,6 +40,13 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule
vk::BufferUsageFlagBits::eTransferSrc |
vk::BufferUsageFlagBits::eTransferDst);
}
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<D24S8toRGBA8>(instance, scheduler, *this));
}
TextureRuntime::~TextureRuntime() {
@ -51,6 +58,10 @@ TextureRuntime::~TextureRuntime() {
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
device.destroyImageView(alloc.image_view);
device.destroyImageView(alloc.base_view);
if (alloc.depth_view) {
device.destroyImageView(alloc.depth_view);
device.destroyImageView(alloc.stencil_view);
}
}
for (const auto& [key, framebuffer] : clear_framebuffers) {
@ -175,10 +186,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
vk::ImageView image_view = device.createImageView(view_info);
vk::ImageView base_view = device.createImageView(base_view_info);
// Create separate depth/stencil views in case this gets reinterpreted with a compute shader
vk::ImageView depth_view;
vk::ImageView stencil_view;
if (format == VideoCore::PixelFormat::D24S8) {
vk::ImageViewCreateInfo view_info = {
.image = image,
.viewType = type == VideoCore::TextureType::CubeMap ?
vk::ImageViewType::eCube :
vk::ImageViewType::e2D,
.format = vk_format,
.subresourceRange = {
.aspectMask = vk::ImageAspectFlagBits::eDepth,
.baseMipLevel = 0,
.levelCount = levels,
.baseArrayLayer = 0,
.layerCount = layers
}
};
depth_view = device.createImageView(view_info);
view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil;
stencil_view = device.createImageView(view_info);
}
return ImageAlloc{
.image = image,
.image_view = image_view,
.base_view = base_view,
.depth_view = depth_view,
.stencil_view = stencil_view,
.allocation = allocation,
.format = vk_format,
.aspect = aspect,
@ -440,6 +477,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
}
}
/// Looks up the reinterpreters that were registered for the given destination format
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
    VideoCore::PixelFormat dest_format) const {
    // The table is indexed by the numeric value of the destination format.
    // NOTE(review): no bounds check here - confirm callers never pass a format whose value
    // is not below the size of the reinterpreters array.
    const auto format_index = static_cast<u32>(dest_format);
    return reinterpreters[format_index];
}
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
vk::ImageLayout new_layout, u32 level, u32 level_count,
u32 layer, u32 layer_count) {
@ -501,7 +542,13 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
case vk::ImageLayout::eGeneral:
info.access = vk::AccessFlagBits::eInputAttachmentRead;
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput |
vk::PipelineStageFlagBits::eFragmentShader;
vk::PipelineStageFlagBits::eFragmentShader |
vk::PipelineStageFlagBits::eComputeShader;
break;
case vk::ImageLayout::eDepthStencilReadOnlyOptimal:
// Image is going to be sampled from a compute shader
info.access = vk::AccessFlagBits::eShaderRead;
info.stage = vk::PipelineStageFlagBits::eComputeShader;
break;
default:
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);

View File

@ -9,6 +9,7 @@
#include "video_core/rasterizer_cache/rasterizer_cache.h"
#include "video_core/rasterizer_cache/surface_base.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
@ -25,6 +26,8 @@ struct ImageAlloc {
vk::Image image;
vk::ImageView image_view;
vk::ImageView base_view;
vk::ImageView depth_view;
vk::ImageView stencil_view;
VmaAllocation allocation;
vk::ImageUsageFlags usage;
vk::Format format;
@ -52,13 +55,13 @@ public:
/// Maps an internal staging buffer of the provided size of pixel uploads/downloads
[[nodiscard]] StagingData FindStaging(u32 size, bool upload);
/// Causes a GPU command flush
void Finish();
/// Allocates a vulkan image, possibly reusing an existing one
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
VideoCore::TextureType type);
/// Causes a GPU command flush
void Finish();
/// Takes back ownership of the allocation for recycling
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);
@ -84,6 +87,10 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface, u32 max_level);
/// Returns all reinterpreters that can convert from some source format to the dest format
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
VideoCore::PixelFormat dest_format) const;
/// Performs operations that need to be done on every scheduler slot switch
void OnSlotSwitch(u32 new_slot);
@ -102,10 +109,12 @@ private:
const Instance& instance;
TaskScheduler& scheduler;
RenderpassCache& renderpass_cache;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
ReinterpreterList list;
};
class Surface : public VideoCore::SurfaceBase<Surface> {
@ -137,6 +146,16 @@ public:
return alloc.base_view;
}
/// Returns the depth only image view of the surface, null otherwise.
/// TextureRuntime::Allocate only creates this view for the D24S8 pixel format; for any
/// other format the handle is left default-constructed.
vk::ImageView GetDepthView() const {
return alloc.depth_view;
}
/// Returns the stencil only image view of the surface, null otherwise.
/// TextureRuntime::Allocate only creates this view for the D24S8 pixel format; for any
/// other format the handle is left default-constructed.
vk::ImageView GetStencilView() const {
return alloc.stencil_view;
}
/// Returns the internal format of the allocated texture
vk::Format GetInternalFormat() const {
return alloc.format;
@ -156,6 +175,8 @@ private:
TextureRuntime& runtime;
const Instance& instance;
TaskScheduler& scheduler;
public:
ImageAlloc alloc{};
FormatTraits traits;
};