video_core: Re-implement format reinterpretation
* Same as before, but the D24S8 to RGBA8 reinterpretation is switched to a compute shader, which should provide better throughput and is much simpler to implement in Vulkan
This commit is contained in:
@ -86,6 +86,8 @@ add_library(video_core STATIC
|
||||
renderer_vulkan/renderer_vulkan.h
|
||||
renderer_vulkan/vk_common.cpp
|
||||
renderer_vulkan/vk_common.h
|
||||
renderer_vulkan/vk_format_reinterpreter.cpp
|
||||
renderer_vulkan/vk_format_reinterpreter.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_instance.cpp
|
||||
|
@ -186,7 +186,7 @@ constexpr u32 GetFormatBpp(PixelFormat format) {
|
||||
}
|
||||
|
||||
constexpr u32 GetBytesPerPixel(PixelFormat format) {
|
||||
// OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
|
||||
// Modern GPUs need 4 bpp alignment for D24
|
||||
if (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) {
|
||||
return 4;
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ private:
|
||||
bool IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval);
|
||||
|
||||
/// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
|
||||
bool ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params,
|
||||
bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
|
||||
SurfaceInterval interval);
|
||||
|
||||
/// Create a new surface
|
||||
@ -547,7 +547,7 @@ auto RasterizerCache<T>::GetTextureSurface(const Pica::Texture::TextureInfo& inf
|
||||
|
||||
// Blit mipmaps that have been invalidated
|
||||
SurfaceParams surface_params = *surface;
|
||||
for (u32 level = 1; level <= max_level; ++level) {
|
||||
for (u32 level = 1; level <= max_level; level++) {
|
||||
// In PICA all mipmap levels are stored next to each other
|
||||
surface_params.addr +=
|
||||
surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8;
|
||||
@ -1059,28 +1059,24 @@ bool RasterizerCache<T>::IntervalHasInvalidPixelFormat(SurfaceParams& params, Su
|
||||
}
|
||||
|
||||
template <class T>
|
||||
bool RasterizerCache<T>::ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params,
|
||||
bool RasterizerCache<T>::ValidateByReinterpretation(const Surface& surface, SurfaceParams& params,
|
||||
SurfaceInterval interval) {
|
||||
/*const PixelFormat dst_format = surface->pixel_format;
|
||||
const SurfaceType type = GetFormatType(dst_format);
|
||||
|
||||
for (auto& reinterpreter :
|
||||
format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) {
|
||||
|
||||
const PixelFormat dest_format = surface->pixel_format;
|
||||
for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) {
|
||||
params.pixel_format = reinterpreter->GetSourceFormat();
|
||||
Surface reinterpret_surface =
|
||||
FindMatch<MatchFlags::Copy>(surface_cache, params, ScaleMatch::Ignore, interval);
|
||||
|
||||
if (reinterpret_surface != nullptr) {
|
||||
auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface);
|
||||
if (reinterpret_surface) {
|
||||
auto reinterpret_interval = reinterpret_surface->GetCopyableInterval(params);
|
||||
auto reinterpret_params = surface->FromInterval(reinterpret_interval);
|
||||
auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params);
|
||||
auto dest_rect = surface->GetScaledSubRect(reinterpret_params);
|
||||
|
||||
reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, dest_rect);
|
||||
reinterpreter->Reinterpret(*reinterpret_surface, src_rect, *surface, dest_rect);
|
||||
return true;
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -5,23 +5,99 @@
|
||||
#include "common/scope_exit.h"
|
||||
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
|
||||
public:
|
||||
RGBA4toRGB5A1() {
|
||||
D24S8toRGBA8::D24S8toRGBA8(bool use_texture_view) : use_texture_view{use_texture_view} {
|
||||
constexpr std::string_view cs_source = R"(
|
||||
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
|
||||
layout(binding = 0) uniform sampler2D depth;
|
||||
layout(binding = 1) uniform usampler2D stencil;
|
||||
layout(rgba8, binding = 2) uniform writeonly image2D color;
|
||||
|
||||
uniform mediump ivec2 src_offset;
|
||||
|
||||
void main() {
|
||||
ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
highp uint depth_val =
|
||||
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
|
||||
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
|
||||
highp uvec4 components =
|
||||
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
|
||||
imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
|
||||
}
|
||||
|
||||
)";
|
||||
program.Create(cs_source);
|
||||
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
|
||||
}
|
||||
|
||||
void D24S8toRGBA8::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
|
||||
const Surface& dest, VideoCore::Rect2D dst_rect) {
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
OpenGLState state;
|
||||
state.texture_units[0].texture_2d = source.texture.handle;
|
||||
|
||||
// Use glTextureView on desktop to avoid intermediate copy
|
||||
if (use_texture_view) {
|
||||
temp_tex.Create();
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.texture.handle, GL_DEPTH24_STENCIL8, 0, 1,
|
||||
0, 1);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
} else {
|
||||
temp_tex.Release();
|
||||
temp_tex.Create();
|
||||
state.texture_units[1].texture_2d = temp_tex.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
temp_rect = src_rect;
|
||||
}
|
||||
|
||||
state.texture_units[1].texture_2d = temp_tex.handle;
|
||||
state.draw.shader_program = program.handle;
|
||||
state.Apply();
|
||||
|
||||
glBindImageTexture(2, dest.texture.handle, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
|
||||
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
if (!use_texture_view) {
|
||||
glCopyImageSubData(source.texture.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
|
||||
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
|
||||
src_rect.GetWidth(), src_rect.GetHeight(), 1);
|
||||
}
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
|
||||
|
||||
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
|
||||
glDispatchCompute(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1);
|
||||
|
||||
if (use_texture_view) {
|
||||
temp_tex.Release();
|
||||
}
|
||||
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
}
|
||||
|
||||
RGBA4toRGB5A1::RGBA4toRGB5A1() {
|
||||
constexpr std::string_view vs_source = R"(
|
||||
out vec2 dst_coord;
|
||||
|
||||
uniform mediump ivec2 dst_size;
|
||||
|
||||
const vec2 vertices[4] =
|
||||
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
|
||||
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
|
||||
|
||||
void main() {
|
||||
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
|
||||
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
|
||||
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
|
||||
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
|
||||
}
|
||||
)";
|
||||
|
||||
@ -36,41 +112,38 @@ uniform mediump ivec2 src_size;
|
||||
uniform mediump ivec2 src_offset;
|
||||
|
||||
void main() {
|
||||
mediump ivec2 tex_coord;
|
||||
if (src_size == dst_size) {
|
||||
mediump ivec2 tex_coord;
|
||||
if (src_size == dst_size) {
|
||||
tex_coord = ivec2(dst_coord);
|
||||
} else {
|
||||
} else {
|
||||
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
|
||||
mediump int y = tex_index / src_size.x;
|
||||
tex_coord = ivec2(tex_index - y * src_size.x, y);
|
||||
}
|
||||
tex_coord -= src_offset;
|
||||
}
|
||||
tex_coord -= src_offset;
|
||||
|
||||
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
|
||||
lowp ivec3 rgb5 =
|
||||
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
|
||||
lowp ivec3 rgb5 =
|
||||
((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F;
|
||||
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);
|
||||
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);
|
||||
}
|
||||
)";
|
||||
|
||||
read_fbo.Create();
|
||||
draw_fbo.Create();
|
||||
program.Create(vs_source.data(), fs_source.data());
|
||||
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
|
||||
src_size_loc = glGetUniformLocation(program.handle, "src_size");
|
||||
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
|
||||
vao.Create();
|
||||
}
|
||||
}
|
||||
|
||||
VideoCore::PixelFormat GetSourceFormat() const override {
|
||||
return VideoCore::PixelFormat::RGBA4;
|
||||
}
|
||||
|
||||
void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
|
||||
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) override {
|
||||
void RGBA4toRGB5A1::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
|
||||
const Surface& dest, VideoCore::Rect2D dst_rect) {
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
OpenGLState state;
|
||||
state.texture_units[0].texture_2d = src_tex.handle;
|
||||
state.texture_units[0].texture_2d = source.texture.handle;
|
||||
state.draw.draw_framebuffer = draw_fbo.handle;
|
||||
state.draw.shader_program = program.handle;
|
||||
state.draw.vertex_array = vao.handle;
|
||||
@ -80,7 +153,7 @@ void main() {
|
||||
state.Apply();
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
|
||||
dst_tex.handle, 0);
|
||||
dest.texture.handle, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
@ -88,173 +161,6 @@ void main() {
|
||||
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
|
||||
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
}
|
||||
|
||||
private:
|
||||
OGLProgram program;
|
||||
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
|
||||
OGLVertexArray vao;
|
||||
};
|
||||
|
||||
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
|
||||
public:
|
||||
ShaderD24S8toRGBA8() {
|
||||
constexpr std::string_view vs_source = R"(
|
||||
out vec2 dst_coord;
|
||||
|
||||
uniform mediump ivec2 dst_size;
|
||||
|
||||
const vec2 vertices[4] =
|
||||
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));
|
||||
|
||||
void main() {
|
||||
gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0);
|
||||
dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size);
|
||||
}
|
||||
)";
|
||||
|
||||
constexpr std::string_view fs_source = R"(
|
||||
in mediump vec2 dst_coord;
|
||||
|
||||
out lowp vec4 frag_color;
|
||||
|
||||
uniform highp sampler2D depth;
|
||||
uniform lowp usampler2D stencil;
|
||||
uniform mediump ivec2 dst_size;
|
||||
uniform mediump ivec2 src_size;
|
||||
uniform mediump ivec2 src_offset;
|
||||
|
||||
void main() {
|
||||
mediump ivec2 tex_coord;
|
||||
if (src_size == dst_size) {
|
||||
tex_coord = ivec2(dst_coord);
|
||||
} else {
|
||||
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
|
||||
mediump int y = tex_index / src_size.x;
|
||||
tex_coord = ivec2(tex_index - y * src_size.x, y);
|
||||
}
|
||||
tex_coord -= src_offset;
|
||||
|
||||
highp uint depth_val =
|
||||
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
|
||||
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
|
||||
highp uvec4 components =
|
||||
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
|
||||
frag_color = vec4(components) / (exp2(8.0) - 1.0);
|
||||
}
|
||||
)";
|
||||
|
||||
program.Create(vs_source.data(), fs_source.data());
|
||||
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
|
||||
src_size_loc = glGetUniformLocation(program.handle, "src_size");
|
||||
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
|
||||
vao.Create();
|
||||
|
||||
auto state = OpenGLState::GetCurState();
|
||||
auto cur_program = state.draw.shader_program;
|
||||
state.draw.shader_program = program.handle;
|
||||
state.Apply();
|
||||
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
|
||||
state.draw.shader_program = cur_program;
|
||||
state.Apply();
|
||||
|
||||
// Nvidia seems to be the only vendor to support D24S8 views, at least on Windows,
|
||||
// so for everyone else it will do an intermediate copy before running through the shader
|
||||
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
|
||||
if (vendor.find("NVIDIA") != vendor.npos) {
|
||||
use_texture_view = true;
|
||||
} else {
|
||||
LOG_INFO(Render_OpenGL,
|
||||
"Texture views are unsupported, reinterpretation will do intermediate copy");
|
||||
temp_tex.Create();
|
||||
}
|
||||
}
|
||||
|
||||
VideoCore::PixelFormat GetSourceFormat() const override {
|
||||
return VideoCore::PixelFormat::D24S8;
|
||||
}
|
||||
|
||||
void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
|
||||
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) override {
|
||||
OpenGLState prev_state = OpenGLState::GetCurState();
|
||||
SCOPE_EXIT({ prev_state.Apply(); });
|
||||
|
||||
OpenGLState state;
|
||||
state.texture_units[0].texture_2d = src_tex.handle;
|
||||
|
||||
if (use_texture_view) {
|
||||
temp_tex.Create();
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glTextureView(temp_tex.handle, GL_TEXTURE_2D, src_tex.handle, GL_DEPTH24_STENCIL8, 0, 1,
|
||||
0, 1);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
|
||||
temp_tex.Release();
|
||||
temp_tex.Create();
|
||||
state.texture_units[1].texture_2d = temp_tex.handle;
|
||||
state.Apply();
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
|
||||
temp_rect = src_rect;
|
||||
}
|
||||
|
||||
state.texture_units[1].texture_2d = temp_tex.handle;
|
||||
state.draw.draw_framebuffer = draw_fbo.handle;
|
||||
state.draw.shader_program = program.handle;
|
||||
state.draw.vertex_array = vao.handle;
|
||||
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
|
||||
static_cast<GLsizei>(dst_rect.GetWidth()),
|
||||
static_cast<GLsizei>(dst_rect.GetHeight())};
|
||||
state.Apply();
|
||||
|
||||
glActiveTexture(GL_TEXTURE1);
|
||||
if (!use_texture_view) {
|
||||
glCopyImageSubData(src_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
|
||||
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
|
||||
src_rect.GetWidth(), src_rect.GetHeight(), 1);
|
||||
}
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
|
||||
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
|
||||
dst_tex.handle, 0);
|
||||
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
|
||||
0);
|
||||
|
||||
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
|
||||
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
|
||||
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
|
||||
if (use_texture_view) {
|
||||
temp_tex.Release();
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
bool use_texture_view{};
|
||||
OGLProgram program{};
|
||||
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
|
||||
OGLVertexArray vao{};
|
||||
OGLTexture temp_tex{};
|
||||
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
|
||||
};
|
||||
|
||||
FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() {
|
||||
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||
const u32 dst_index = static_cast<u32>(dest);
|
||||
return reinterpreters[dst_index].push_back(std::move(obj));
|
||||
};
|
||||
|
||||
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
|
||||
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
|
||||
}
|
||||
|
||||
auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(VideoCore::PixelFormat dst_format)
|
||||
-> const ReinterpreterList& {
|
||||
return reinterpreters[static_cast<u32>(dst_format)];
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -4,44 +4,60 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include "common/math_util.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class RasterizerCacheOpenGL;
|
||||
class Surface;
|
||||
|
||||
class FormatReinterpreterBase {
|
||||
public:
|
||||
FormatReinterpreterBase() {
|
||||
read_fbo.Create();
|
||||
draw_fbo.Create();
|
||||
}
|
||||
|
||||
virtual ~FormatReinterpreterBase() = default;
|
||||
|
||||
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
|
||||
virtual void Reinterpret(const OGLTexture& src_tex, Common::Rectangle<u32> src_rect,
|
||||
const OGLTexture& dst_tex, Common::Rectangle<u32> dst_rect) = 0;
|
||||
|
||||
protected:
|
||||
OGLFramebuffer read_fbo;
|
||||
OGLFramebuffer draw_fbo;
|
||||
virtual void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
|
||||
const Surface& dest, VideoCore::Rect2D dst_rect) = 0;
|
||||
};
|
||||
|
||||
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
|
||||
|
||||
class FormatReinterpreterOpenGL : NonCopyable {
|
||||
class D24S8toRGBA8 final : public FormatReinterpreterBase {
|
||||
public:
|
||||
FormatReinterpreterOpenGL();
|
||||
~FormatReinterpreterOpenGL() = default;
|
||||
D24S8toRGBA8(bool use_texture_view);
|
||||
|
||||
const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dst_format);
|
||||
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
|
||||
return VideoCore::PixelFormat::D24S8;
|
||||
}
|
||||
|
||||
void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
|
||||
const Surface& dest, VideoCore::Rect2D dst_rect) override;
|
||||
|
||||
private:
|
||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||
bool use_texture_view{};
|
||||
OGLProgram program{};
|
||||
GLint src_offset_loc{-1};
|
||||
OGLTexture temp_tex{};
|
||||
VideoCore::Rect2D temp_rect{0, 0, 0, 0};
|
||||
};
|
||||
|
||||
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
|
||||
public:
|
||||
RGBA4toRGB5A1();
|
||||
|
||||
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
|
||||
return VideoCore::PixelFormat::RGBA4;
|
||||
}
|
||||
|
||||
void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect,
|
||||
const Surface& dest, VideoCore::Rect2D dst_rect) override;
|
||||
|
||||
private:
|
||||
OGLFramebuffer read_fbo;
|
||||
OGLFramebuffer draw_fbo;
|
||||
OGLProgram program;
|
||||
GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1};
|
||||
OGLVertexArray vao;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
@ -153,6 +153,14 @@ void OGLProgram::Create(const char* vert_shader, const char* frag_shader) {
|
||||
Create(false, {vert.handle, frag.handle});
|
||||
}
|
||||
|
||||
void OGLProgram::Create(const std::string_view compute_shader) {
|
||||
OGLShader comp;
|
||||
comp.Create(compute_shader.data(), GL_COMPUTE_SHADER);
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
Create(false, {comp.handle});
|
||||
}
|
||||
|
||||
void OGLProgram::Release() {
|
||||
if (handle == 0)
|
||||
return;
|
||||
|
@ -5,6 +5,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <utility>
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
@ -137,6 +138,9 @@ public:
|
||||
/// Creates a new program from given shader source code
|
||||
void Create(const char* vert_shader, const char* frag_shader);
|
||||
|
||||
/// Creates a new compute shader program
|
||||
void Create(const std::string_view compute_shader);
|
||||
|
||||
/// Deletes the internal OpenGL resource
|
||||
void Release();
|
||||
|
||||
|
@ -14,19 +14,18 @@
|
||||
namespace OpenGL {
|
||||
|
||||
GLuint LoadShader(const char* source, GLenum type) {
|
||||
const std::string version = GLES ? R"(#version 320 es
|
||||
|
||||
const std::string version = GLES ? R"(
|
||||
#version 320 es
|
||||
#define CITRA_GLES
|
||||
|
||||
#if defined(GL_ANDROID_extension_pack_es31a)
|
||||
#extension GL_ANDROID_extension_pack_es31a : enable
|
||||
#endif // defined(GL_ANDROID_extension_pack_es31a)
|
||||
#endif
|
||||
|
||||
#if defined(GL_EXT_clip_cull_distance)
|
||||
#extension GL_EXT_clip_cull_distance : enable
|
||||
#endif // defined(GL_EXT_clip_cull_distance)
|
||||
)"
|
||||
: "#version 430 core\n";
|
||||
#endif
|
||||
)" : "#version 430 core\n";
|
||||
|
||||
const char* debug_type;
|
||||
switch (type) {
|
||||
@ -39,6 +38,9 @@ GLuint LoadShader(const char* source, GLenum type) {
|
||||
case GL_FRAGMENT_SHADER:
|
||||
debug_type = "fragment";
|
||||
break;
|
||||
case GL_COMPUTE_SHADER:
|
||||
debug_type = "compute";
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_runtime.h"
|
||||
#include "video_core/renderer_opengl/gl_driver.h"
|
||||
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
|
||||
namespace OpenGL {
|
||||
@ -54,10 +55,18 @@ GLbitfield MakeBufferMask(VideoCore::SurfaceType type) {
|
||||
|
||||
TextureRuntime::TextureRuntime(Driver& driver)
|
||||
: driver{driver}, downloader_es{false},
|
||||
filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()} {
|
||||
filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()}{
|
||||
|
||||
read_fbo.Create();
|
||||
draw_fbo.Create();
|
||||
|
||||
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||
const u32 dst_index = static_cast<u32>(dest);
|
||||
return reinterpreters[dst_index].push_back(std::move(obj));
|
||||
};
|
||||
|
||||
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<D24S8toRGBA8>(!driver.IsOpenGLES()));
|
||||
Register(VideoCore::PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
|
||||
}
|
||||
|
||||
const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
@ -140,7 +149,6 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload,
|
||||
|
||||
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type) {
|
||||
|
||||
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
|
||||
const GLenum target =
|
||||
type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D;
|
||||
@ -302,6 +310,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
|
||||
glGenerateMipmap(GL_TEXTURE_2D);
|
||||
}
|
||||
|
||||
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const {
|
||||
return reinterpreters[static_cast<u32>(dest_format)];
|
||||
}
|
||||
|
||||
void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarget,
|
||||
VideoCore::SurfaceType type, OGLTexture& texture) const {
|
||||
const GLint framebuffer = target == GL_DRAW_FRAMEBUFFER ? draw_fbo.handle : read_fbo.handle;
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <set>
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_cache/surface_base.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
|
||||
#include "video_core/renderer_opengl/texture_filters/texture_filterer.h"
|
||||
#include "video_core/renderer_opengl/texture_downloader_es.h"
|
||||
|
||||
@ -92,6 +92,10 @@ public:
|
||||
/// Generates mipmaps for all the available levels of the texture
|
||||
void GenerateMipmaps(Surface& surface, u32 max_level);
|
||||
|
||||
/// Returns all reinterpreters whose source format can be reinterpreted to the dest format
|
||||
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
|
||||
VideoCore::PixelFormat dest_format) const;
|
||||
|
||||
private:
|
||||
/// Returns the framebuffer used for texture downloads
|
||||
void BindFramebuffer(GLenum target, GLint level, GLenum textarget,
|
||||
@ -116,6 +120,7 @@ private:
|
||||
Driver& driver;
|
||||
TextureDownloaderES downloader_es;
|
||||
TextureFilterer filterer;
|
||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||
|
||||
// Staging buffers stored in increasing size
|
||||
std::multiset<StagingBuffer> upload_buffers;
|
||||
|
191
src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp
Normal file
191
src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp
Normal file
@ -0,0 +1,191 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS
|
||||
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_runtime.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime)
|
||||
: FormatReinterpreterBase{instance, scheduler, runtime}, device{instance.GetDevice()} {
|
||||
constexpr std::string_view cs_source = R"(
|
||||
#version 450 core
|
||||
#extension GL_EXT_samplerless_texture_functions : require
|
||||
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
|
||||
layout(set = 0, binding = 0) uniform texture2D depth;
|
||||
layout(set = 0, binding = 1) uniform utexture2D stencil;
|
||||
layout(set = 0, binding = 2, rgba8) uniform writeonly image2D color;
|
||||
|
||||
layout(push_constant, std140) uniform ComputeInfo {
|
||||
mediump ivec2 src_offset;
|
||||
};
|
||||
|
||||
void main() {
|
||||
ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
highp uint depth_val =
|
||||
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
|
||||
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
|
||||
highp uvec4 components =
|
||||
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
|
||||
imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0));
|
||||
}
|
||||
|
||||
)";
|
||||
compute_shader = Compile(cs_source, vk::ShaderStageFlagBits::eCompute,
|
||||
device, ShaderOptimization::High);
|
||||
|
||||
const std::array compute_layout_bindings = {
|
||||
vk::DescriptorSetLayoutBinding{
|
||||
.binding = 0,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute
|
||||
},
|
||||
vk::DescriptorSetLayoutBinding{
|
||||
.binding = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute
|
||||
},
|
||||
vk::DescriptorSetLayoutBinding{
|
||||
.binding = 2,
|
||||
.descriptorType = vk::DescriptorType::eStorageImage,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute
|
||||
}
|
||||
};
|
||||
|
||||
const vk::DescriptorSetLayoutCreateInfo compute_layout_info = {
|
||||
.bindingCount = static_cast<u32>(compute_layout_bindings.size()),
|
||||
.pBindings = compute_layout_bindings.data()
|
||||
};
|
||||
|
||||
descriptor_layout = device.createDescriptorSetLayout(compute_layout_info);
|
||||
|
||||
const std::array update_template_entries = {
|
||||
vk::DescriptorUpdateTemplateEntry{
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.offset = 0,
|
||||
.stride = sizeof(vk::DescriptorImageInfo)
|
||||
},
|
||||
vk::DescriptorUpdateTemplateEntry{
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eSampledImage,
|
||||
.offset = sizeof(vk::DescriptorImageInfo),
|
||||
.stride = 0
|
||||
},
|
||||
vk::DescriptorUpdateTemplateEntry{
|
||||
.dstBinding = 2,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = vk::DescriptorType::eStorageImage,
|
||||
.offset = 2 * sizeof(vk::DescriptorImageInfo),
|
||||
.stride = 0
|
||||
}
|
||||
};
|
||||
|
||||
const vk::DescriptorUpdateTemplateCreateInfo template_info = {
|
||||
.descriptorUpdateEntryCount = static_cast<u32>(update_template_entries.size()),
|
||||
.pDescriptorUpdateEntries = update_template_entries.data(),
|
||||
.templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet,
|
||||
.descriptorSetLayout = descriptor_layout
|
||||
};
|
||||
|
||||
update_template = device.createDescriptorUpdateTemplate(template_info);
|
||||
|
||||
const vk::PushConstantRange push_range = {
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
.offset = 0,
|
||||
.size = sizeof(Common::Vec2i),
|
||||
};
|
||||
|
||||
const vk::PipelineLayoutCreateInfo layout_info = {
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &descriptor_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &push_range
|
||||
};
|
||||
|
||||
compute_pipeline_layout = device.createPipelineLayout(layout_info);
|
||||
|
||||
const vk::DescriptorSetAllocateInfo alloc_info = {
|
||||
.descriptorPool = scheduler.GetPersistentDescriptorPool(),
|
||||
.descriptorSetCount = 1,
|
||||
.pSetLayouts = &descriptor_layout
|
||||
};
|
||||
|
||||
descriptor_set = device.allocateDescriptorSets(alloc_info)[0];
|
||||
|
||||
const vk::PipelineShaderStageCreateInfo compute_stage = {
|
||||
.stage = vk::ShaderStageFlagBits::eCompute,
|
||||
.module = compute_shader,
|
||||
.pName = "main"
|
||||
};
|
||||
|
||||
const vk::ComputePipelineCreateInfo compute_info = {
|
||||
.stage = compute_stage,
|
||||
.layout = compute_pipeline_layout
|
||||
};
|
||||
|
||||
if (const auto result = device.createComputePipeline({}, compute_info);
|
||||
result.result == vk::Result::eSuccess) {
|
||||
compute_pipeline = result.value;
|
||||
} else {
|
||||
LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!");
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
/// Destroys the Vulkan objects owned by this reinterpreter: the compute pipeline, its
/// pipeline layout, the descriptor update template, the descriptor set layout and the
/// compute shader module. The descriptor set is not freed explicitly here.
D24S8toRGBA8::~D24S8toRGBA8() {
    // Pipeline state first, then the objects it was built from
    device.destroyPipeline(compute_pipeline);
    device.destroyPipelineLayout(compute_pipeline_layout);

    // Descriptor related objects
    device.destroyDescriptorUpdateTemplate(update_template);
    device.destroyDescriptorSetLayout(descriptor_layout);

    // Shader module used by the compute stage
    device.destroyShaderModule(compute_shader);
}
|
||||
|
||||
/// Records a compute pass that reads the depth and stencil views of `source` and writes
/// to the image view of `dest` through a storage image binding.
///
/// @param source   Surface providing the depth and stencil image views (bindings 0 and 1)
/// @param src_rect Rectangle whose (left, bottom) corner is pushed to the shader as the
///                 source offset and whose size determines the dispatch dimensions
/// @param dest     Surface whose image view is bound as the storage image (binding 2)
/// @param dst_rect Not referenced by this implementation
void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
                               Surface& dest, VideoCore::Rect2D dst_rect) {
    constexpr vk::ImageLayout source_layout = vk::ImageLayout::eDepthStencilReadOnlyOptimal;
    constexpr vk::ImageLayout dest_layout = vk::ImageLayout::eGeneral;

    vk::CommandBuffer cmdbuf = scheduler.GetRenderCommandBuffer();

    // Move every mip level of both images to the layouts the descriptors declare below
    runtime.Transition(cmdbuf, source.alloc, source_layout, 0, source.alloc.levels);
    runtime.Transition(cmdbuf, dest.alloc, dest_layout, 0, dest.alloc.levels);

    // Packed in binding order; the update template reads consecutive DescriptorImageInfo
    // entries starting from the first element
    const std::array image_infos = {
        vk::DescriptorImageInfo{
            .imageView = source.GetDepthView(),
            .imageLayout = source_layout
        },
        vk::DescriptorImageInfo{
            .imageView = source.GetStencilView(),
            .imageLayout = source_layout
        },
        vk::DescriptorImageInfo{
            .imageView = dest.GetImageView(),
            .imageLayout = dest_layout
        }
    };

    device.updateDescriptorSetWithTemplate(descriptor_set, update_template, image_infos.front());
    cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout,
                              0, 1, &descriptor_set, 0, nullptr);

    cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline);

    // The shader receives the (left, bottom) corner of the source rectangle
    const auto offset = Common::MakeVec(src_rect.left, src_rect.bottom);
    cmdbuf.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute,
                         0, sizeof(Common::Vec2i), offset.AsArray());

    // NOTE(review): the integer division truncates, so a rectangle whose width or height
    // is not a multiple of 32 leaves its trailing pixels without a workgroup - confirm
    // against the local size and bounds handling of the compute shader
    const auto group_count_x = src_rect.GetWidth() / 32;
    const auto group_count_y = src_rect.GetHeight() / 32;
    cmdbuf.dispatch(group_count_x, group_count_y, 1);
}
|
||||
|
||||
} // namespace Vulkan
|
58
src/video_core/renderer_vulkan/vk_format_reinterpreter.h
Normal file
58
src/video_core/renderer_vulkan/vk_format_reinterpreter.h
Normal file
@ -0,0 +1,58 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Surface;
|
||||
class Instance;
|
||||
class TaskScheduler;
|
||||
class TextureRuntime;
|
||||
|
||||
class FormatReinterpreterBase {
|
||||
public:
|
||||
FormatReinterpreterBase(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime)
|
||||
: instance{instance}, scheduler{scheduler}, runtime{runtime} {}
|
||||
virtual ~FormatReinterpreterBase() = default;
|
||||
|
||||
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
|
||||
virtual void Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
|
||||
Surface& dest, VideoCore::Rect2D dst_rect) = 0;
|
||||
|
||||
protected:
|
||||
const Instance& instance;
|
||||
TaskScheduler& scheduler;
|
||||
TextureRuntime& runtime;
|
||||
};
|
||||
|
||||
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
|
||||
|
||||
class D24S8toRGBA8 final : public FormatReinterpreterBase {
|
||||
public:
|
||||
D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime);
|
||||
~D24S8toRGBA8();
|
||||
|
||||
[[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override {
|
||||
return VideoCore::PixelFormat::D24S8;
|
||||
}
|
||||
|
||||
void Reinterpret(Surface& source, VideoCore::Rect2D src_rect,
|
||||
Surface& dest, VideoCore::Rect2D dst_rect) override;
|
||||
|
||||
private:
|
||||
vk::Device device;
|
||||
vk::Pipeline compute_pipeline;
|
||||
vk::PipelineLayout compute_pipeline_layout;
|
||||
vk::DescriptorSetLayout descriptor_layout;
|
||||
vk::DescriptorSet descriptor_set;
|
||||
vk::DescriptorUpdateTemplate update_template;
|
||||
vk::ShaderModule compute_shader;
|
||||
VideoCore::Rect2D temp_rect{0, 0, 0, 0};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
@ -49,6 +49,8 @@ TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer)
|
||||
.pPoolSizes = pool_sizes.data()
|
||||
};
|
||||
|
||||
persistent_descriptor_pool = device.createDescriptorPool(descriptor_pool_info);
|
||||
|
||||
const vk::CommandBufferAllocateInfo buffer_info = {
|
||||
.commandPool = command_pool,
|
||||
.level = vk::CommandBufferLevel::ePrimary,
|
||||
@ -93,6 +95,7 @@ TaskScheduler::~TaskScheduler() {
|
||||
}
|
||||
|
||||
device.destroyCommandPool(command_pool);
|
||||
device.destroyDescriptorPool(persistent_descriptor_pool);
|
||||
}
|
||||
|
||||
void TaskScheduler::Synchronize(u32 slot) {
|
||||
|
@ -52,6 +52,11 @@ public:
|
||||
return commands[current_command].descriptor_pool;
|
||||
}
|
||||
|
||||
/// Returns the persistent descriptor pool
|
||||
vk::DescriptorPool GetPersistentDescriptorPool() const {
|
||||
return persistent_descriptor_pool;
|
||||
}
|
||||
|
||||
/// Returns the index of the current command slot
|
||||
u32 GetCurrentSlotIndex() const {
|
||||
return current_command;
|
||||
@ -92,6 +97,7 @@ private:
|
||||
|
||||
vk::CommandPool command_pool{};
|
||||
vk::Semaphore timeline{};
|
||||
vk::DescriptorPool persistent_descriptor_pool;
|
||||
std::array<ExecutionSlot, SCHEDULER_COMMAND_COUNT> commands{};
|
||||
u32 current_command = 0;
|
||||
};
|
||||
|
@ -40,6 +40,13 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule
|
||||
vk::BufferUsageFlagBits::eTransferSrc |
|
||||
vk::BufferUsageFlagBits::eTransferDst);
|
||||
}
|
||||
|
||||
auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr<FormatReinterpreterBase>&& obj) {
|
||||
const u32 dst_index = static_cast<u32>(dest);
|
||||
return reinterpreters[dst_index].push_back(std::move(obj));
|
||||
};
|
||||
|
||||
Register(VideoCore::PixelFormat::RGBA8, std::make_unique<D24S8toRGBA8>(instance, scheduler, *this));
|
||||
}
|
||||
|
||||
TextureRuntime::~TextureRuntime() {
|
||||
@ -51,6 +58,10 @@ TextureRuntime::~TextureRuntime() {
|
||||
vmaDestroyImage(allocator, alloc.image, alloc.allocation);
|
||||
device.destroyImageView(alloc.image_view);
|
||||
device.destroyImageView(alloc.base_view);
|
||||
if (alloc.depth_view) {
|
||||
device.destroyImageView(alloc.depth_view);
|
||||
device.destroyImageView(alloc.stencil_view);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& [key, framebuffer] : clear_framebuffers) {
|
||||
@ -175,10 +186,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma
|
||||
vk::ImageView image_view = device.createImageView(view_info);
|
||||
vk::ImageView base_view = device.createImageView(base_view_info);
|
||||
|
||||
// Create separate depth/stencil views in case this gets reinterpreted with a compute shader
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
if (format == VideoCore::PixelFormat::D24S8) {
|
||||
vk::ImageViewCreateInfo view_info = {
|
||||
.image = image,
|
||||
.viewType = type == VideoCore::TextureType::CubeMap ?
|
||||
vk::ImageViewType::eCube :
|
||||
vk::ImageViewType::e2D,
|
||||
.format = vk_format,
|
||||
.subresourceRange = {
|
||||
.aspectMask = vk::ImageAspectFlagBits::eDepth,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = levels,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = layers
|
||||
}
|
||||
};
|
||||
|
||||
depth_view = device.createImageView(view_info);
|
||||
view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil;
|
||||
stencil_view = device.createImageView(view_info);
|
||||
}
|
||||
|
||||
return ImageAlloc{
|
||||
.image = image,
|
||||
.image_view = image_view,
|
||||
.base_view = base_view,
|
||||
.depth_view = depth_view,
|
||||
.stencil_view = stencil_view,
|
||||
.allocation = allocation,
|
||||
.format = vk_format,
|
||||
.aspect = aspect,
|
||||
@ -440,6 +477,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of reinterpreters registered for the given destination format.
/// The format value is used directly as an index into the per-format table.
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
    VideoCore::PixelFormat dest_format) const {
    const auto format_index = static_cast<u32>(dest_format);
    return reinterpreters[format_index];
}
|
||||
|
||||
void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc,
|
||||
vk::ImageLayout new_layout, u32 level, u32 level_count,
|
||||
u32 layer, u32 layer_count) {
|
||||
@ -501,7 +542,13 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al
|
||||
case vk::ImageLayout::eGeneral:
|
||||
info.access = vk::AccessFlagBits::eInputAttachmentRead;
|
||||
info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput |
|
||||
vk::PipelineStageFlagBits::eFragmentShader;
|
||||
vk::PipelineStageFlagBits::eFragmentShader |
|
||||
vk::PipelineStageFlagBits::eComputeShader;
|
||||
break;
|
||||
case vk::ImageLayout::eDepthStencilReadOnlyOptimal:
|
||||
// Image is going to be sampled from a compute shader
|
||||
info.access = vk::AccessFlagBits::eShaderRead;
|
||||
info.stage = vk::PipelineStageFlagBits::eComputeShader;
|
||||
break;
|
||||
default:
|
||||
LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout);
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "video_core/rasterizer_cache/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_cache/surface_base.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_format_reinterpreter.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_task_scheduler.h"
|
||||
|
||||
@ -25,6 +26,8 @@ struct ImageAlloc {
|
||||
vk::Image image;
|
||||
vk::ImageView image_view;
|
||||
vk::ImageView base_view;
|
||||
vk::ImageView depth_view;
|
||||
vk::ImageView stencil_view;
|
||||
VmaAllocation allocation;
|
||||
vk::ImageUsageFlags usage;
|
||||
vk::Format format;
|
||||
@ -52,13 +55,13 @@ public:
|
||||
/// Maps an internal staging buffer of the provided size of pixel uploads/downloads
|
||||
[[nodiscard]] StagingData FindStaging(u32 size, bool upload);
|
||||
|
||||
/// Causes a GPU command flush
|
||||
void Finish();
|
||||
|
||||
/// Allocates a vulkan image possibly reusing an existing one
|
||||
[[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type);
|
||||
|
||||
/// Causes a GPU command flush
|
||||
void Finish();
|
||||
|
||||
/// Takes back ownership of the allocation for recycling
|
||||
void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc);
|
||||
|
||||
@ -84,6 +87,10 @@ public:
|
||||
/// Generates mipmaps for all the available levels of the texture
|
||||
void GenerateMipmaps(Surface& surface, u32 max_level);
|
||||
|
||||
/// Returns all source formats that support reinterpretation to the dest format
|
||||
[[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations(
|
||||
VideoCore::PixelFormat dest_format) const;
|
||||
|
||||
/// Performs operations that need to be done on every scheduler slot switch
|
||||
void OnSlotSwitch(u32 new_slot);
|
||||
|
||||
@ -102,10 +109,12 @@ private:
|
||||
const Instance& instance;
|
||||
TaskScheduler& scheduler;
|
||||
RenderpassCache& renderpass_cache;
|
||||
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
|
||||
std::array<std::unique_ptr<StagingBuffer>, SCHEDULER_COMMAND_COUNT> staging_buffers;
|
||||
std::array<u32, SCHEDULER_COMMAND_COUNT> staging_offsets{};
|
||||
std::unordered_multimap<VideoCore::HostTextureTag, ImageAlloc> texture_recycler;
|
||||
std::unordered_map<vk::ImageView, vk::Framebuffer> clear_framebuffers;
|
||||
ReinterpreterList list;
|
||||
};
|
||||
|
||||
class Surface : public VideoCore::SurfaceBase<Surface> {
|
||||
@ -137,6 +146,16 @@ public:
|
||||
return alloc.base_view;
|
||||
}
|
||||
|
||||
/// Returns the depth only image view of the surface, null otherwise
|
||||
vk::ImageView GetDepthView() const {
|
||||
return alloc.depth_view;
|
||||
}
|
||||
|
||||
/// Returns the stencil only image view of the surface, null otherwise
|
||||
vk::ImageView GetStencilView() const {
|
||||
return alloc.stencil_view;
|
||||
}
|
||||
|
||||
/// Returns the internal format of the allocated texture
|
||||
vk::Format GetInternalFormat() const {
|
||||
return alloc.format;
|
||||
@ -156,6 +175,8 @@ private:
|
||||
TextureRuntime& runtime;
|
||||
const Instance& instance;
|
||||
TaskScheduler& scheduler;
|
||||
|
||||
public:
|
||||
ImageAlloc alloc{};
|
||||
FormatTraits traits;
|
||||
};
|
||||
|
Reference in New Issue
Block a user