diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index ad47a8f24..f0f27fbda 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -86,6 +86,8 @@ add_library(video_core STATIC renderer_vulkan/renderer_vulkan.h renderer_vulkan/vk_common.cpp renderer_vulkan/vk_common.h + renderer_vulkan/vk_format_reinterpreter.cpp + renderer_vulkan/vk_format_reinterpreter.h renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_instance.cpp diff --git a/src/video_core/rasterizer_cache/pixel_format.h b/src/video_core/rasterizer_cache/pixel_format.h index b6b69995d..50f1bcfaf 100644 --- a/src/video_core/rasterizer_cache/pixel_format.h +++ b/src/video_core/rasterizer_cache/pixel_format.h @@ -186,7 +186,7 @@ constexpr u32 GetFormatBpp(PixelFormat format) { } constexpr u32 GetBytesPerPixel(PixelFormat format) { - // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + // Modern GPUs need 4 bpp alignment for D24 if (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) { return 4; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 3055f9e2d..0532b4889 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -144,7 +144,7 @@ private: bool IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval); /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation - bool ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, + bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, SurfaceInterval interval); /// Create a new surface @@ -547,7 +547,7 @@ auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& inf // Blit mipmaps that have been invalidated SurfaceParams surface_params = *surface; - for (u32 level = 1; 
level <= max_level; ++level) { + for (u32 level = 1; level <= max_level; level++) { // In PICA all mipmap levels are stored next to each other surface_params.addr += surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; @@ -1059,28 +1059,24 @@ bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, Su } template -bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, +bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, SurfaceInterval interval) { - /*const PixelFormat dst_format = surface->pixel_format; - const SurfaceType type = GetFormatType(dst_format); - - for (auto& reinterpreter : - format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) { - + const PixelFormat dest_format = surface->pixel_format; + for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) { params.pixel_format = reinterpreter->GetSourceFormat(); Surface reinterpret_surface = FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (reinterpret_surface != nullptr) { - auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + if (reinterpret_surface) { + auto reinterpret_interval = reinterpret_surface->GetCopyableInterval(params); auto reinterpret_params = surface->FromInterval(reinterpret_interval); auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); auto dest_rect = surface->GetScaledSubRect(reinterpret_params); - reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, dest_rect); + reinterpreter->Reinterpret(*reinterpret_surface, src_rect, *surface, dest_rect); return true; } - }*/ + } return false; } diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 7b91fbd58..c3b3b2709 100644 --- 
a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -5,27 +5,103 @@ #include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_format_reinterpreter.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" namespace OpenGL { -class RGBA4toRGB5A1 final : public FormatReinterpreterBase { -public: - RGBA4toRGB5A1() { - constexpr std::string_view vs_source = R"( +D24S8toRGBA8::D24S8toRGBA8(bool use_texture_view) : use_texture_view{use_texture_view} { + constexpr std::string_view cs_source = R"( +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(binding = 0) uniform sampler2D depth; +layout(binding = 1) uniform usampler2D stencil; +layout(rgba8, binding = 2) uniform writeonly image2D color; + +uniform mediump ivec2 src_offset; + +void main() { +ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); + +highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); +lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; +highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); +imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0)); +} + +)"; + program.Create(cs_source); + src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); +} + +void D24S8toRGBA8::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect, + const Surface& dest, VideoCore::Rect2D dst_rect) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.texture_units[0].texture_2d = source.texture.handle; + + // Use glTextureView on desktop to avoid intermediate copy + if (use_texture_view) { + temp_tex.Create(); + glActiveTexture(GL_TEXTURE1); + glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.texture.handle, GL_DEPTH24_STENCIL8, 0, 1, + 0, 1); + 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else { + temp_tex.Release(); + temp_tex.Create(); + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE1); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + temp_rect = src_rect; + } + + state.texture_units[1].texture_2d = temp_tex.handle; + state.draw.shader_program = program.handle; + state.Apply(); + + glBindImageTexture(2, dest.texture.handle, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); + + glActiveTexture(GL_TEXTURE1); + if (!use_texture_view) { + glCopyImageSubData(source.texture.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, + src_rect.GetWidth(), src_rect.GetHeight(), 1); + } + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); + glDispatchCompute(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1); + + if (use_texture_view) { + temp_tex.Release(); + } + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); +} + +RGBA4toRGB5A1::RGBA4toRGB5A1() { + constexpr std::string_view vs_source = R"( out vec2 dst_coord; uniform mediump ivec2 dst_size; const vec2 vertices[4] = - vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); +vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); - dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size); +gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); +dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size); } )"; - constexpr std::string_view fs_source = R"( + 
constexpr std::string_view fs_source = R"( in mediump vec2 dst_coord; out lowp vec4 frag_color; @@ -36,225 +112,55 @@ uniform mediump ivec2 src_size; uniform mediump ivec2 src_offset; void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; +mediump ivec2 tex_coord; +if (src_size == dst_size) { + tex_coord = ivec2(dst_coord); +} else { + highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); + mediump int y = tex_index / src_size.x; + tex_coord = ivec2(tex_index - y * src_size.x, y); +} +tex_coord -= src_offset; - lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0)); - lowp ivec3 rgb5 = - ((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F; - frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01); +lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0)); +lowp ivec3 rgb5 = + ((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F; +frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01); } )"; - - program.Create(vs_source.data(), fs_source.data()); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); - } - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::RGBA4; - } - - void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, - const OGLTexture& dst_tex, Common::Rectangle dst_rect) override { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = src_tex.handle; - 
state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_tex.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - } - -private: - OGLProgram program; - GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1}; - OGLVertexArray vao; -}; - -class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase { -public: - ShaderD24S8toRGBA8() { - constexpr std::string_view vs_source = R"( -out vec2 dst_coord; - -uniform mediump ivec2 dst_size; - -const vec2 vertices[4] = - vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); - -void main() { - gl_Position = vec4(vertices[gl_VertexID], 0.0, 1.0); - dst_coord = (vertices[gl_VertexID] / 2.0 + 0.5) * vec2(dst_size); -} -)"; - - constexpr std::string_view fs_source = R"( -in mediump vec2 dst_coord; - -out lowp vec4 frag_color; - -uniform highp sampler2D depth; -uniform lowp usampler2D stencil; -uniform mediump ivec2 dst_size; -uniform mediump ivec2 src_size; -uniform mediump ivec2 src_offset; - -void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; - - highp uint depth_val = - uint(texelFetch(depth, tex_coord, 
0).x * (exp2(32.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; - highp uvec4 components = - uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); - frag_color = vec4(components) / (exp2(8.0) - 1.0); -} -)"; - - program.Create(vs_source.data(), fs_source.data()); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); - - auto state = OpenGLState::GetCurState(); - auto cur_program = state.draw.shader_program; - state.draw.shader_program = program.handle; - state.Apply(); - glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); - state.draw.shader_program = cur_program; - state.Apply(); - - // Nvidia seem to be the only one to support D24S8 views, at least on windows - // so for everyone else it will do an intermediate copy before running through the shader - std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - if (vendor.find("NVIDIA") != vendor.npos) { - use_texture_view = true; - } else { - LOG_INFO(Render_OpenGL, - "Texture views are unsupported, reinterpretation will do intermediate copy"); - temp_tex.Create(); - } - } - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::D24S8; - } - - void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, - const OGLTexture& dst_tex, Common::Rectangle dst_rect) override { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = src_tex.handle; - - if (use_texture_view) { - temp_tex.Create(); - glActiveTexture(GL_TEXTURE1); - glTextureView(temp_tex.handle, GL_TEXTURE_2D, src_tex.handle, GL_DEPTH24_STENCIL8, 0, 1, - 0, 1); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) { - temp_tex.Release(); - temp_tex.Create(); - state.texture_units[1].texture_2d = temp_tex.handle; - state.Apply(); - glActiveTexture(GL_TEXTURE1); - glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - temp_rect = src_rect; - } - - state.texture_units[1].texture_2d = temp_tex.handle; - state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glActiveTexture(GL_TEXTURE1); - if (!use_texture_view) { - glCopyImageSubData(src_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - src_rect.GetWidth(), src_rect.GetHeight(), 1); - } - glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - dst_tex.handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - if (use_texture_view) { - temp_tex.Release(); - } - } - -private: - bool use_texture_view{}; - OGLProgram program{}; - GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1}; - OGLVertexArray vao{}; - OGLTexture temp_tex{}; - Common::Rectangle temp_rect{0, 0, 0, 0}; -}; - 
-FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { - auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { - const u32 dst_index = static_cast(dest); - return reinterpreters[dst_index].push_back(std::move(obj)); - }; - - Register(VideoCore::PixelFormat::RGBA8, std::make_unique()); - Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); + read_fbo.Create(); + draw_fbo.Create(); + program.Create(vs_source.data(), fs_source.data()); + dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); + src_size_loc = glGetUniformLocation(program.handle, "src_size"); + src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); + vao.Create(); } -auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(VideoCore::PixelFormat dst_format) - -> const ReinterpreterList& { - return reinterpreters[static_cast(dst_format)]; +void RGBA4toRGB5A1::Reinterpret(const Surface& source, VideoCore::Rect2D src_rect, + const Surface& dest, VideoCore::Rect2D dst_rect) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state; + state.texture_units[0].texture_2d = source.texture.handle; + state.draw.draw_framebuffer = draw_fbo.handle; + state.draw.shader_program = program.handle; + state.draw.vertex_array = vao.handle; + state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), + static_cast(dst_rect.GetWidth()), + static_cast(dst_rect.GetHeight())}; + state.Apply(); + + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + dest.texture.handle, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); + glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); + glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); } } // namespace OpenGL diff --git 
a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h index 0e9cc61e7..3279f8de3 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.h @@ -4,44 +4,60 @@ #pragma once -#include -#include "common/math_util.h" -#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -class RasterizerCacheOpenGL; +class Surface; class FormatReinterpreterBase { public: - FormatReinterpreterBase() { - read_fbo.Create(); - draw_fbo.Create(); - } - virtual ~FormatReinterpreterBase() = default; virtual VideoCore::PixelFormat GetSourceFormat() const = 0; - virtual void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, - const OGLTexture& dst_tex, Common::Rectangle dst_rect) = 0; - -protected: - OGLFramebuffer read_fbo; - OGLFramebuffer draw_fbo; + virtual void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect, + const Surface& dest, VideoCore::Rect2D dst_rect) = 0; }; using ReinterpreterList = std::vector>; -class FormatReinterpreterOpenGL : NonCopyable { +class D24S8toRGBA8 final : public FormatReinterpreterBase { public: - FormatReinterpreterOpenGL(); - ~FormatReinterpreterOpenGL() = default; + D24S8toRGBA8(bool use_texture_view); - const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dst_format); + [[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::D24S8; + } + + void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect, + const Surface& dest, VideoCore::Rect2D dst_rect) override; private: - std::array reinterpreters; + bool use_texture_view{}; + OGLProgram program{}; + GLint src_offset_loc{-1}; + OGLTexture temp_tex{}; + VideoCore::Rect2D temp_rect{0, 0, 0, 0}; +}; + +class RGBA4toRGB5A1 final : public 
FormatReinterpreterBase { +public: + RGBA4toRGB5A1(); + + [[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::RGBA4; + } + + void Reinterpret(const Surface& source, VideoCore::Rect2D src_rect, + const Surface& dest, VideoCore::Rect2D dst_rect) override; + +private: + OGLFramebuffer read_fbo; + OGLFramebuffer draw_fbo; + OGLProgram program; + GLint dst_size_loc{-1}, src_size_loc{-1}, src_offset_loc{-1}; + OGLVertexArray vao; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index 881343596..f517e8992 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -153,6 +153,14 @@ void OGLProgram::Create(const char* vert_shader, const char* frag_shader) { Create(false, {vert.handle, frag.handle}); } +void OGLProgram::Create(const std::string_view compute_shader) { + OGLShader comp; + comp.Create(compute_shader.data(), GL_COMPUTE_SHADER); + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + Create(false, {comp.handle}); +} + void OGLProgram::Release() { if (handle == 0) return; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index d6b24262c..91edc2e20 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include "common/common_types.h" @@ -137,6 +138,9 @@ public: /// Creates a new program from given shader soruce code void Create(const char* vert_shader, const char* frag_shader); + /// Creates a new compute shader program + void Create(const std::string_view compute_shader); + /// Deletes the internal OpenGL resource void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp 
index 32b40c712..6b701db6d 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -14,19 +14,18 @@ namespace OpenGL { GLuint LoadShader(const char* source, GLenum type) { - const std::string version = GLES ? R"(#version 320 es - + const std::string version = GLES ? R"( +#version 320 es #define CITRA_GLES #if defined(GL_ANDROID_extension_pack_es31a) #extension GL_ANDROID_extension_pack_es31a : enable -#endif // defined(GL_ANDROID_extension_pack_es31a) +#endif #if defined(GL_EXT_clip_cull_distance) #extension GL_EXT_clip_cull_distance : enable -#endif // defined(GL_EXT_clip_cull_distance) -)" - : "#version 430 core\n"; +#endif +)" : "#version 430 core\n"; const char* debug_type; switch (type) { @@ -39,6 +38,9 @@ GLuint LoadShader(const char* source, GLenum type) { case GL_FRAGMENT_SHADER: debug_type = "fragment"; break; + case GL_COMPUTE_SHADER: + debug_type = "compute"; + break; default: UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index c714c3828..8b70d5361 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -6,6 +6,7 @@ #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/gl_driver.h" +#include "video_core/renderer_opengl/gl_format_reinterpreter.h" #include "video_core/renderer_opengl/gl_state.h" namespace OpenGL { @@ -54,10 +55,18 @@ GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { TextureRuntime::TextureRuntime(Driver& driver) : driver{driver}, downloader_es{false}, - filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()} { + filterer{Settings::values.texture_filter_name, VideoCore::GetResolutionScaleFactor()}{ read_fbo.Create(); draw_fbo.Create(); + + auto Register = [this](VideoCore::PixelFormat 
dest, std::unique_ptr&& obj) { + const u32 dst_index = static_cast(dest); + return reinterpreters[dst_index].push_back(std::move(obj)); + }; + + Register(VideoCore::PixelFormat::RGBA8, std::make_unique(!driver.IsOpenGLES())); + Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { @@ -140,7 +149,6 @@ void TextureRuntime::FormatConvert(const Surface& surface, bool upload, OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { - const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1; const GLenum target = type == VideoCore::TextureType::CubeMap ? GL_TEXTURE_CUBE_MAP : GL_TEXTURE_2D; @@ -302,6 +310,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { glGenerateMipmap(GL_TEXTURE_2D); } +const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const { + return reinterpreters[static_cast(dest_format)]; +} + void TextureRuntime::BindFramebuffer(GLenum target, GLint level, GLenum textarget, VideoCore::SurfaceType type, OGLTexture& texture) const { const GLint framebuffer = target == GL_DRAW_FRAMEBUFFER ? 
draw_fbo.handle : read_fbo.handle; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index 332c0778f..1a9fb6a36 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -7,7 +7,7 @@ #include #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_base.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" +#include "video_core/renderer_opengl/gl_format_reinterpreter.h" #include "video_core/renderer_opengl/texture_filters/texture_filterer.h" #include "video_core/renderer_opengl/texture_downloader_es.h" @@ -92,6 +92,10 @@ public: /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(Surface& surface, u32 max_level); + /// Returns all source formats that support reinterpretation to the dest format + [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( + VideoCore::PixelFormat dest_format) const; + private: /// Returns the framebuffer used for texture downloads void BindFramebuffer(GLenum target, GLint level, GLenum textarget, @@ -116,6 +120,7 @@ private: Driver& driver; TextureDownloaderES downloader_es; TextureFilterer filterer; + std::array reinterpreters; // Staging buffers stored in increasing size std::multiset upload_buffers; diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp new file mode 100644 index 000000000..15243dbb6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.cpp @@ -0,0 +1,191 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#define VULKAN_HPP_NO_CONSTRUCTORS +#include "video_core/renderer_vulkan/vk_format_reinterpreter.h" +#include "video_core/renderer_vulkan/vk_texture_runtime.h" +#include "video_core/renderer_vulkan/vk_shader.h" + +namespace Vulkan { + +D24S8toRGBA8::D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime) + : FormatReinterpreterBase{instance, scheduler, runtime}, device{instance.GetDevice()} { + constexpr std::string_view cs_source = R"( +#version 450 core +#extension GL_EXT_samplerless_texture_functions : require +layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; +layout(set = 0, binding = 0) uniform texture2D depth; +layout(set = 0, binding = 1) uniform utexture2D stencil; +layout(set = 0, binding = 2, rgba8) uniform writeonly image2D color; + +layout(push_constant, std140) uniform ComputeInfo { + mediump ivec2 src_offset; +}; + +void main() { + ivec2 tex_coord = src_offset + ivec2(gl_GlobalInvocationID.xy); + + highp uint depth_val = + uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + highp uvec4 components = + uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); + imageStore(color, tex_coord, vec4(components) / (exp2(8.0) - 1.0)); +} + +)"; + compute_shader = Compile(cs_source, vk::ShaderStageFlagBits::eCompute, + device, ShaderOptimization::High); + + const std::array compute_layout_bindings = { + vk::DescriptorSetLayoutBinding{ + .binding = 0, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute + }, + vk::DescriptorSetLayoutBinding{ + .binding = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute + }, + vk::DescriptorSetLayoutBinding{ + .binding = 2, + .descriptorType = vk::DescriptorType::eStorageImage, + .descriptorCount = 1, + .stageFlags = 
vk::ShaderStageFlagBits::eCompute + } + }; + + const vk::DescriptorSetLayoutCreateInfo compute_layout_info = { + .bindingCount = static_cast(compute_layout_bindings.size()), + .pBindings = compute_layout_bindings.data() + }; + + descriptor_layout = device.createDescriptorSetLayout(compute_layout_info); + + const std::array update_template_entries = { + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = 0, + .stride = sizeof(vk::DescriptorImageInfo) + }, + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampledImage, + .offset = sizeof(vk::DescriptorImageInfo), + .stride = 0 + }, + vk::DescriptorUpdateTemplateEntry{ + .dstBinding = 2, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eStorageImage, + .offset = 2 * sizeof(vk::DescriptorImageInfo), + .stride = 0 + } + }; + + const vk::DescriptorUpdateTemplateCreateInfo template_info = { + .descriptorUpdateEntryCount = static_cast(update_template_entries.size()), + .pDescriptorUpdateEntries = update_template_entries.data(), + .templateType = vk::DescriptorUpdateTemplateType::eDescriptorSet, + .descriptorSetLayout = descriptor_layout + }; + + update_template = device.createDescriptorUpdateTemplate(template_info); + + const vk::PushConstantRange push_range = { + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .offset = 0, + .size = sizeof(Common::Vec2i), + }; + + const vk::PipelineLayoutCreateInfo layout_info = { + .setLayoutCount = 1, + .pSetLayouts = &descriptor_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_range + }; + + compute_pipeline_layout = device.createPipelineLayout(layout_info); + + const vk::DescriptorSetAllocateInfo alloc_info = { + .descriptorPool = scheduler.GetPersistentDescriptorPool(), + .descriptorSetCount = 1, + .pSetLayouts = 
&descriptor_layout + }; + + descriptor_set = device.allocateDescriptorSets(alloc_info)[0]; + + const vk::PipelineShaderStageCreateInfo compute_stage = { + .stage = vk::ShaderStageFlagBits::eCompute, + .module = compute_shader, + .pName = "main" + }; + + const vk::ComputePipelineCreateInfo compute_info = { + .stage = compute_stage, + .layout = compute_pipeline_layout + }; + + if (const auto result = device.createComputePipeline({}, compute_info); + result.result == vk::Result::eSuccess) { + compute_pipeline = result.value; + } else { + LOG_CRITICAL(Render_Vulkan, "D24S8 compute pipeline creation failed!"); + UNREACHABLE(); + } +} + +D24S8toRGBA8::~D24S8toRGBA8() { + device.destroyPipeline(compute_pipeline); + device.destroyPipelineLayout(compute_pipeline_layout); + device.destroyDescriptorUpdateTemplate(update_template); + device.destroyDescriptorSetLayout(descriptor_layout); + device.destroyShaderModule(compute_shader); +} + +void D24S8toRGBA8::Reinterpret(Surface& source, VideoCore::Rect2D src_rect, + Surface& dest, VideoCore::Rect2D dst_rect) { + vk::CommandBuffer command_buffer = scheduler.GetRenderCommandBuffer(); + runtime.Transition(command_buffer, source.alloc, vk::ImageLayout::eDepthStencilReadOnlyOptimal, + 0, source.alloc.levels); + runtime.Transition(command_buffer, dest.alloc, vk::ImageLayout::eGeneral, 0, dest.alloc.levels); + + const std::array textures = { + vk::DescriptorImageInfo{ + .imageView = source.GetDepthView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal + }, + vk::DescriptorImageInfo{ + .imageView = source.GetStencilView(), + .imageLayout = vk::ImageLayout::eDepthStencilReadOnlyOptimal + }, + vk::DescriptorImageInfo{ + .imageView = dest.GetImageView(), + .imageLayout = vk::ImageLayout::eGeneral + } + }; + + device.updateDescriptorSetWithTemplate(descriptor_set, update_template, textures[0]); + command_buffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, compute_pipeline_layout, + 0, 1, &descriptor_set, 0, nullptr); 
+ + command_buffer.bindPipeline(vk::PipelineBindPoint::eCompute, compute_pipeline); + + const auto src_offset = Common::MakeVec(src_rect.left, src_rect.bottom); + command_buffer.pushConstants(compute_pipeline_layout, vk::ShaderStageFlagBits::eCompute, + 0, sizeof(Common::Vec2i), src_offset.AsArray()); + + command_buffer.dispatch(src_rect.GetWidth() / 32, src_rect.GetHeight() / 32, 1); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_format_reinterpreter.h b/src/video_core/renderer_vulkan/vk_format_reinterpreter.h new file mode 100644 index 000000000..47e115ed4 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_format_reinterpreter.h @@ -0,0 +1,58 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_vulkan/vk_common.h" + +namespace Vulkan { + +class Surface; +class Instance; +class TaskScheduler; +class TextureRuntime; + +class FormatReinterpreterBase { +public: + FormatReinterpreterBase(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime) + : instance{instance}, scheduler{scheduler}, runtime{runtime} {} + virtual ~FormatReinterpreterBase() = default; + + virtual VideoCore::PixelFormat GetSourceFormat() const = 0; + virtual void Reinterpret(Surface& source, VideoCore::Rect2D src_rect, + Surface& dest, VideoCore::Rect2D dst_rect) = 0; + +protected: + const Instance& instance; + TaskScheduler& scheduler; + TextureRuntime& runtime; +}; + +using ReinterpreterList = std::vector>; + +class D24S8toRGBA8 final : public FormatReinterpreterBase { +public: + D24S8toRGBA8(const Instance& instance, TaskScheduler& scheduler, TextureRuntime& runtime); + ~D24S8toRGBA8(); + + [[nodiscard]] VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::D24S8; + } + + void Reinterpret(Surface& source, VideoCore::Rect2D src_rect, 
+ Surface& dest, VideoCore::Rect2D dst_rect) override; + +private: + vk::Device device; + vk::Pipeline compute_pipeline; + vk::PipelineLayout compute_pipeline_layout; + vk::DescriptorSetLayout descriptor_layout; + vk::DescriptorSet descriptor_set; + vk::DescriptorUpdateTemplate update_template; + vk::ShaderModule compute_shader; + VideoCore::Rect2D temp_rect{0, 0, 0, 0}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp index dc596991c..346e6f84a 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.cpp @@ -49,6 +49,8 @@ TaskScheduler::TaskScheduler(const Instance& instance, RendererVulkan& renderer) .pPoolSizes = pool_sizes.data() }; + persistent_descriptor_pool = device.createDescriptorPool(descriptor_pool_info); + const vk::CommandBufferAllocateInfo buffer_info = { .commandPool = command_pool, .level = vk::CommandBufferLevel::ePrimary, @@ -93,6 +95,7 @@ TaskScheduler::~TaskScheduler() { } device.destroyCommandPool(command_pool); + device.destroyDescriptorPool(persistent_descriptor_pool); } void TaskScheduler::Synchronize(u32 slot) { diff --git a/src/video_core/renderer_vulkan/vk_task_scheduler.h b/src/video_core/renderer_vulkan/vk_task_scheduler.h index da3ab5860..1cf0923e7 100644 --- a/src/video_core/renderer_vulkan/vk_task_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_task_scheduler.h @@ -52,6 +52,11 @@ public: return commands[current_command].descriptor_pool; } + /// Returns the persistent descriptor pool + vk::DescriptorPool GetPersistentDescriptorPool() const { + return persistent_descriptor_pool; + } + /// Returns the index of the current command slot u32 GetCurrentSlotIndex() const { return current_command; @@ -92,6 +97,7 @@ private: vk::CommandPool command_pool{}; vk::Semaphore timeline{}; + vk::DescriptorPool persistent_descriptor_pool; std::array commands{}; u32 current_command = 0; 
}; diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index c1a857201..a28ba8c73 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -40,6 +40,13 @@ TextureRuntime::TextureRuntime(const Instance& instance, TaskScheduler& schedule vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst); } + + auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { + const u32 dst_index = static_cast(dest); + return reinterpreters[dst_index].push_back(std::move(obj)); + }; + + Register(VideoCore::PixelFormat::RGBA8, std::make_unique(instance, scheduler, *this)); } TextureRuntime::~TextureRuntime() { @@ -51,6 +58,10 @@ TextureRuntime::~TextureRuntime() { vmaDestroyImage(allocator, alloc.image, alloc.allocation); device.destroyImageView(alloc.image_view); device.destroyImageView(alloc.base_view); + if (alloc.depth_view) { + device.destroyImageView(alloc.depth_view); + device.destroyImageView(alloc.stencil_view); + } } for (const auto& [key, framebuffer] : clear_framebuffers) { @@ -175,10 +186,36 @@ ImageAlloc TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelForma vk::ImageView image_view = device.createImageView(view_info); vk::ImageView base_view = device.createImageView(base_view_info); + // Create separate depth/stencil views in case this gets reinterpreted with a compute shader + vk::ImageView depth_view; + vk::ImageView stencil_view; + if (format == VideoCore::PixelFormat::D24S8) { + vk::ImageViewCreateInfo view_info = { + .image = image, + .viewType = type == VideoCore::TextureType::CubeMap ? 
+ vk::ImageViewType::eCube : + vk::ImageViewType::e2D, + .format = vk_format, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eDepth, + .baseMipLevel = 0, + .levelCount = levels, + .baseArrayLayer = 0, + .layerCount = layers + } + }; + + depth_view = device.createImageView(view_info); + view_info.subresourceRange.aspectMask = vk::ImageAspectFlagBits::eStencil; + stencil_view = device.createImageView(view_info); + } + return ImageAlloc{ .image = image, .image_view = image_view, .base_view = base_view, + .depth_view = depth_view, + .stencil_view = stencil_view, .allocation = allocation, .format = vk_format, .aspect = aspect, @@ -440,6 +477,10 @@ void TextureRuntime::GenerateMipmaps(Surface& surface, u32 max_level) { } } +const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const { + return reinterpreters[static_cast(dest_format)]; +} + void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& alloc, vk::ImageLayout new_layout, u32 level, u32 level_count, u32 layer, u32 layer_count) { @@ -501,7 +542,13 @@ void TextureRuntime::Transition(vk::CommandBuffer command_buffer, ImageAlloc& al case vk::ImageLayout::eGeneral: info.access = vk::AccessFlagBits::eInputAttachmentRead; info.stage = vk::PipelineStageFlagBits::eColorAttachmentOutput | - vk::PipelineStageFlagBits::eFragmentShader; + vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; + break; + case vk::ImageLayout::eDepthStencilReadOnlyOptimal: + // Image is going to be sampled from a compute shader + info.access = vk::AccessFlagBits::eShaderRead; + info.stage = vk::PipelineStageFlagBits::eComputeShader; break; default: LOG_CRITICAL(Render_Vulkan, "Unhandled vulkan image layout {}\n", layout); diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index 13bf49716..6a2df4f99 100644 --- 
a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -9,6 +9,7 @@ #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_base.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" +#include "video_core/renderer_vulkan/vk_format_reinterpreter.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_task_scheduler.h" @@ -25,6 +26,8 @@ struct ImageAlloc { vk::Image image; vk::ImageView image_view; vk::ImageView base_view; + vk::ImageView depth_view; + vk::ImageView stencil_view; VmaAllocation allocation; vk::ImageUsageFlags usage; vk::Format format; @@ -52,13 +55,13 @@ public: /// Maps an internal staging buffer of the provided size of pixel uploads/downloads [[nodiscard]] StagingData FindStaging(u32 size, bool upload); - /// Causes a GPU command flush - void Finish(); - /// Allocates a vulkan image possibly resusing an existing one [[nodiscard]] ImageAlloc Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type); + /// Causes a GPU command flush + void Finish(); + /// Takes back ownership of the allocation for recycling void Recycle(const VideoCore::HostTextureTag tag, ImageAlloc&& alloc); @@ -84,6 +87,10 @@ public: /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(Surface& surface, u32 max_level); + /// Returns all source formats that support reinterpretation to the dest format + [[nodiscard]] const ReinterpreterList& GetPossibleReinterpretations( + VideoCore::PixelFormat dest_format) const; + /// Performs operations that need to be done on every scheduler slot switch void OnSlotSwitch(u32 new_slot); @@ -102,10 +109,12 @@ private: const Instance& instance; TaskScheduler& scheduler; RenderpassCache& renderpass_cache; + std::array reinterpreters; std::array, SCHEDULER_COMMAND_COUNT> staging_buffers; std::array staging_offsets{}; 
std::unordered_multimap texture_recycler; std::unordered_map clear_framebuffers; + ReinterpreterList list; }; class Surface : public VideoCore::SurfaceBase { @@ -137,6 +146,16 @@ public: return alloc.base_view; } + /// Returns the depth only image view of the surface, null otherwise + vk::ImageView GetDepthView() const { + return alloc.depth_view; + } + + /// Returns the stencil only image view of the surface, null otherwise + vk::ImageView GetStencilView() const { + return alloc.stencil_view; + } + /// Returns the internal format of the allocated texture vk::Format GetInternalFormat() const { return alloc.format; @@ -156,6 +175,8 @@ private: TextureRuntime& runtime; const Instance& instance; TaskScheduler& scheduler; + +public: ImageAlloc alloc{}; FormatTraits traits; };