rasterizer_cache: Rework reinterpretation lookup

This commit is contained in:
GPUCode
2023-04-28 15:31:44 +03:00
parent 41b777a693
commit 1b6be6f72a
17 changed files with 232 additions and 406 deletions

View File

@@ -56,8 +56,6 @@ add_library(video_core STATIC
renderer_opengl/gl_blit_helper.h
renderer_opengl/gl_driver.cpp
renderer_opengl/gl_driver.h
renderer_opengl/gl_format_reinterpreter.cpp
renderer_opengl/gl_format_reinterpreter.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp

View File

@@ -4,29 +4,18 @@
//? #version 430 core
layout(location = 0) in mediump vec2 dst_coord;
layout(location = 0) in mediump vec2 tex_coord;
layout(location = 0) out lowp vec4 frag_color;
layout(binding = 0) uniform highp sampler2D depth;
layout(binding = 1) uniform lowp usampler2D stencil;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
mediump vec2 coord = tex_coord * vec2(textureSize(depth, 0));
mediump ivec2 tex_icoord = ivec2(coord);
highp uint depth_val =
uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x;
uint(texelFetch(depth, tex_icoord, 0).x * (exp2(32.0) - 1.0));
lowp uint stencil_val = texelFetch(stencil, tex_icoord, 0).x;
highp uvec4 components =
uvec4(stencil_val, (uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu);
frag_color = vec4(components) / (exp2(8.0) - 1.0);

View File

@@ -6,7 +6,7 @@
layout(location = 0) out vec2 dst_coord;
uniform mediump ivec2 dst_size;
layout(location = 0) uniform mediump ivec2 dst_size;
const vec2 vertices[4] =
vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0));

View File

@@ -4,26 +4,15 @@
//? #version 430 core
layout(location = 0) in mediump vec2 dst_coord;
layout(location = 0) in mediump vec2 tex_coord;
layout(location = 0) out lowp vec4 frag_color;
layout(binding = 0) uniform lowp sampler2D source;
uniform mediump ivec2 dst_size;
uniform mediump ivec2 src_size;
uniform mediump ivec2 src_offset;
void main() {
mediump ivec2 tex_coord;
if (src_size == dst_size) {
tex_coord = ivec2(dst_coord);
} else {
highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x);
mediump int y = tex_index / src_size.x;
tex_coord = ivec2(tex_index - y * src_size.x, y);
}
tex_coord -= src_offset;
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0));
mediump vec2 coord = tex_coord * vec2(textureSize(source, 0));
mediump ivec2 tex_icoord = ivec2(coord);
lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_icoord, 0) * (exp2(4.0) - 1.0));
lowp ivec3 rgb5 =
((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F;
frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01);

View File

@@ -10,12 +10,11 @@ MICROPROFILE_DEFINE(RasterizerCache_CopySurface, "RasterizerCache", "CopySurface
MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(RasterizerCache_UploadSurface, "RasterizerCache", "UploadSurface",
MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(RasterizerCache_ComputeHash, "RasterizerCache", "ComputeHash",
MICROPROFILE_DEFINE(RasterizerCache_ValidateSurface, "RasterizerCache", "ValidateSurface",
MP_RGB(32, 64, 192));
MICROPROFILE_DEFINE(RasterizerCache_DownloadSurface, "RasterizerCache", "DownloadSurface",
MP_RGB(128, 192, 64));
MICROPROFILE_DEFINE(RasterizerCache_Invalidation, "RasterizerCache", "Invalidation",
MP_RGB(128, 64, 192));
MICROPROFILE_DEFINE(RasterizerCache_Flush, "RasterizerCache", "Flush", MP_RGB(128, 64, 192));
} // namespace VideoCore

View File

@@ -19,10 +19,9 @@ namespace VideoCore {
MICROPROFILE_DECLARE(RasterizerCache_CopySurface);
MICROPROFILE_DECLARE(RasterizerCache_UploadSurface);
MICROPROFILE_DECLARE(RasterizerCache_ComputeHash);
MICROPROFILE_DECLARE(RasterizerCache_ValidateSurface);
MICROPROFILE_DECLARE(RasterizerCache_DownloadSurface);
MICROPROFILE_DECLARE(RasterizerCache_Invalidation);
MICROPROFILE_DECLARE(RasterizerCache_Flush);
constexpr auto RangeFromInterval(const auto& map, const auto& interval) {
return boost::make_iterator_range(map.equal_range(interval));
@@ -848,12 +847,19 @@ SurfaceId RasterizerCache<T>::FindMatch(const SurfaceParams& params, ScaleMatch
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Copy>{}, [&] {
ASSERT(validate_interval);
auto copy_interval =
const SurfaceInterval copy_interval =
surface.GetCopyableInterval(params.FromInterval(*validate_interval));
bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
surface.CanCopy(params, copy_interval);
const bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 &&
surface.CanCopy(params, copy_interval);
return std::make_pair(matched, copy_interval);
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Reinterpret>{}, [&] {
ASSERT(validate_interval);
const bool matched =
!boost::icl::contains(surface.invalid_regions, *validate_interval) &&
surface.CanReinterpret(params);
return std::make_pair(matched, surface.GetInterval());
});
IsMatch_Helper(std::integral_constant<MatchFlags, MatchFlags::Expand>{}, [&] {
return std::make_pair(surface.CanExpand(params), surface.GetInterval());
});
@@ -903,6 +909,8 @@ void RasterizerCache<T>::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s
return;
}
MICROPROFILE_SCOPE(RasterizerCache_ValidateSurface);
Surface& surface = slot_surfaces[surface_id];
const SurfaceInterval validate_interval(addr, addr + size);
@@ -949,22 +957,7 @@ void RasterizerCache<T>::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s
notify_validated(interval);
continue;
}
// Could not find a matching reinterpreter, check if we need to implement a
// reinterpreter
if (NoUnimplementedReinterpretations(surface, params, interval) &&
!IntervalHasInvalidPixelFormat(params, interval)) {
// No surfaces were found in the cache that had a matching bit-width.
// If the region was created entirely on the GPU,
// assume it was a developer mistake and skip flushing.
if (boost::icl::contains(dirty_regions, interval)) {
LOG_DEBUG(HW_GPU, "Region created fully on GPU and reinterpretation is "
"invalid. Skipping validation");
validate_regions.erase(interval);
continue;
}
}
// Load data from 3DS memory
FlushRegion(params.addr, params.size);
if (!use_custom_textures || !UploadCustomSurface(surface_id, interval)) {
UploadSurface(surface, interval);
@@ -1134,71 +1127,41 @@ void RasterizerCache<T>::DownloadFillSurface(Surface& surface, SurfaceInterval i
}
}
/// Probes the cache for copy-source surfaces whose pixel format has the same bit-width as
/// `surface`. Every hit is logged as a missing reinterpreter.
/// @returns true when no such surface was found, false otherwise.
template <class T>
bool RasterizerCache<T>::NoUnimplementedReinterpretations(const Surface& surface,
                                                          SurfaceParams params,
                                                          const SurfaceInterval& interval) {
    static constexpr std::array<PixelFormat, 17> all_formats{
        PixelFormat::RGBA8, PixelFormat::RGB8,   PixelFormat::RGB5A1, PixelFormat::RGB565,
        PixelFormat::RGBA4, PixelFormat::IA8,    PixelFormat::RG8,    PixelFormat::I8,
        PixelFormat::A8,    PixelFormat::IA4,    PixelFormat::I4,     PixelFormat::A4,
        PixelFormat::ETC1,  PixelFormat::ETC1A4, PixelFormat::D16,    PixelFormat::D24,
        PixelFormat::D24S8,
    };

    bool all_implemented = true;
    for (const PixelFormat candidate : all_formats) {
        // Only formats of identical bit-width are candidates for reinterpretation.
        if (GetFormatBpp(candidate) != surface.GetFormatBpp()) {
            continue;
        }

        // `params` is a by-value copy, so overwriting its format is local to this probe.
        // The lookup could potentially be expensive, although experimentally it hasn't been
        // too bad.
        params.pixel_format = candidate;
        SurfaceId probe_id = FindMatch<MatchFlags::Copy>(params, ScaleMatch::Ignore, interval);
        if (!probe_id) {
            continue;
        }

        LOG_WARNING(HW_GPU, "Missing pixel_format reinterpreter: {} -> {}",
                    PixelFormatAsString(candidate), PixelFormatAsString(surface.pixel_format));
        all_implemented = false;
    }
    return all_implemented;
}
/// Walks the surfaces registered in [params.addr, params.end) and reports whether one of them
/// has PixelFormat::Invalid. The visitor returns true on the first hit.
template <class T>
bool RasterizerCache<T>::IntervalHasInvalidPixelFormat(const SurfaceParams& params,
                                                       const SurfaceInterval& interval) {
    bool found_invalid = false;
    ForEachSurfaceInRegion(params.addr, params.end, [&](SurfaceId surface_id, Surface& surface) {
        if (surface.pixel_format != PixelFormat::Invalid) {
            return false;
        }
        LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface.addr);
        found_invalid = true;
        return true;
    });
    return found_invalid;
}
// Attempts to validate `interval` of `surface` from another cached surface whose pixel format
// may differ, instead of re-uploading the data.
// NOTE(review): this span appears to interleave the removed and the added implementation of
// the function (it is a rendered diff hunk); read it as a diff, not as one compilable body.
template <class T>
bool RasterizerCache<T>::ValidateByReinterpretation(Surface& surface, SurfaceParams params,
const SurfaceInterval& interval) {
// Presumably the previous implementation: try every reinterpreter the runtime registered for
// the destination format and use the first one with a copy-source match in the cache.
const PixelFormat dest_format = surface.pixel_format;
for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) {
params.pixel_format = reinterpreter->GetSourceFormat();
SurfaceId reinterpret_surface_id =
FindMatch<MatchFlags::Copy>(params, ScaleMatch::Ignore, interval);
if (reinterpret_surface_id) {
Surface& reinterpret_surface = slot_surfaces[reinterpret_surface_id];
auto reinterpret_interval = reinterpret_surface.GetCopyableInterval(params);
auto reinterpret_params = surface.FromInterval(reinterpret_interval);
auto src_rect = reinterpret_surface.GetScaledSubRect(reinterpret_params);
auto dest_rect = surface.GetScaledSubRect(reinterpret_params);
reinterpreter->Reinterpret(reinterpret_surface, src_rect, surface, dest_rect);
return true;
// Presumably the reworked implementation: a single cache lookup with MatchFlags::Reinterpret,
// then the format pair is handed to runtime.Reinterpret().
const PixelFormat dst_format = surface.pixel_format;
SurfaceId reinterpret_id =
FindMatch<MatchFlags::Reinterpret>(params, ScaleMatch::Ignore, interval);
if (reinterpret_id) {
Surface& src_surface = slot_surfaces[reinterpret_id];
// Only surfaces with an identical stride are reinterpreted; others fall to the log below.
if (src_surface.stride == surface.stride) {
const SurfaceInterval copy_interval = src_surface.GetCopyableInterval(params);
if (boost::icl::is_empty(copy_interval)) {
return false;
}
const PAddr addr = interval.lower();
const PixelFormat src_format = src_surface.pixel_format;
const bool is_gpu_modified = boost::icl::contains(dirty_regions, copy_interval);
// Differing bit-widths on a region fully contained in dirty_regions: report success
// without touching the surface.
if (GetFormatBpp(src_format) != GetFormatBpp(dst_format) && is_gpu_modified) {
LOG_DEBUG(HW_GPU, "Region created fully on GPU and reinterpretation is "
"invalid. Skipping validation");
return true;
}
const SurfaceParams copy_params = surface.FromInterval(copy_interval);
const TextureBlit reinterpret = {
.src_level = src_surface.LevelOf(addr),
.dst_level = surface.LevelOf(addr),
.src_rect = src_surface.GetScaledSubRect(copy_params),
.dst_rect = surface.GetScaledSubRect(copy_params),
};
return runtime.Reinterpret(src_surface, surface, reinterpret);
}
// NOTE(review): the message below misspells "dimensional reinterpretation"; it is a runtime
// string, so it is left untouched here.
LOG_INFO(HW_GPU, "Unimplemented dimentional reinterpretatation {}x{} -> {}x{}",
src_surface.width, src_surface.height, surface.width, surface.height);
// Stride mismatch: the result is whether `interval` lies fully inside dirty_regions.
const bool is_gpu_modified = boost::icl::contains(dirty_regions, interval);
return is_gpu_modified;
}
return false;
}

View File

@@ -34,11 +34,12 @@ enum class ScaleMatch {
};
/// Bit flags naming the kinds of surface match a cache lookup can be asked for (they are used
/// as the template argument of FindMatch elsewhere in this diff).
/// NOTE(review): the enumerator list appears twice below, presumably the removed and the
/// re-aligned/extended versions of a diff hunk; only the second one contains Reinterpret.
enum class MatchFlags {
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4 ///< Surface that will match a display transfer "texture copy" parameters
Exact = 1 << 0, ///< Surface perfectly matches params
SubRect = 1 << 1, ///< Surface encompasses params
Copy = 1 << 2, ///< Surface that can be used as a copy source
Expand = 1 << 3, ///< Surface that can expand params
TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters
Reinterpret = 1 << 5, ///< Surface might have different pixel format.
};
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
@@ -182,14 +183,6 @@ private:
/// Downloads a fill surface to guest VRAM
void DownloadFillSurface(Surface& surface, SurfaceInterval interval);
/// Returns false if there is a surface in the cache at the interval with the same bit-width,
bool NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams params,
const SurfaceInterval& interval);
/// Return true if a surface with an invalid pixel format exists at the interval
bool IntervalHasInvalidPixelFormat(const SurfaceParams& params,
const SurfaceInterval& interval);
/// Attempt to find a reinterpretable surface in the cache and use it to copy for validation
bool ValidateByReinterpretation(Surface& surface, SurfaceParams params,
const SurfaceInterval& interval);

View File

@@ -45,13 +45,16 @@ bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fil
}
// Returns true when this surface can serve as the copy source for `copy_interval` of
// `dest_surface`: either the interval maps to a sub-rectangle of this surface (CanSubRect) or
// this surface can fill it (CanFill).
// NOTE(review): the body appears to contain both the removed and the added lines of a diff
// hunk (unbraced and braced variants of the same checks).
bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const {
SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval);
// FromInterval must reproduce exactly the requested interval.
ASSERT(subrect_params.GetInterval() == copy_interval);
if (CanSubRect(subrect_params))
return true;
if (CanFill(dest_surface, copy_interval))
if (CanSubRect(subrect_params)) {
return true;
}
if (CanFill(dest_surface, copy_interval)) {
return true;
}
return false;
}
@@ -102,6 +105,23 @@ SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) co
return result;
}
/// Returns the extent of the surface: the custom material's dimensions when the surface is
/// custom, otherwise the scaled dimensions if `scaled` is set, otherwise width/height as-is.
Extent SurfaceBase::RealExtent(bool scaled) {
    // A custom material overrides both the guest size and the scaled size.
    if (IsCustom()) {
        const u32 custom_width = material->width;
        const u32 custom_height = material->height;
        return Extent{
            .width = custom_width,
            .height = custom_height,
        };
    }

    u32 extent_width = width;
    u32 extent_height = height;
    if (scaled) {
        extent_width = GetScaledWidth();
        extent_height = GetScaledHeight();
    }
    return Extent{
        .width = extent_width,
        .height = extent_height,
    };
}
/// True only when a material is attached and it provides a map for MapType::Normal.
bool SurfaceBase::HasNormalMap() const noexcept {
    if (!material) {
        return false;
    }
    return material->Map(MapType::Normal) != nullptr;
}

View File

@@ -38,6 +38,9 @@ public:
/// Returns the clear value used to validate another surface from this fill surface
ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format);
/// Returns the internal surface extent.
Extent RealExtent(bool scaled = true);
/// Returns true if the surface contains a custom material with a normal map.
bool HasNormalMap() const noexcept;
@@ -55,7 +58,7 @@ public:
}
/// Returns true when `interval` is not found in invalid_regions.
/// NOTE(review): the two return statements appear to be the removed and added form of the
/// same line in a diff hunk; they are equivalent apart from the redundant parentheses.
bool IsRegionValid(SurfaceInterval interval) const {
return (invalid_regions.find(interval) == invalid_regions.end());
return invalid_regions.find(interval) == invalid_regions.end();
}
void MarkValid(SurfaceInterval interval) {

View File

@@ -11,7 +11,7 @@ bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
return std::tie(other_surface.addr, other_surface.width, other_surface.height,
other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
pixel_format != PixelFormat::Invalid /*&& levels >= other_surface.levels*/;
pixel_format != PixelFormat::Invalid && levels >= other_surface.levels;
}
bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
@@ -23,6 +23,12 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
GetSubRect(sub_surface).right <= stride;
}
/// Returns true if this surface can act as a reinterpretation source for `other_surface`:
/// the other range lies within [addr, end], this format is valid, tiling matches, and the
/// address offset is a multiple of one tile (64 pixels) or one pixel of this format.
bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) {
    const bool is_contained = other_surface.addr >= addr && other_surface.end <= end;
    if (!is_contained) {
        return false;
    }
    if (pixel_format == PixelFormat::Invalid) {
        return false;
    }
    if (other_surface.is_tiled != is_tiled) {
        return false;
    }
    // Alignment granularity in bytes; only computed once the format is known to be valid.
    const auto granularity = BytesInPixels(is_tiled ? 64 : 1);
    return (other_surface.addr - addr) % granularity == 0;
}
bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
addr <= expanded_surface.end && expanded_surface.addr <= end &&

View File

@@ -19,6 +19,9 @@ public:
/// Returns true if sub_surface is a subrect of params
bool CanSubRect(const SurfaceParams& sub_surface) const;
/// Returns true if other_surface can be used for reinterpretation.
bool CanReinterpret(const SurfaceParams& other_surface);
/// Returns true if params can be expanded to match expanded_surface
bool CanExpand(const SurfaceParams& expanded_surface) const;

View File

@@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_opengl/gl_blit_helper.h"
#include "video_core/renderer_opengl/gl_driver.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h"
#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h"
#include "video_core/host_shaders/full_screen_triangle_vert.h"
#include "video_core/host_shaders/texture_filtering/bicubic_frag.h"
#include "video_core/host_shaders/texture_filtering/nearest_neighbor_frag.h"
@@ -49,8 +53,8 @@ OGLProgram CreateProgram(std::string_view frag) {
} // Anonymous namespace
BlitHelper::BlitHelper(TextureRuntime& runtime_)
: runtime{runtime_}, linear_sampler{CreateSampler(GL_LINEAR)},
BlitHelper::BlitHelper(const Driver& driver_)
: driver{driver_}, linear_sampler{CreateSampler(GL_LINEAR)},
nearest_sampler{CreateSampler(GL_NEAREST)}, bicubic_program{CreateProgram(
HostShaders::BICUBIC_FRAG)},
nearest_program{CreateProgram(HostShaders::NEAREST_NEIGHBOR_FRAG)},
@@ -58,17 +62,86 @@ BlitHelper::BlitHelper(TextureRuntime& runtime_)
xbrz_program{CreateProgram(HostShaders::XBRZ_FREESCALE_FRAG)},
gradient_x_program{CreateProgram(HostShaders::X_GRADIENT_FRAG)},
gradient_y_program{CreateProgram(HostShaders::Y_GRADIENT_FRAG)},
refine_program{CreateProgram(HostShaders::REFINE_FRAG)} {
refine_program{CreateProgram(HostShaders::REFINE_FRAG)},
d24s8_to_rgba8{CreateProgram(HostShaders::D24S8_TO_RGBA8_FRAG)},
rgba4_to_rgb5a1{CreateProgram(HostShaders::RGBA4_TO_RGB5A1_FRAG)} {
vao.Create();
filter_fbo.Create();
draw_fbo.Create();
state.draw.vertex_array = vao.handle;
for (u32 i = 0; i < 3; i++) {
state.texture_units[i].sampler = i == 2 ? nearest_sampler.handle : linear_sampler.handle;
}
if (driver.IsOpenGLES()) {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
use_texture_view = false;
}
}
BlitHelper::~BlitHelper() = default;
// Draws `source` (D24S8) into `dest` through the d24s8_to_rgba8 program.
// Texture unit 0 gets the source texture; unit 1 gets `temp_tex`, which is either a texture
// view of the source or an intermediate copy, switched to stencil-index sampling.
// The GL state active on entry is re-applied on exit. Always returns true.
bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
// Snapshot the current state and restore it when the function leaves scope.
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = source.Handle();
if (use_texture_view) {
// View path: a fresh texture name is turned into a one-level, one-layer view of the source.
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
// NOTE(review): temp_tex is only bound to unit 1 by the state.Apply() further down, so
// these parameters go to whatever is bound to GL_TEXTURE_2D on unit 1 now — confirm.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (blit.src_rect.top > temp_rect.top || blit.src_rect.right > temp_rect.right) {
// Copy path: the intermediate texture is re-created only when the source rectangle
// exceeds the size recorded in temp_rect.
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, blit.src_rect.right,
blit.src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = blit.src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
// Without views, copy the source rectangle into the same position of the temp texture.
glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, blit.src_rect.left,
blit.src_rect.bottom, 0, temp_tex.handle, GL_TEXTURE_2D, 0,
blit.src_rect.left, blit.src_rect.bottom, 0, blit.src_rect.GetWidth(),
blit.src_rect.GetHeight(), 1);
}
// Make the texture on unit 1 return the stencil component when sampled.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
SetParams(d24s8_to_rgba8, source.RealExtent(), blit.src_rect);
Draw(d24s8_to_rgba8, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect);
if (use_texture_view) {
// The view is created per call, so it is released again here.
temp_tex.Release();
}
return true;
}
// Draws `source` (RGBA4) into `dest` through the rgba4_to_rgb5a1 program, sampling the source
// from texture unit 0. The GL state active on entry is re-applied on exit.
// Always returns true.
bool BlitHelper::ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest,
const VideoCore::TextureBlit& blit) {
// Snapshot the current state and restore it when the function leaves scope.
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
state.texture_units[0].texture_2d = source.Handle();
SetParams(rgba4_to_rgb5a1, source.RealExtent(), blit.src_rect);
Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect);
return true;
}
bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) {
// Filtering to depth stencil surfaces isn't supported.
if (surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil) {
@@ -149,7 +222,7 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b
Draw(gradient_y_program, LUMAD.tex.handle, LUMAD.fbo.handle, 0, temp_rect);
// refine pass
Draw(refine_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
Draw(refine_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
// These will have handles from the previous texture that was filtered, reset them to avoid
// binding invalid textures.
@@ -160,25 +233,25 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b
}
// Runs the bicubic program over blit.src_rect and draws into blit.dst_rect of the surface at
// blit.dst_level.
// NOTE(review): the two SetParams/Draw pairs appear to be the removed (Extent/filter_fbo) and
// added (RealExtent(false)/draw_fbo) lines of a diff hunk.
void BlitHelper::FilterBicubic(Surface& surface, const VideoCore::TextureBlit& blit) {
SetParams(bicubic_program, surface.Extent(), blit.src_rect);
Draw(bicubic_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(bicubic_program, surface.RealExtent(false), blit.src_rect);
Draw(bicubic_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
// Runs the nearest-neighbor program; surface.Handle(0) is bound to texture unit 2 first, then
// the result is drawn into blit.dst_rect at blit.dst_level.
// NOTE(review): the two SetParams/Draw pairs appear to be the removed (Extent/filter_fbo) and
// added (RealExtent(false)/draw_fbo) lines of a diff hunk.
void BlitHelper::FilterNearest(Surface& surface, const VideoCore::TextureBlit& blit) {
state.texture_units[2].texture_2d = surface.Handle(0);
SetParams(nearest_program, surface.Extent(), blit.src_rect);
Draw(nearest_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(nearest_program, surface.RealExtent(false), blit.src_rect);
Draw(nearest_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
// Runs the ScaleForce program over blit.src_rect and draws into blit.dst_rect of the surface
// at blit.dst_level.
// NOTE(review): the two SetParams/Draw pairs appear to be the removed (Extent/filter_fbo) and
// added (RealExtent(false)/draw_fbo) lines of a diff hunk.
void BlitHelper::FilterScaleForce(Surface& surface, const VideoCore::TextureBlit& blit) {
SetParams(scale_force_program, surface.Extent(), blit.src_rect);
Draw(scale_force_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(scale_force_program, surface.RealExtent(false), blit.src_rect);
Draw(scale_force_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
// Runs the xBRZ program: uniform location 2 receives the surface's res_scale as a float, then
// the result is drawn into blit.dst_rect at blit.dst_level.
// NOTE(review): the two SetParams/Draw pairs appear to be the removed (Extent/filter_fbo) and
// added (RealExtent(false)/draw_fbo) lines of a diff hunk.
void BlitHelper::FilterXbrz(Surface& surface, const VideoCore::TextureBlit& blit) {
glProgramUniform1f(xbrz_program.handle, 2, static_cast<GLfloat>(surface.res_scale));
SetParams(xbrz_program, surface.Extent(), blit.src_rect);
Draw(xbrz_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect);
SetParams(xbrz_program, surface.RealExtent(false), blit.src_rect);
Draw(xbrz_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect);
}
void BlitHelper::SetParams(OGLProgram& program, const VideoCore::Extent& src_extent,
@@ -206,7 +279,7 @@ void BlitHelper::Draw(OGLProgram& program, GLuint dst_tex, GLuint dst_fbo, u32 d
dst_level);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
glDrawArrays(GL_TRIANGLES, 0, 3);
}
} // namespace OpenGL

View File

@@ -15,16 +15,20 @@ struct TextureBlit;
namespace OpenGL {
class TextureRuntime;
class Driver;
class Surface;
class BlitHelper {
public:
BlitHelper(TextureRuntime& runtime);
explicit BlitHelper(const Driver& driver);
~BlitHelper();
bool Filter(Surface& surface, const VideoCore::TextureBlit& blit);
bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
private:
void FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit);
@@ -43,10 +47,10 @@ private:
Common::Rectangle<u32> dst_rect);
private:
TextureRuntime& runtime;
const Driver& driver;
OGLVertexArray vao;
OpenGLState state;
OGLFramebuffer filter_fbo;
OGLFramebuffer draw_fbo;
OGLSampler linear_sampler;
OGLSampler nearest_sampler;
@@ -57,6 +61,12 @@ private:
OGLProgram gradient_x_program;
OGLProgram gradient_y_program;
OGLProgram refine_program;
OGLProgram d24s8_to_rgba8;
OGLProgram rgba4_to_rgb5a1;
OGLTexture temp_tex;
Common::Rectangle<u32> temp_rect{};
bool use_texture_view{true};
};
} // namespace OpenGL

View File

@@ -1,134 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/scope_exit.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_runtime.h"
#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h"
#include "video_core/host_shaders/format_reinterpreter/fullscreen_quad_vert.h"
#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h"
namespace OpenGL {
// Builds the RGBA4 -> RGB5A1 program from the fullscreen-quad vertex shader and the conversion
// fragment shader, caches the locations of its three uniforms and creates the vertex array.
RGBA4toRGB5A1::RGBA4toRGB5A1() {
program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::RGBA4_TO_RGB5A1_FRAG);
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
}
// Draws `src_rect` of `source` into `dst_rect` of `dest` with the RGBA4 -> RGB5A1 program.
// The GL state active on entry is re-applied on exit.
void RGBA4toRGB5A1::Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) {
// Snapshot the current state and restore it when the function leaves scope.
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
// Fresh state object: source on unit 0, viewport restricted to the destination rectangle.
OpenGLState state;
state.texture_units[0].texture_2d = source.Handle();
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
// Attach dest as the color target and detach any depth-stencil attachment.
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(),
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
// Rectangle sizes and the source offset are passed to the shader as integer uniforms.
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
// Builds the D24S8 -> RGBA8 program, caches its uniform locations, points the "stencil"
// sampler at texture unit 1 and decides from the GL vendor string whether texture views are
// used or an intermediate copy texture is needed.
ShaderD24S8toRGBA8::ShaderD24S8toRGBA8() {
program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::D24S8_TO_RGBA8_FRAG);
dst_size_loc = glGetUniformLocation(program.handle, "dst_size");
src_size_loc = glGetUniformLocation(program.handle, "src_size");
src_offset_loc = glGetUniformLocation(program.handle, "src_offset");
vao.Create();
// Temporarily make this program current so glUniform1i targets it, then switch back to the
// program that was active before.
auto state = OpenGLState::GetCurState();
auto cur_program = state.draw.shader_program;
state.draw.shader_program = program.handle;
state.Apply();
glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1);
state.draw.shader_program = cur_program;
state.Apply();
// Nvidia seem to be the only one to support D24S8 views, at least on windows
// so for everyone else it will do an intermediate copy before running through the shader
std::string_view vendor{reinterpret_cast<const char*>(glGetString(GL_VENDOR))};
if (vendor.find("NVIDIA") != vendor.npos) {
use_texture_view = true;
} else {
LOG_INFO(Render_OpenGL,
"Texture views are unsupported, reinterpretation will do intermediate copy");
temp_tex.Create();
}
}
// Draws `src_rect` of `source` (D24S8) into `dst_rect` of `dest` with the D24S8 -> RGBA8
// program. Unit 0 samples the source; unit 1 samples `temp_tex` (a texture view of the source
// or an intermediate copy) in stencil-index mode. The GL state active on entry is re-applied
// on exit.
void ShaderD24S8toRGBA8::Reinterpret(Surface& source, Common::Rectangle<u32> src_rect,
Surface& dest, Common::Rectangle<u32> dst_rect) {
// Snapshot the current state and restore it when the function leaves scope.
OpenGLState prev_state = OpenGLState::GetCurState();
SCOPE_EXIT({ prev_state.Apply(); });
OpenGLState state;
state.texture_units[0].texture_2d = source.Handle();
if (use_texture_view) {
// View path: a fresh texture name is turned into a one-level, one-layer view of the source.
temp_tex.Create();
glActiveTexture(GL_TEXTURE1);
glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0,
1);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
} else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) {
// Copy path: the intermediate texture is re-created only when the source rectangle exceeds
// the size recorded in temp_rect.
temp_tex.Release();
temp_tex.Create();
state.texture_units[1].texture_2d = temp_tex.handle;
state.Apply();
glActiveTexture(GL_TEXTURE1);
glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
temp_rect = src_rect;
}
state.texture_units[1].texture_2d = temp_tex.handle;
state.draw.draw_framebuffer = draw_fbo.handle;
state.draw.shader_program = program.handle;
state.draw.vertex_array = vao.handle;
state.viewport = {static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.bottom),
static_cast<GLsizei>(dst_rect.GetWidth()),
static_cast<GLsizei>(dst_rect.GetHeight())};
state.Apply();
glActiveTexture(GL_TEXTURE1);
if (!use_texture_view) {
// Without views, copy the source rectangle into the same position of the temp texture.
glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0,
src_rect.GetWidth(), src_rect.GetHeight(), 1);
}
// Make the texture on unit 1 return the stencil component when sampled.
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
// Attach dest as the color target and detach any depth-stencil attachment.
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(),
0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight());
glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight());
glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
if (use_texture_view) {
// The view is created per call, so it is released again here.
temp_tex.Release();
}
}
} // namespace OpenGL

View File

@@ -1,76 +0,0 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/math_util.h"
#include "video_core/rasterizer_cache/pixel_format.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
class Surface;
/// Abstract interface for a converter that reinterprets a surface of one pixel format into
/// another surface. Owns a read and a draw framebuffer that are created on construction.
class FormatReinterpreterBase {
public:
FormatReinterpreterBase() {
read_fbo.Create();
draw_fbo.Create();
}
virtual ~FormatReinterpreterBase() = default;
/// Pixel format this reinterpreter accepts as its source.
virtual VideoCore::PixelFormat GetSourceFormat() const = 0;
/// Converts `src_rect` of `source` into `dst_rect` of `dest`.
virtual void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) = 0;
protected:
OGLFramebuffer read_fbo;
OGLFramebuffer draw_fbo;
};
using ReinterpreterList = std::vector<std::unique_ptr<FormatReinterpreterBase>>;
/// Reinterpreter whose source format is RGBA4; the target is RGB5A1 per the class name.
class RGBA4toRGB5A1 final : public FormatReinterpreterBase {
public:
RGBA4toRGB5A1();
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::RGBA4;
}
void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) override;
private:
OGLProgram program;
// Uniform locations; -1 until looked up.
GLint dst_size_loc{-1};
GLint src_size_loc{-1};
GLint src_offset_loc{-1};
OGLVertexArray vao;
};
/// Reinterpreter whose source format is D24S8; the target is RGBA8 per the class name.
class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase {
public:
ShaderD24S8toRGBA8();
VideoCore::PixelFormat GetSourceFormat() const override {
return VideoCore::PixelFormat::D24S8;
}
void Reinterpret(Surface& source, Common::Rectangle<u32> src_rect, Surface& dest,
Common::Rectangle<u32> dst_rect) override;
private:
// False by default (value-initialized).
bool use_texture_view{};
OGLProgram program{};
// Uniform locations; -1 until looked up.
GLint dst_size_loc{-1};
GLint src_size_loc{-1};
GLint src_offset_loc{-1};
OGLVertexArray vao{};
// Texture sampled for the stencil component, with a rectangle recorded alongside it.
OGLTexture temp_tex{};
Common::Rectangle<u32> temp_rect{0, 0, 0, 0};
};
} // namespace OpenGL

View File

@@ -117,20 +117,11 @@ struct FramebufferInfo {
} // Anonymous namespace
// Constructs the runtime: stores the driver, builds the blit helper and creates every entry
// of draw_fbos / read_fbos.
// NOTE(review): two member-initializer lines and the reinterpreter registration below appear
// to be the removed and added sides of a diff hunk (blit_helper{*this} vs blit_helper{driver});
// read this span as a diff, not as one compilable body.
TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& renderer)
: driver{driver_}, blit_helper{*this} {
: driver{driver_}, blit_helper{driver} {
for (std::size_t i = 0; i < draw_fbos.size(); ++i) {
draw_fbos[i].Create();
read_fbos[i].Create();
}
// Registers a reinterpreter in the list indexed by its destination pixel format.
auto add_reinterpreter = [this](PixelFormat dest,
std::unique_ptr<FormatReinterpreterBase>&& obj) {
const u32 dst_index = static_cast<u32>(dest);
return reinterpreters[dst_index].push_back(std::move(obj));
};
add_reinterpreter(PixelFormat::RGBA8, std::make_unique<ShaderD24S8toRGBA8>());
add_reinterpreter(PixelFormat::RGB5A1, std::make_unique<RGBA4toRGB5A1>());
}
TextureRuntime::~TextureRuntime() = default;
@@ -192,8 +183,8 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params,
const auto& tuple =
is_custom ? GetFormatTuple(params.custom_format) : GetFormatTuple(params.pixel_format);
const HostTextureTag key = {
.width = params.width,
.height = params.height,
.width = params.GetScaledWidth(),
.height = params.GetScaledHeight(),
.levels = params.levels,
.res_scale = params.res_scale,
.tuple = tuple,
@@ -238,13 +229,30 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params,
.textures = std::move(textures),
.handles = std::move(handles),
.tuple = tuple,
.width = params.width,
.height = params.height,
.width = params.GetScaledWidth(),
.height = params.GetScaledHeight(),
.levels = params.levels,
.res_scale = params.res_scale,
};
}
/// Dispatches a reinterpretation blit based on the pixel formats of the two surfaces.
/// Supported pairs are D24S8 -> RGBA8 and RGBA4 -> RGB5A1, both forwarded to the blit helper.
/// @param source Surface whose contents are read.
/// @param dest Surface that receives the reinterpreted data.
/// @param blit Blit description handed through to the blit helper unchanged.
/// @return true when a conversion was dispatched; false (after logging a warning) when the
///         format pair has no implementation.
bool TextureRuntime::Reinterpret(Surface& source, Surface& dest,
                                 const VideoCore::TextureBlit& blit) {
    const PixelFormat from = source.pixel_format;
    const PixelFormat to = dest.pixel_format;

    if (from == PixelFormat::D24S8 && to == PixelFormat::RGBA8) {
        blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit);
        return true;
    }

    if (from == PixelFormat::RGBA4 && to == PixelFormat::RGB5A1) {
        blit_helper.ConvertRGBA4ToRGB5A1(source, dest, blit);
        return true;
    }

    // No converter exists for this pair: report it and let the caller fall back.
    LOG_WARNING(Render_OpenGL, "Unimplemented reinterpretation {} -> {}",
                VideoCore::PixelFormatAsString(from), VideoCore::PixelFormatAsString(to));
    return false;
}
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) {
const auto prev_state = OpenGLState::GetCurState();
@@ -360,11 +368,6 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) {
}
}
/// Looks up the reinterpreter list stored for the given destination format.
/// @param dest_format Destination pixel format; its numeric value is used as the array index.
/// @return Reference to the list held in the reinterpreters array for that format.
const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations(
    PixelFormat dest_format) const {
    const u32 dst_index = static_cast<u32>(dest_format);
    return reinterpreters[dst_index];
}
Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params)
: SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} {
if (pixel_format == PixelFormat::Invalid) {

View File

@@ -7,7 +7,6 @@
#include "video_core/rasterizer_cache/framebuffer_base.h"
#include "video_core/rasterizer_cache/rasterizer_cache_base.h"
#include "video_core/renderer_opengl/gl_blit_helper.h"
#include "video_core/renderer_opengl/gl_format_reinterpreter.h"
namespace VideoCore {
struct Material;
@@ -76,7 +75,6 @@ class Driver;
class TextureRuntime {
friend class Surface;
friend class Framebuffer;
friend class BlitHelper;
public:
explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer);
@@ -95,12 +93,8 @@ public:
const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format) const;
const FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format);
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, Allocation&& alloc);
/// Allocates a texture with the specified dimensions and format
Allocation Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material = nullptr);
/// Attempts to reinterpret the contents of source into dest; returns false if the format pair is unsupported
bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit);
/// Fills the rectangle of the texture with the clear value provided
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear);
@@ -114,10 +108,14 @@ public:
/// Generates mipmaps for all the available levels of the texture
void GenerateMipmaps(Surface& surface);
/// Returns all source formats that support reinterpretation to the dest format
const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const;
private:
/// Allocates a texture with the specified dimensions and format
Allocation Allocate(const VideoCore::SurfaceParams& params,
const VideoCore::Material* material = nullptr);
/// Takes back ownership of the allocation for recycling
void Recycle(const HostTextureTag tag, Allocation&& alloc);
/// Returns the OpenGL driver class
const Driver& GetDriver() const {
return driver;
@@ -127,7 +125,6 @@ private:
const Driver& driver;
BlitHelper blit_helper;
std::vector<u8> staging_buffer;
std::array<ReinterpreterList, VideoCore::PIXEL_FORMAT_COUNT> reinterpreters;
std::unordered_multimap<HostTextureTag, Allocation, HostTextureTag::Hash> alloc_cache;
std::unordered_map<u64, OGLFramebuffer, Common::IdentityHash<u64>> framebuffer_cache;
std::array<OGLFramebuffer, 3> draw_fbos;
@@ -145,24 +142,14 @@ public:
Surface(Surface&& o) noexcept = default;
Surface& operator=(Surface&& o) noexcept = default;
/// Returns the surface image handle at the provided index.
GLuint Handle(u32 index = 1) const noexcept {
[[nodiscard]] GLuint Handle(u32 index = 1) const noexcept {
return alloc.handles[index];
}
/// Returns the tuple of the surface allocation.
const FormatTuple& Tuple() const noexcept {
[[nodiscard]] const FormatTuple& Tuple() const noexcept {
return alloc.tuple;
}
/// Reports the width and height recorded in this surface's allocation.
VideoCore::Extent Extent() const noexcept {
    VideoCore::Extent extent{};
    extent.width = alloc.width;
    extent.height = alloc.height;
    return extent;
}
/// Uploads pixel data in staging to a rectangle region of the surface texture
void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);