diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 619892784..3cef900ee 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -56,8 +56,6 @@ add_library(video_core STATIC renderer_opengl/gl_blit_helper.h renderer_opengl/gl_driver.cpp renderer_opengl/gl_driver.h - renderer_opengl/gl_format_reinterpreter.cpp - renderer_opengl/gl_format_reinterpreter.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer_cache.cpp diff --git a/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag b/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag index ae0f5a36d..17c8e77f7 100644 --- a/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag +++ b/src/video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8.frag @@ -4,29 +4,18 @@ //? #version 430 core -layout(location = 0) in mediump vec2 dst_coord; +layout(location = 0) in mediump vec2 tex_coord; layout(location = 0) out lowp vec4 frag_color; layout(binding = 0) uniform highp sampler2D depth; layout(binding = 1) uniform lowp usampler2D stencil; -uniform mediump ivec2 dst_size; -uniform mediump ivec2 src_size; -uniform mediump ivec2 src_offset; void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; - + mediump vec2 coord = tex_coord * vec2(textureSize(depth, 0)); + mediump ivec2 tex_icoord = ivec2(coord); highp uint depth_val = - uint(texelFetch(depth, tex_coord, 0).x * (exp2(32.0) - 1.0)); - lowp uint stencil_val = texelFetch(stencil, tex_coord, 0).x; + uint(texelFetch(depth, tex_icoord, 0).x * (exp2(32.0) - 1.0)); + lowp uint stencil_val = texelFetch(stencil, tex_icoord, 0).x; highp uvec4 components = uvec4(stencil_val, 
(uvec3(depth_val) >> uvec3(24u, 16u, 8u)) & 0x000000FFu); frag_color = vec4(components) / (exp2(8.0) - 1.0); diff --git a/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert b/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert index 62928913c..3c5e771bc 100644 --- a/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert +++ b/src/video_core/host_shaders/format_reinterpreter/fullscreen_quad.vert @@ -6,7 +6,7 @@ layout(location = 0) out vec2 dst_coord; -uniform mediump ivec2 dst_size; +layout(location = 0) uniform mediump ivec2 dst_size; const vec2 vertices[4] = vec2[4](vec2(-1.0, -1.0), vec2(1.0, -1.0), vec2(-1.0, 1.0), vec2(1.0, 1.0)); diff --git a/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag b/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag index fb1a874f9..99ca820da 100644 --- a/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag +++ b/src/video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1.frag @@ -4,26 +4,15 @@ //? 
#version 430 core -layout(location = 0) in mediump vec2 dst_coord; +layout(location = 0) in mediump vec2 tex_coord; layout(location = 0) out lowp vec4 frag_color; layout(binding = 0) uniform lowp sampler2D source; -uniform mediump ivec2 dst_size; -uniform mediump ivec2 src_size; -uniform mediump ivec2 src_offset; void main() { - mediump ivec2 tex_coord; - if (src_size == dst_size) { - tex_coord = ivec2(dst_coord); - } else { - highp int tex_index = int(dst_coord.y) * dst_size.x + int(dst_coord.x); - mediump int y = tex_index / src_size.x; - tex_coord = ivec2(tex_index - y * src_size.x, y); - } - tex_coord -= src_offset; - - lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_coord, 0) * (exp2(4.0) - 1.0)); + mediump vec2 coord = tex_coord * vec2(textureSize(source, 0)); + mediump ivec2 tex_icoord = ivec2(coord); + lowp ivec4 rgba4 = ivec4(texelFetch(source, tex_icoord, 0) * (exp2(4.0) - 1.0)); lowp ivec3 rgb5 = ((rgba4.rgb << ivec3(1, 2, 3)) | (rgba4.gba >> ivec3(3, 2, 1))) & 0x1F; frag_color = vec4(vec3(rgb5) / (exp2(5.0) - 1.0), rgba4.a & 0x01); diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.cpp b/src/video_core/rasterizer_cache/rasterizer_cache.cpp index dc3c461f2..391273236 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.cpp +++ b/src/video_core/rasterizer_cache/rasterizer_cache.cpp @@ -10,12 +10,11 @@ MICROPROFILE_DEFINE(RasterizerCache_CopySurface, "RasterizerCache", "CopySurface MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_UploadSurface, "RasterizerCache", "UploadSurface", MP_RGB(128, 192, 64)); -MICROPROFILE_DEFINE(RasterizerCache_ComputeHash, "RasterizerCache", "ComputeHash", +MICROPROFILE_DEFINE(RasterizerCache_ValidateSurface, "RasterizerCache", "ValidateSurface", MP_RGB(32, 64, 192)); MICROPROFILE_DEFINE(RasterizerCache_DownloadSurface, "RasterizerCache", "DownloadSurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_Invalidation, "RasterizerCache", "Invalidation", MP_RGB(128, 64, 192)); 
-MICROPROFILE_DEFINE(RasterizerCache_Flush, "RasterizerCache", "Flush", MP_RGB(128, 64, 192)); } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 8bdd7b55e..32a432cf1 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -19,10 +19,9 @@ namespace VideoCore { MICROPROFILE_DECLARE(RasterizerCache_CopySurface); MICROPROFILE_DECLARE(RasterizerCache_UploadSurface); -MICROPROFILE_DECLARE(RasterizerCache_ComputeHash); +MICROPROFILE_DECLARE(RasterizerCache_ValidateSurface); MICROPROFILE_DECLARE(RasterizerCache_DownloadSurface); MICROPROFILE_DECLARE(RasterizerCache_Invalidation); -MICROPROFILE_DECLARE(RasterizerCache_Flush); constexpr auto RangeFromInterval(const auto& map, const auto& interval) { return boost::make_iterator_range(map.equal_range(interval)); @@ -848,12 +847,19 @@ SurfaceId RasterizerCache::FindMatch(const SurfaceParams& params, ScaleMatch }); IsMatch_Helper(std::integral_constant{}, [&] { ASSERT(validate_interval); - auto copy_interval = + const SurfaceInterval copy_interval = surface.GetCopyableInterval(params.FromInterval(*validate_interval)); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface.CanCopy(params, copy_interval); + const bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface.CanCopy(params, copy_interval); return std::make_pair(matched, copy_interval); }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + const bool matched = + !boost::icl::contains(surface.invalid_regions, *validate_interval) && + surface.CanReinterpret(params); + return std::make_pair(matched, surface.GetInterval()); + }); IsMatch_Helper(std::integral_constant{}, [&] { return std::make_pair(surface.CanExpand(params), surface.GetInterval()); }); @@ -903,6 +909,8 @@ void RasterizerCache::ValidateSurface(SurfaceId 
surface_id, PAddr addr, u32 s return; } + MICROPROFILE_SCOPE(RasterizerCache_ValidateSurface); + Surface& surface = slot_surfaces[surface_id]; const SurfaceInterval validate_interval(addr, addr + size); @@ -949,22 +957,7 @@ void RasterizerCache::ValidateSurface(SurfaceId surface_id, PAddr addr, u32 s notify_validated(interval); continue; } - // Could not find a matching reinterpreter, check if we need to implement a - // reinterpreter - if (NoUnimplementedReinterpretations(surface, params, interval) && - !IntervalHasInvalidPixelFormat(params, interval)) { - // No surfaces were found in the cache that had a matching bit-width. - // If the region was created entirely on the GPU, - // assume it was a developer mistake and skip flushing. - if (boost::icl::contains(dirty_regions, interval)) { - LOG_DEBUG(HW_GPU, "Region created fully on GPU and reinterpretation is " - "invalid. Skipping validation"); - validate_regions.erase(interval); - continue; - } - } - // Load data from 3DS memory FlushRegion(params.addr, params.size); if (!use_custom_textures || !UploadCustomSurface(surface_id, interval)) { UploadSurface(surface, interval); @@ -1134,71 +1127,41 @@ void RasterizerCache::DownloadFillSurface(Surface& surface, SurfaceInterval i } } -template -bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, - SurfaceParams params, - const SurfaceInterval& interval) { - static constexpr std::array all_formats{ - PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, - PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, - PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, - PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, - PixelFormat::D24S8, - }; - bool implemented = true; - for (PixelFormat format : all_formats) { - if (GetFormatBpp(format) == surface.GetFormatBpp()) { - params.pixel_format = format; - // This could potentially be expensive, - // although 
experimentally it hasn't been too bad - SurfaceId test_surface_id = - FindMatch(params, ScaleMatch::Ignore, interval); - if (test_surface_id) { - LOG_WARNING(HW_GPU, "Missing pixel_format reinterpreter: {} -> {}", - PixelFormatAsString(format), PixelFormatAsString(surface.pixel_format)); - implemented = false; - } - } - } - return implemented; -} - -template -bool RasterizerCache::IntervalHasInvalidPixelFormat(const SurfaceParams& params, - const SurfaceInterval& interval) { - bool invalid_format_found = false; - ForEachSurfaceInRegion(params.addr, params.end, [&](SurfaceId surface_id, Surface& surface) { - if (surface.pixel_format == PixelFormat::Invalid) { - LOG_DEBUG(HW_GPU, "Surface {:#x} found with invalid pixel format", surface.addr); - invalid_format_found = true; - return true; - } - return false; - }); - return invalid_format_found; -} - template bool RasterizerCache::ValidateByReinterpretation(Surface& surface, SurfaceParams params, const SurfaceInterval& interval) { - const PixelFormat dest_format = surface.pixel_format; - for (const auto& reinterpreter : runtime.GetPossibleReinterpretations(dest_format)) { - params.pixel_format = reinterpreter->GetSourceFormat(); - SurfaceId reinterpret_surface_id = - FindMatch(params, ScaleMatch::Ignore, interval); - - if (reinterpret_surface_id) { - Surface& reinterpret_surface = slot_surfaces[reinterpret_surface_id]; - auto reinterpret_interval = reinterpret_surface.GetCopyableInterval(params); - auto reinterpret_params = surface.FromInterval(reinterpret_interval); - auto src_rect = reinterpret_surface.GetScaledSubRect(reinterpret_params); - auto dest_rect = surface.GetScaledSubRect(reinterpret_params); - reinterpreter->Reinterpret(reinterpret_surface, src_rect, surface, dest_rect); - - return true; + const PixelFormat dst_format = surface.pixel_format; + SurfaceId reinterpret_id = + FindMatch(params, ScaleMatch::Ignore, interval); + if (reinterpret_id) { + Surface& src_surface = slot_surfaces[reinterpret_id]; + if 
(src_surface.stride == surface.stride) { + const SurfaceInterval copy_interval = src_surface.GetCopyableInterval(params); + if (boost::icl::is_empty(copy_interval)) { + return false; + } + const PAddr addr = interval.lower(); + const PixelFormat src_format = src_surface.pixel_format; + const bool is_gpu_modified = boost::icl::contains(dirty_regions, copy_interval); + if (GetFormatBpp(src_format) != GetFormatBpp(dst_format) && is_gpu_modified) { + LOG_DEBUG(HW_GPU, "Region created fully on GPU and reinterpretation is " + "invalid. Skipping validation"); + return true; + } + const SurfaceParams copy_params = surface.FromInterval(copy_interval); + const TextureBlit reinterpret = { + .src_level = src_surface.LevelOf(addr), + .dst_level = surface.LevelOf(addr), + .src_rect = src_surface.GetScaledSubRect(copy_params), + .dst_rect = surface.GetScaledSubRect(copy_params), + }; + return runtime.Reinterpret(src_surface, surface, reinterpret); } + LOG_INFO(HW_GPU, "Unimplemented dimentional reinterpretatation {}x{} -> {}x{}", + src_surface.width, src_surface.height, surface.width, surface.height); + const bool is_gpu_modified = boost::icl::contains(dirty_regions, interval); + return is_gpu_modified; } - return false; } diff --git a/src/video_core/rasterizer_cache/rasterizer_cache_base.h b/src/video_core/rasterizer_cache/rasterizer_cache_base.h index dd4402cf8..a6a31ef46 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache_base.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache_base.h @@ -34,11 +34,12 @@ enum class ScaleMatch { }; enum class MatchFlags { - Exact = 1 << 0, ///< Surface perfectly matches params - SubRect = 1 << 1, ///< Surface encompasses params - Copy = 1 << 2, ///< Surface that can be used as a copy source - Expand = 1 << 3, ///< Surface that can expand params - TexCopy = 1 << 4 ///< Surface that will match a display transfer "texture copy" parameters + Exact = 1 << 0, ///< Surface perfectly matches params + SubRect = 1 << 1, ///< Surface 
encompasses params + Copy = 1 << 2, ///< Surface that can be used as a copy source + Expand = 1 << 3, ///< Surface that can expand params + TexCopy = 1 << 4, ///< Surface that will match a display transfer "texture copy" parameters + Reinterpret = 1 << 5, ///< Surface might have different pixel format. }; DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); @@ -182,14 +183,6 @@ private: /// Downloads a fill surface to guest VRAM void DownloadFillSurface(Surface& surface, SurfaceInterval interval); - /// Returns false if there is a surface in the cache at the interval with the same bit-width, - bool NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams params, - const SurfaceInterval& interval); - - /// Return true if a surface with an invalid pixel format exists at the interval - bool IntervalHasInvalidPixelFormat(const SurfaceParams& params, - const SurfaceInterval& interval); - /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation bool ValidateByReinterpretation(Surface& surface, SurfaceParams params, const SurfaceInterval& interval); diff --git a/src/video_core/rasterizer_cache/surface_base.cpp b/src/video_core/rasterizer_cache/surface_base.cpp index 0826be1ca..1ad8bfa37 100644 --- a/src/video_core/rasterizer_cache/surface_base.cpp +++ b/src/video_core/rasterizer_cache/surface_base.cpp @@ -45,13 +45,16 @@ bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fil } bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + const SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - if (CanFill(dest_surface, copy_interval)) + if (CanSubRect(subrect_params)) { return true; + } + + if (CanFill(dest_surface, copy_interval)) { + return true; + } return false; } @@ 
-102,6 +105,23 @@ SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) co return result; } +Extent SurfaceBase::RealExtent(bool scaled) { + const bool is_custom = IsCustom(); + u32 real_width = width; + u32 real_height = height; + if (is_custom) { + real_width = material->width; + real_height = material->height; + } else if (scaled) { + real_width = GetScaledWidth(); + real_height = GetScaledHeight(); + } + return Extent{ + .width = real_width, + .height = real_height, + }; +} + bool SurfaceBase::HasNormalMap() const noexcept { return material && material->Map(MapType::Normal) != nullptr; } diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h index 61b30aa29..4f5c671ea 100644 --- a/src/video_core/rasterizer_cache/surface_base.h +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -38,6 +38,9 @@ public: /// Returns the clear value used to validate another surface from this fill surface ClearValue MakeClearValue(PAddr copy_addr, PixelFormat dst_format); + /// Returns the internal surface extent. + Extent RealExtent(bool scaled = true); + /// Returns true if the surface contains a custom material with a normal map. 
bool HasNormalMap() const noexcept; @@ -55,7 +58,7 @@ public: } bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); + return invalid_regions.find(interval) == invalid_regions.end(); } void MarkValid(SurfaceInterval interval) { diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 77aa3ea7f..c5e358bfe 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -11,7 +11,7 @@ bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const { return std::tie(other_surface.addr, other_surface.width, other_surface.height, other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) == std::tie(addr, width, height, stride, pixel_format, is_tiled) && - pixel_format != PixelFormat::Invalid /*&& levels >= other_surface.levels*/; + pixel_format != PixelFormat::Invalid && levels >= other_surface.levels; } bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { @@ -23,6 +23,12 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const { GetSubRect(sub_surface).right <= stride; } +bool SurfaceParams::CanReinterpret(const SurfaceParams& other_surface) { + return other_surface.addr >= addr && other_surface.end <= end && + pixel_format != PixelFormat::Invalid && other_surface.is_tiled == is_tiled && + (other_surface.addr - addr) % BytesInPixels(is_tiled ? 
64 : 1) == 0; +} + bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const { return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format && addr <= expanded_surface.end && expanded_surface.addr <= end && diff --git a/src/video_core/rasterizer_cache/surface_params.h b/src/video_core/rasterizer_cache/surface_params.h index e71f4716b..f44a14955 100644 --- a/src/video_core/rasterizer_cache/surface_params.h +++ b/src/video_core/rasterizer_cache/surface_params.h @@ -19,6 +19,9 @@ public: /// Returns true if sub_surface is a subrect of params bool CanSubRect(const SurfaceParams& sub_surface) const; + /// Returns true if other_surface can be used for reinterpretation. + bool CanReinterpret(const SurfaceParams& other_surface); + /// Returns true if params can be expanded to match expanded_surface bool CanExpand(const SurfaceParams& expanded_surface) const; diff --git a/src/video_core/renderer_opengl/gl_blit_helper.cpp b/src/video_core/renderer_opengl/gl_blit_helper.cpp index b8223aa0a..aed7269d2 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.cpp +++ b/src/video_core/renderer_opengl/gl_blit_helper.cpp @@ -2,12 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include "common/scope_exit.h" #include "common/settings.h" #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/renderer_opengl/gl_blit_helper.h" +#include "video_core/renderer_opengl/gl_driver.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_runtime.h" +#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h" +#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h" #include "video_core/host_shaders/full_screen_triangle_vert.h" #include "video_core/host_shaders/texture_filtering/bicubic_frag.h" #include "video_core/host_shaders/texture_filtering/nearest_neighbor_frag.h" @@ -49,8 +53,8 @@ OGLProgram CreateProgram(std::string_view frag) { } // Anonymous namespace -BlitHelper::BlitHelper(TextureRuntime& runtime_) - : runtime{runtime_}, linear_sampler{CreateSampler(GL_LINEAR)}, +BlitHelper::BlitHelper(const Driver& driver_) + : driver{driver_}, linear_sampler{CreateSampler(GL_LINEAR)}, nearest_sampler{CreateSampler(GL_NEAREST)}, bicubic_program{CreateProgram( HostShaders::BICUBIC_FRAG)}, nearest_program{CreateProgram(HostShaders::NEAREST_NEIGHBOR_FRAG)}, @@ -58,17 +62,86 @@ BlitHelper::BlitHelper(TextureRuntime& runtime_) xbrz_program{CreateProgram(HostShaders::XBRZ_FREESCALE_FRAG)}, gradient_x_program{CreateProgram(HostShaders::X_GRADIENT_FRAG)}, gradient_y_program{CreateProgram(HostShaders::Y_GRADIENT_FRAG)}, - refine_program{CreateProgram(HostShaders::REFINE_FRAG)} { + refine_program{CreateProgram(HostShaders::REFINE_FRAG)}, + d24s8_to_rgba8{CreateProgram(HostShaders::D24S8_TO_RGBA8_FRAG)}, + rgba4_to_rgb5a1{CreateProgram(HostShaders::RGBA4_TO_RGB5A1_FRAG)} { vao.Create(); - filter_fbo.Create(); + draw_fbo.Create(); state.draw.vertex_array = vao.handle; for (u32 i = 0; i < 3; i++) { state.texture_units[i].sampler = i == 2 ? 
nearest_sampler.handle : linear_sampler.handle; } + if (driver.IsOpenGLES()) { + LOG_INFO(Render_OpenGL, + "Texture views are unsupported, reinterpretation will do intermediate copy"); + temp_tex.Create(); + use_texture_view = false; + } } BlitHelper::~BlitHelper() = default; +bool BlitHelper::ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + state.texture_units[0].texture_2d = source.Handle(); + + if (use_texture_view) { + temp_tex.Create(); + glActiveTexture(GL_TEXTURE1); + glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0, + 1); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + } else if (blit.src_rect.top > temp_rect.top || blit.src_rect.right > temp_rect.right) { + temp_tex.Release(); + temp_tex.Create(); + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + glActiveTexture(GL_TEXTURE1); + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH24_STENCIL8, blit.src_rect.right, + blit.src_rect.top); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + temp_rect = blit.src_rect; + } + state.texture_units[1].texture_2d = temp_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE1); + if (!use_texture_view) { + glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, blit.src_rect.left, + blit.src_rect.bottom, 0, temp_tex.handle, GL_TEXTURE_2D, 0, + blit.src_rect.left, blit.src_rect.bottom, 0, blit.src_rect.GetWidth(), + blit.src_rect.GetHeight(), 1); + } + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + + SetParams(d24s8_to_rgba8, source.RealExtent(), blit.src_rect); + Draw(d24s8_to_rgba8, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + + if (use_texture_view) { 
+ temp_tex.Release(); + } + + return true; +} + +bool BlitHelper::ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + state.texture_units[0].texture_2d = source.Handle(); + + SetParams(rgba4_to_rgb5a1, source.RealExtent(), blit.src_rect); + Draw(rgba4_to_rgb5a1, dest.Handle(), draw_fbo.handle, 0, blit.dst_rect); + + return true; +} + bool BlitHelper::Filter(Surface& surface, const VideoCore::TextureBlit& blit) { // Filtering to depth stencil surfaces isn't supported. if (surface.type == SurfaceType::Depth || surface.type == SurfaceType::DepthStencil) { @@ -149,7 +222,7 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b Draw(gradient_y_program, LUMAD.tex.handle, LUMAD.fbo.handle, 0, temp_rect); // refine pass - Draw(refine_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + Draw(refine_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); // These will have handles from the previous texture that was filtered, reset them to avoid // binding invalid textures. 
@@ -160,25 +233,25 @@ void BlitHelper::FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& b } void BlitHelper::FilterBicubic(Surface& surface, const VideoCore::TextureBlit& blit) { - SetParams(bicubic_program, surface.Extent(), blit.src_rect); - Draw(bicubic_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(bicubic_program, surface.RealExtent(false), blit.src_rect); + Draw(bicubic_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterNearest(Surface& surface, const VideoCore::TextureBlit& blit) { state.texture_units[2].texture_2d = surface.Handle(0); - SetParams(nearest_program, surface.Extent(), blit.src_rect); - Draw(nearest_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(nearest_program, surface.RealExtent(false), blit.src_rect); + Draw(nearest_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterScaleForce(Surface& surface, const VideoCore::TextureBlit& blit) { - SetParams(scale_force_program, surface.Extent(), blit.src_rect); - Draw(scale_force_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(scale_force_program, surface.RealExtent(false), blit.src_rect); + Draw(scale_force_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::FilterXbrz(Surface& surface, const VideoCore::TextureBlit& blit) { glProgramUniform1f(xbrz_program.handle, 2, static_cast(surface.res_scale)); - SetParams(xbrz_program, surface.Extent(), blit.src_rect); - Draw(xbrz_program, surface.Handle(), filter_fbo.handle, blit.dst_level, blit.dst_rect); + SetParams(xbrz_program, surface.RealExtent(false), blit.src_rect); + Draw(xbrz_program, surface.Handle(), draw_fbo.handle, blit.dst_level, blit.dst_rect); } void BlitHelper::SetParams(OGLProgram& program, const VideoCore::Extent& src_extent, @@ -206,7 +279,7 @@ void 
BlitHelper::Draw(OGLProgram& program, GLuint dst_tex, GLuint dst_fbo, u32 d dst_level); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + glDrawArrays(GL_TRIANGLES, 0, 3); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_blit_helper.h b/src/video_core/renderer_opengl/gl_blit_helper.h index 3c156b502..a0cf2a4be 100644 --- a/src/video_core/renderer_opengl/gl_blit_helper.h +++ b/src/video_core/renderer_opengl/gl_blit_helper.h @@ -15,16 +15,20 @@ struct TextureBlit; namespace OpenGL { -class TextureRuntime; +class Driver; class Surface; class BlitHelper { public: - BlitHelper(TextureRuntime& runtime); + explicit BlitHelper(const Driver& driver); ~BlitHelper(); bool Filter(Surface& surface, const VideoCore::TextureBlit& blit); + bool ConvertDS24S8ToRGBA8(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + + bool ConvertRGBA4ToRGB5A1(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); + private: void FilterAnime4K(Surface& surface, const VideoCore::TextureBlit& blit); @@ -43,10 +47,10 @@ private: Common::Rectangle dst_rect); private: - TextureRuntime& runtime; + const Driver& driver; OGLVertexArray vao; OpenGLState state; - OGLFramebuffer filter_fbo; + OGLFramebuffer draw_fbo; OGLSampler linear_sampler; OGLSampler nearest_sampler; @@ -57,6 +61,12 @@ private: OGLProgram gradient_x_program; OGLProgram gradient_y_program; OGLProgram refine_program; + OGLProgram d24s8_to_rgba8; + OGLProgram rgba4_to_rgb5a1; + + OGLTexture temp_tex; + Common::Rectangle temp_rect{}; + bool use_texture_view{true}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp deleted file mode 100644 index 5019f79b7..000000000 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright 2022 Citra Emulator 
Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/scope_exit.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_texture_runtime.h" - -#include "video_core/host_shaders/format_reinterpreter/d24s8_to_rgba8_frag.h" -#include "video_core/host_shaders/format_reinterpreter/fullscreen_quad_vert.h" -#include "video_core/host_shaders/format_reinterpreter/rgba4_to_rgb5a1_frag.h" - -namespace OpenGL { - -RGBA4toRGB5A1::RGBA4toRGB5A1() { - program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::RGBA4_TO_RGB5A1_FRAG); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); -} - -void RGBA4toRGB5A1::Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = source.Handle(); - state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(), - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); -} - 
-ShaderD24S8toRGBA8::ShaderD24S8toRGBA8() { - program.Create(HostShaders::FULLSCREEN_QUAD_VERT, HostShaders::D24S8_TO_RGBA8_FRAG); - dst_size_loc = glGetUniformLocation(program.handle, "dst_size"); - src_size_loc = glGetUniformLocation(program.handle, "src_size"); - src_offset_loc = glGetUniformLocation(program.handle, "src_offset"); - vao.Create(); - - auto state = OpenGLState::GetCurState(); - auto cur_program = state.draw.shader_program; - state.draw.shader_program = program.handle; - state.Apply(); - glUniform1i(glGetUniformLocation(program.handle, "stencil"), 1); - state.draw.shader_program = cur_program; - state.Apply(); - - // Nvidia seem to be the only one to support D24S8 views, at least on windows - // so for everyone else it will do an intermediate copy before running through the shader - std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; - if (vendor.find("NVIDIA") != vendor.npos) { - use_texture_view = true; - } else { - LOG_INFO(Render_OpenGL, - "Texture views are unsupported, reinterpretation will do intermediate copy"); - temp_tex.Create(); - } -} - -void ShaderD24S8toRGBA8::Reinterpret(Surface& source, Common::Rectangle src_rect, - Surface& dest, Common::Rectangle dst_rect) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.texture_units[0].texture_2d = source.Handle(); - - if (use_texture_view) { - temp_tex.Create(); - glActiveTexture(GL_TEXTURE1); - glTextureView(temp_tex.handle, GL_TEXTURE_2D, source.Handle(), GL_DEPTH24_STENCIL8, 0, 1, 0, - 1); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - } else if (src_rect.top > temp_rect.top || src_rect.right > temp_rect.right) { - temp_tex.Release(); - temp_tex.Create(); - state.texture_units[1].texture_2d = temp_tex.handle; - state.Apply(); - glActiveTexture(GL_TEXTURE1); - glTexStorage2D(GL_TEXTURE_2D, 1, 
GL_DEPTH24_STENCIL8, src_rect.right, src_rect.top); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - temp_rect = src_rect; - } - - state.texture_units[1].texture_2d = temp_tex.handle; - state.draw.draw_framebuffer = draw_fbo.handle; - state.draw.shader_program = program.handle; - state.draw.vertex_array = vao.handle; - state.viewport = {static_cast(dst_rect.left), static_cast(dst_rect.bottom), - static_cast(dst_rect.GetWidth()), - static_cast(dst_rect.GetHeight())}; - state.Apply(); - - glActiveTexture(GL_TEXTURE1); - if (!use_texture_view) { - glCopyImageSubData(source.Handle(), GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - temp_tex.handle, GL_TEXTURE_2D, 0, src_rect.left, src_rect.bottom, 0, - src_rect.GetWidth(), src_rect.GetHeight(), 1); - } - glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, dest.Handle(), - 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glUniform2i(dst_size_loc, dst_rect.GetWidth(), dst_rect.GetHeight()); - glUniform2i(src_size_loc, src_rect.GetWidth(), src_rect.GetHeight()); - glUniform2i(src_offset_loc, src_rect.left, src_rect.bottom); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - if (use_texture_view) { - temp_tex.Release(); - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h deleted file mode 100644 index b4b9468eb..000000000 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include "common/math_util.h" -#include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" - -namespace OpenGL { - -class Surface; - -class FormatReinterpreterBase { -public: - FormatReinterpreterBase() { - read_fbo.Create(); - draw_fbo.Create(); - } - - virtual ~FormatReinterpreterBase() = default; - - virtual VideoCore::PixelFormat GetSourceFormat() const = 0; - virtual void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) = 0; - -protected: - OGLFramebuffer read_fbo; - OGLFramebuffer draw_fbo; -}; - -using ReinterpreterList = std::vector>; - -class RGBA4toRGB5A1 final : public FormatReinterpreterBase { -public: - RGBA4toRGB5A1(); - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::RGBA4; - } - - void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) override; - -private: - OGLProgram program; - GLint dst_size_loc{-1}; - GLint src_size_loc{-1}; - GLint src_offset_loc{-1}; - OGLVertexArray vao; -}; - -class ShaderD24S8toRGBA8 final : public FormatReinterpreterBase { -public: - ShaderD24S8toRGBA8(); - - VideoCore::PixelFormat GetSourceFormat() const override { - return VideoCore::PixelFormat::D24S8; - } - - void Reinterpret(Surface& source, Common::Rectangle src_rect, Surface& dest, - Common::Rectangle dst_rect) override; - -private: - bool use_texture_view{}; - OGLProgram program{}; - GLint dst_size_loc{-1}; - GLint src_size_loc{-1}; - GLint src_offset_loc{-1}; - OGLVertexArray vao{}; - OGLTexture temp_tex{}; - Common::Rectangle temp_rect{0, 0, 0, 0}; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index e31f2c01f..8f33edc64 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ 
b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -117,20 +117,11 @@ struct FramebufferInfo { } // Anonymous namespace TextureRuntime::TextureRuntime(const Driver& driver_, VideoCore::RendererBase& renderer) - : driver{driver_}, blit_helper{*this} { + : driver{driver_}, blit_helper{driver} { for (std::size_t i = 0; i < draw_fbos.size(); ++i) { draw_fbos[i].Create(); read_fbos[i].Create(); } - - auto add_reinterpreter = [this](PixelFormat dest, - std::unique_ptr&& obj) { - const u32 dst_index = static_cast(dest); - return reinterpreters[dst_index].push_back(std::move(obj)); - }; - - add_reinterpreter(PixelFormat::RGBA8, std::make_unique()); - add_reinterpreter(PixelFormat::RGB5A1, std::make_unique()); } TextureRuntime::~TextureRuntime() = default; @@ -192,8 +183,8 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params, const auto& tuple = is_custom ? GetFormatTuple(params.custom_format) : GetFormatTuple(params.pixel_format); const HostTextureTag key = { - .width = params.width, - .height = params.height, + .width = params.GetScaledWidth(), + .height = params.GetScaledHeight(), .levels = params.levels, .res_scale = params.res_scale, .tuple = tuple, @@ -238,13 +229,30 @@ Allocation TextureRuntime::Allocate(const VideoCore::SurfaceParams& params, .textures = std::move(textures), .handles = std::move(handles), .tuple = tuple, - .width = params.width, - .height = params.height, + .width = params.GetScaledWidth(), + .height = params.GetScaledHeight(), .levels = params.levels, .res_scale = params.res_scale, }; } +bool TextureRuntime::Reinterpret(Surface& source, Surface& dest, + const VideoCore::TextureBlit& blit) { + const PixelFormat src_format = source.pixel_format; + const PixelFormat dst_format = dest.pixel_format; + if (src_format == PixelFormat::D24S8 && dst_format == PixelFormat::RGBA8) { + blit_helper.ConvertDS24S8ToRGBA8(source, dest, blit); + } else if (src_format == PixelFormat::RGBA4 && dst_format == PixelFormat::RGB5A1) { + 
blit_helper.ConvertRGBA4ToRGB5A1(source, dest, blit); + } else { + LOG_WARNING(Render_OpenGL, "Unimplemented reinterpretation {} -> {}", + VideoCore::PixelFormatAsString(src_format), + VideoCore::PixelFormatAsString(dst_format)); + return false; + } + return true; +} + bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear) { const auto prev_state = OpenGLState::GetCurState(); @@ -360,11 +368,6 @@ void TextureRuntime::GenerateMipmaps(Surface& surface) { } } -const ReinterpreterList& TextureRuntime::GetPossibleReinterpretations( - PixelFormat dest_format) const { - return reinterpreters[static_cast(dest_format)]; -} - Surface::Surface(TextureRuntime& runtime_, const VideoCore::SurfaceParams& params) : SurfaceBase{params}, driver{&runtime_.GetDriver()}, runtime{&runtime_} { if (pixel_format == PixelFormat::Invalid) { diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index a1563f72d..a0d155cd0 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -7,7 +7,6 @@ #include "video_core/rasterizer_cache/framebuffer_base.h" #include "video_core/rasterizer_cache/rasterizer_cache_base.h" #include "video_core/renderer_opengl/gl_blit_helper.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" namespace VideoCore { struct Material; @@ -76,7 +75,6 @@ class Driver; class TextureRuntime { friend class Surface; friend class Framebuffer; - friend class BlitHelper; public: explicit TextureRuntime(const Driver& driver, VideoCore::RendererBase& renderer); @@ -95,12 +93,8 @@ public: const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format) const; const FormatTuple& GetFormatTuple(VideoCore::CustomPixelFormat pixel_format); - /// Takes back ownership of the allocation for recycling - void Recycle(const HostTextureTag tag, Allocation&& alloc); - - /// Allocates a texture with the specified 
dimentions and format - Allocation Allocate(const VideoCore::SurfaceParams& params, - const VideoCore::Material* material = nullptr); + /// Attempts to reinterpret + bool Reinterpret(Surface& source, Surface& dest, const VideoCore::TextureBlit& blit); /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear); @@ -114,10 +108,14 @@ public: /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(Surface& surface); - /// Returns all source formats that support reinterpretation to the dest format - const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dest_format) const; - private: + /// Allocates a texture with the specified dimentions and format + Allocation Allocate(const VideoCore::SurfaceParams& params, + const VideoCore::Material* material = nullptr); + + /// Takes back ownership of the allocation for recycling + void Recycle(const HostTextureTag tag, Allocation&& alloc); + /// Returns the OpenGL driver class const Driver& GetDriver() const { return driver; @@ -127,7 +125,6 @@ private: const Driver& driver; BlitHelper blit_helper; std::vector staging_buffer; - std::array reinterpreters; std::unordered_multimap alloc_cache; std::unordered_map> framebuffer_cache; std::array draw_fbos; @@ -145,24 +142,14 @@ public: Surface(Surface&& o) noexcept = default; Surface& operator=(Surface&& o) noexcept = default; - /// Returns the surface image handle at the provided index. - GLuint Handle(u32 index = 1) const noexcept { + [[nodiscard]] GLuint Handle(u32 index = 1) const noexcept { return alloc.handles[index]; } - /// Returns the tuple of the surface allocation. 
- const FormatTuple& Tuple() const noexcept { + [[nodiscard]] const FormatTuple& Tuple() const noexcept { return alloc.tuple; } - /// Returns the extent of the underlying surface allocation - VideoCore::Extent Extent() const noexcept { - return { - .width = alloc.width, - .height = alloc.height, - }; - } - /// Uploads pixel data in staging to a rectangle region of the surface texture void Upload(const VideoCore::BufferTextureCopy& upload, const VideoCore::StagingData& staging);