From c71dbb5d199498d08d389d16f5d8edf172cac42e Mon Sep 17 00:00:00 2001 From: emufan4568 Date: Mon, 12 Sep 2022 00:54:40 +0300 Subject: [PATCH] rasterizer_cache: Make into template * This is the final step; RasterizerCache is now completely decoupled from OpenGL (technically not yet, but those are details). For now texture filtering and some GLES paths have been disabled and will be reimplemented in the following commits. --- src/video_core/CMakeLists.txt | 7 +- .../rasterizer_cache/cached_surface.cpp | 218 --- .../rasterizer_cache/cached_surface.h | 127 -- .../rasterizer_cache/morton_swizzle.h | 18 +- .../rasterizer_cache/pixel_format.h | 2 +- .../rasterizer_cache/rasterizer_cache.cpp | 1142 +-------------- .../rasterizer_cache/rasterizer_cache.h | 1231 ++++++++++++++++- .../rasterizer_cache/surface_base.h | 211 +++ .../rasterizer_cache/surface_params.cpp | 43 +- .../rasterizer_cache/surface_params.h | 14 +- .../rasterizer_cache/texture_runtime.cpp | 261 ---- src/video_core/rasterizer_cache/types.h | 2 +- src/video_core/rasterizer_cache/utils.cpp | 46 +- src/video_core/rasterizer_cache/utils.h | 18 +- .../gl_format_reinterpreter.cpp | 16 +- .../renderer_opengl/gl_format_reinterpreter.h | 6 +- .../renderer_opengl/gl_rasterizer.cpp | 104 +- .../renderer_opengl/gl_rasterizer.h | 3 +- .../renderer_opengl/gl_texture_runtime.cpp | 490 +++++++ .../gl_texture_runtime.h} | 58 +- .../renderer_opengl/texture_downloader_es.cpp | 18 +- .../texture_filters/texture_filterer.cpp | 4 +- .../texture_filters/texture_filterer.h | 3 +- 23 files changed, 2024 insertions(+), 2018 deletions(-) delete mode 100644 src/video_core/rasterizer_cache/cached_surface.cpp delete mode 100644 src/video_core/rasterizer_cache/cached_surface.h create mode 100644 src/video_core/rasterizer_cache/surface_base.h delete mode 100644 src/video_core/rasterizer_cache/texture_runtime.cpp create mode 100644 src/video_core/renderer_opengl/gl_texture_runtime.cpp rename 
src/video_core/{rasterizer_cache/texture_runtime.h => renderer_opengl/gl_texture_runtime.h} (52%) diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fe3566aa7..bef0b3fc0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -26,19 +26,16 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h - rasterizer_cache/cached_surface.cpp - rasterizer_cache/cached_surface.h rasterizer_cache/morton_swizzle.h rasterizer_cache/pixel_format.h rasterizer_cache/rasterizer_cache.cpp rasterizer_cache/rasterizer_cache.h + rasterizer_cache/surface_base.h rasterizer_cache/types.h rasterizer_cache/utils.cpp rasterizer_cache/utils.h rasterizer_cache/surface_params.cpp rasterizer_cache/surface_params.h - rasterizer_cache/texture_runtime.cpp - rasterizer_cache/texture_runtime.h renderer_opengl/frame_dumper_opengl.cpp renderer_opengl/frame_dumper_opengl.h renderer_opengl/gl_driver.cpp @@ -61,6 +58,8 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_texture_runtime.cpp + renderer_opengl/gl_texture_runtime.h renderer_opengl/gl_vars.cpp renderer_opengl/gl_vars.h renderer_opengl/pica_to_gl.h diff --git a/src/video_core/rasterizer_cache/cached_surface.cpp b/src/video_core/rasterizer_cache/cached_surface.cpp deleted file mode 100644 index 201fa5a09..000000000 --- a/src/video_core/rasterizer_cache/cached_surface.cpp +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "video_core/rasterizer_cache/cached_surface.h" -#include "video_core/rasterizer_cache/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/renderer_opengl/texture_downloader_es.h" -#include "video_core/renderer_opengl/texture_filters/texture_filterer.h" - -namespace OpenGL { - -CachedSurface::~CachedSurface() { - if (texture.handle) { - const auto tag = HostTextureTag{pixel_format, GetScaledWidth(), GetScaledHeight()}; - owner.host_texture_recycler.emplace(tag, std::move(texture)); - } -} - -MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadTexture(Common::Rectangle rect, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureUL); - - // Load data from memory to the surface - GLint x0 = static_cast(rect.left); - GLint y0 = static_cast(rect.bottom); - std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); - - GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - - OpenGLState cur_state = OpenGLState::GetCurState(); - - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = target_tex; - cur_state.Apply(); - - const FormatTuple& tuple = GetFormatTuple(pixel_format); - - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); - - glActiveTexture(GL_TEXTURE0); - 
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), tuple.format, tuple.type, - reinterpret_cast(buffer_offset)); - - staging.Lock(); - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - const Common::Rectangle from_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - if (!owner.texture_filterer->Filter(unscaled_tex, from_rect, texture, scaled_rect, type)) { - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_region = Region2D{ - .start = {0, 0}, - .end = {width, height} - }, - .dst_region = Region2D{ - .start = {rect.left, rect.bottom}, - .end = {rect.right, rect.top} - } - }; - - runtime.BlitTextures(unscaled_tex, texture, texture_blit); - } - } - - InvalidateAllWatcher(); -} - -MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureDL); - - OpenGLState state = OpenGLState::GetCurState(); - OpenGLState prev_state = state; - SCOPE_EXIT({ prev_state.Apply(); }); - - // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); - glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); - const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left 
*= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight()); - - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_region = Region2D{ - .start = {scaled_rect.left, scaled_rect.bottom}, - .end = {scaled_rect.right, scaled_rect.top} - }, - .dst_region = Region2D{ - .start = {0, 0}, - .end = {rect.GetWidth(), rect.GetHeight()} - } - }; - - // Blit scaled texture to the unscaled one - runtime.BlitTextures(texture, unscaled_tex, texture_blit); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - - const FormatTuple& tuple = GetFormatTuple(pixel_format); - if (GLES) { - owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect.GetHeight(), rect.GetWidth(), - reinterpret_cast(buffer_offset)); - } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); - } - } else { - const u32 download_size = width * height * GetBytesPerPixel(pixel_format); - const BufferTextureCopy texture_download = { - .buffer_offset = buffer_offset, - .buffer_size = download_size, - .buffer_row_length = stride, - .buffer_height = height, - .surface_type = type, - .texture_level = 0, - .texture_offset = {rect.bottom, rect.left}, - .texture_extent = {rect.GetWidth(), rect.GetHeight()} - }; - - runtime.ReadTexture(texture, texture_download, pixel_format, staging.mapped); - } - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our 
fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * 8 != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); - std::vector fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/cached_surface.h b/src/video_core/rasterizer_cache/cached_surface.h deleted file mode 100644 index 66152b52a..000000000 --- a/src/video_core/rasterizer_cache/cached_surface.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once -#include "common/assert.h" -#include "core/custom_tex_cache.h" -#include "video_core/rasterizer_cache/surface_params.h" -#include "video_core/rasterizer_cache/texture_runtime.h" - -namespace OpenGL { - -using SurfaceRegions = boost::icl::interval_set; - -/** - * A watcher that notifies whether a cached surface has been changed. 
This is useful for caching - * surface collection objects, including texture cube and mipmap. - */ -class SurfaceWatcher { - friend class CachedSurface; - -public: - explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} - - /// Checks whether the surface has been changed. - bool IsValid() const { - return !surface.expired() && valid; - } - - /// Marks that the content of the referencing surface has been updated to the watcher user. - void Validate() { - ASSERT(!surface.expired()); - valid = true; - } - - /// Gets the referencing surface. Returns null if the surface has been destroyed - Surface Get() const { - return surface.lock(); - } - -private: - std::weak_ptr surface; - bool valid = false; -}; - -class RasterizerCache; -class StagingBuffer; - -class CachedSurface : public SurfaceParams, public std::enable_shared_from_this { -public: - CachedSurface(SurfaceParams params, RasterizerCache& owner, TextureRuntime& runtime) - : SurfaceParams(params), owner(owner), runtime(runtime) {} - ~CachedSurface(); - - /// Upload/Download data in gl_buffer in/to this surface's texture - void UploadTexture(Common::Rectangle rect, const StagingBuffer& staging); - void DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging); - - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } - - bool IsSurfaceFullyInvalid() const { - auto interval = GetInterval(); - return *invalid_regions.equal_range(interval).first == interval; - } - - std::shared_ptr CreateWatcher() { - auto watcher = std::make_shared(weak_from_this()); - watchers[watcher_count++] = watcher; - return watcher; - } - - void InvalidateAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - 
locked->valid = false; - } - } - } - - void UnlinkAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - locked->valid = false; - locked->surface.reset(); - } - } - - watchers = {}; - watcher_count = 0; - } - -public: - bool registered = false; - SurfaceRegions invalid_regions; - - // Number of bytes to read from fill_data - u32 fill_size = 0; - std::array fill_data; - OGLTexture texture; - - std::array, 7> level_watchers; - u32 max_level = 0; - -private: - RasterizerCache& owner; - TextureRuntime& runtime; - u32 watcher_count = 0; - std::array, 8> watchers; -}; - -struct CachedTextureCube { - OGLTexture texture; - u16 res_scale = 1; - std::shared_ptr px; - std::shared_ptr nx; - std::shared_ptr py; - std::shared_ptr ny; - std::shared_ptr pz; - std::shared_ptr nz; -}; - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index 01a87317a..7066c53a9 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -9,11 +9,10 @@ #include "common/alignment.h" #include "common/color.h" #include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/texture/etc1.h" #include "video_core/utils.h" -namespace OpenGL { +namespace VideoCore { template inline T MakeInt(const std::byte* bytes) { @@ -46,14 +45,6 @@ inline void DecodePixel(const std::byte* source, std::byte* dest) { const u8 ia4 = static_cast(source[0]); std::memset(dest, Color::Convert4To8(ia4 >> 4), 3); dest[3] = std::byte{Color::Convert4To8(ia4 & 0xF)}; - } else if (format == PixelFormat::RGBA8 && GLES) { - const u32 abgr = MakeInt(source); - const u32 rgba = std::byteswap(abgr); - std::memcpy(dest, &rgba, sizeof(u32)); - } else if (format == PixelFormat::RGB8 && GLES) { - dest[0] = source[2]; - dest[1] = source[1]; - dest[2] = source[0]; } else { std::memcpy(dest, source, 
bytes_per_pixel); } @@ -111,13 +102,6 @@ inline void EncodePixel(const std::byte* source, std::byte* dest) { if constexpr (format == PixelFormat::D24S8) { const u32 s8d24 = std::rotr(MakeInt(source), 8); std::memcpy(dest, &s8d24, sizeof(u32)); - } else if (format == PixelFormat::RGBA8 && GLES) { - const u32 abgr = std::byteswap(MakeInt(source)); - std::memcpy(dest, &abgr, sizeof(u32)); - } else if (format == PixelFormat::RGB8 && GLES) { - dest[0] = source[2]; - dest[1] = source[1]; - dest[2] = source[0]; } else { std::memcpy(dest, source, bytes_per_pixel); } diff --git a/src/video_core/rasterizer_cache/pixel_format.h b/src/video_core/rasterizer_cache/pixel_format.h index e9a0b7cd0..b8ee07753 100644 --- a/src/video_core/rasterizer_cache/pixel_format.h +++ b/src/video_core/rasterizer_cache/pixel_format.h @@ -8,7 +8,7 @@ #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" -namespace OpenGL { +namespace VideoCore { constexpr u32 PIXEL_FORMAT_COUNT = 18; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.cpp b/src/video_core/rasterizer_cache/rasterizer_cache.cpp index f4e0a95f6..6f2436350 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.cpp +++ b/src/video_core/rasterizer_cache/rasterizer_cache.cpp @@ -2,1149 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include -#include "common/alignment.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/pica_state.h" -#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_cache/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/renderer_opengl/texture_downloader_es.h" -#include "video_core/renderer_opengl/texture_filters/texture_filterer.h" -namespace OpenGL { - -template -static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -enum class MatchFlags { - Invalid = 1, ///< Surface is allowed to be only partially valid - Exact = 1 << 1, ///< Surface perfectly matches params - SubRect = 1 << 2, ///< Surface encompasses params - Copy = 1 << 3, ///< Surface that can be used as a copy source - Expand = 1 << 4, ///< Surface that can expand params - TexCopy = 1 << 5 ///< Surface that will match a display transfer "texture copy" parameters -}; - -DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); - -/// Get the best surface match (and its match type) for the given flags -template -static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - std::optional validate_interval = std::nullopt) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (const auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - True(find_flags & MatchFlags::Copy) - ? 
true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if (False(find_flags & MatchFlags::Invalid) && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (False(find_flags & check_type)) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - ASSERT(validate_interval); - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - 
} - } - return match_surface; -} - -RasterizerCache::RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, Driver& driver) - : rasterizer(rasterizer), runtime(driver) { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - texture_filterer = std::make_unique( - Settings::values.texture_filter_name.GetValue(), resolution_scale_factor); - format_reinterpreter = std::make_unique(); - texture_downloader_es = std::make_unique(false); -} - -RasterizerCache::~RasterizerCache() = default; - -MICROPROFILE_DEFINE(RasterizerCache_BlitSurface, "RasterizerCache", "BlitSurface", - MP_RGB(128, 192, 64)); -bool RasterizerCache::BlitSurfaces(const Surface& src_surface, - const Common::Rectangle& src_rect, - const Surface& dst_surface, - const Common::Rectangle& dst_rect) { - MICROPROFILE_SCOPE(RasterizerCache_BlitSurface); - - if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { - dst_surface->InvalidateAllWatcher(); - - const TextureBlit texture_blit = { - .surface_type = src_surface->type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - return runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); - } - - return false; -} +namespace VideoCore { +MICROPROFILE_DEFINE(RasterizerCache_BlitSurface, "RasterizerCache", "BlitSurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_CopySurface, "RasterizerCache", "CopySurface", MP_RGB(128, 192, 64)); -void RasterizerCache::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - MICROPROFILE_SCOPE(RasterizerCache_CopySurface); - - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == 
copy_interval && src_surface != dst_surface); - - const auto dst_rect = dst_surface->GetScaledSubRect(subrect_params); - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u32 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array fill_buffer; - - u32 fill_buff_pos = fill_offset; - for (std::size_t i = 0; i < fill_buffer.size(); i++) { - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - } - - const ClearValue clear_value = - MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data()); - - const TextureClear clear_rect = { - .surface_type = dst_surface->type, - .texture_format = dst_surface->pixel_format, - .texture_level = 0, - .rect = Rect2D{ - .offset = {dst_rect.left, dst_rect.bottom}, - .extent = {dst_rect.GetWidth(), dst_rect.GetHeight()} - } - }; - - runtime.ClearTexture(dst_surface->texture, clear_rect, clear_value); - return; - } - - if (src_surface->CanSubRect(subrect_params)) { - const auto src_rect = src_surface->GetScaledSubRect(subrect_params); - const TextureBlit texture_blit = { - .surface_type = src_surface->type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); - return; - } - - UNREACHABLE(); -} - - -Surface RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); - - 
// Check for an exact match in existing surfaces - Surface surface = - FindMatch(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find - // it to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -SurfaceRect_Tuple RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, Common::Rectangle{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - SurfaceParams new_params = 
*surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = - new_params.size / aligned_params.BytesInPixels(aligned_params.stride); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return 
std::make_tuple(surface, surface->GetScaledSubRect(params)); -} - -Surface RasterizerCache::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - return GetTextureSurface(info, config.config.lod.max_level); -} - -Surface RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, - u32 max_level) { - if (info.physical_address == 0) { - return nullptr; - } - - SurfaceParams params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.is_tiled = true; - params.pixel_format = PixelFormatFromTextureFormat(info.format); - params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor; - params.UpdateParams(); - - u32 min_width = info.width >> max_level; - u32 min_height = info.height >> max_level; - if (min_width % 8 != 0 || min_height % 8 != 0) { - LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, - min_height); - return nullptr; - } - if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { - LOG_CRITICAL(Render_OpenGL, - "Texture size ({}x{}) does not support required mipmap level ({})", - params.width, params.height, max_level); - return nullptr; - } - - auto surface = GetSurface(params, ScaleMatch::Ignore, true); - if (!surface) - return nullptr; - - // Update mipmap if necessary - if (max_level != 0) { - if (max_level >= 8) { - // since PICA only supports texture size between 8 and 1024, there are at most eight - // possible mipmap levels including the base. 
- LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); - return nullptr; - } - - // Allocate more mipmap level if necessary - if (surface->max_level < max_level) { - if (!texture_filterer->IsNull()) { - // TODO: proper mipmap support for custom textures - runtime.GenerateMipmaps(surface->texture, max_level); - } - - surface->max_level = max_level; - } - - // Blit mipmaps that have been invalidated - SurfaceParams surface_params = *surface; - for (u32 level = 1; level <= max_level; ++level) { - // In PICA all mipmap levels are stored next to each other - surface_params.addr += - surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; - surface_params.width /= 2; - surface_params.height /= 2; - surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it - surface_params.UpdateParams(); - - auto& watcher = surface->level_watchers[level - 1]; - if (!watcher || !watcher->Get()) { - auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); - if (level_surface) { - watcher = level_surface->CreateWatcher(); - } else { - watcher = nullptr; - } - } - - if (watcher && !watcher->IsValid()) { - auto level_surface = watcher->Get(); - if (!level_surface->invalid_regions.empty()) { - ValidateSurface(level_surface, level_surface->addr, level_surface->size); - } - - if (texture_filterer->IsNull()) { - const auto src_rect = level_surface->GetScaledRect(); - const auto dst_rect = surface_params.GetScaledRect(); - const TextureBlit texture_blit = { - .surface_type = surface->type, - .src_level = 0, - .dst_level = level, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - runtime.BlitTextures(level_surface->texture, surface->texture, texture_blit); - } - - watcher->Validate(); - } - } - 
} - - return surface; -} - -const CachedTextureCube& RasterizerCache::GetTextureCube(const TextureCubeConfig& config) { - auto& cube = texture_cube_cache[config]; - - struct Face { - Face(std::shared_ptr& watcher, PAddr address) - : watcher(watcher), address(address) {} - std::shared_ptr& watcher; - PAddr address; - }; - - const std::array faces{{ - {cube.px, config.px}, - {cube.nx, config.nx}, - {cube.py, config.py}, - {cube.ny, config.ny}, - {cube.pz, config.pz}, - {cube.nz, config.nz}, - }}; - - for (const Face& face : faces) { - if (!face.watcher || !face.watcher->Get()) { - Pica::Texture::TextureInfo info; - info.physical_address = face.address; - info.height = info.width = config.width; - info.format = config.format; - info.SetDefaultStride(); - auto surface = GetTextureSurface(info); - if (surface) { - face.watcher = surface->CreateWatcher(); - } else { - // Can occur when texture address is invalid. We mark the watcher with nullptr - // in this case and the content of the face wouldn't get updated. These are - // usually leftover setup in the texture unit and games are not supposed to draw - // using them. 
- face.watcher = nullptr; - } - } - } - - if (cube.texture.handle == 0) { - for (const Face& face : faces) { - if (face.watcher) { - auto surface = face.watcher->Get(); - cube.res_scale = std::max(cube.res_scale, surface->res_scale); - } - } - - const auto& tuple = GetFormatTuple(PixelFormatFromTextureFormat(config.format)); - const u32 width = cube.res_scale * config.width; - const GLsizei levels = static_cast(std::log2(width)) + 1; - - // Allocate the cube texture - cube.texture.Create(); - cube.texture.Allocate(GL_TEXTURE_CUBE_MAP, levels, tuple.internal_format, width, width); - } - - u32 scaled_size = cube.res_scale * config.width; - - for (std::size_t i = 0; i < faces.size(); i++) { - const Face& face = faces[i]; - if (face.watcher && !face.watcher->IsValid()) { - auto surface = face.watcher->Get(); - if (!surface->invalid_regions.empty()) { - ValidateSurface(surface, surface->addr, surface->size); - } - - const auto src_rect = surface->GetScaledRect(); - const TextureBlit texture_blit = { - .surface_type = SurfaceType::Color, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = static_cast(i), - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {0, 0}, - .end = {scaled_size, scaled_size} - } - }; - - runtime.BlitTextures(surface->texture, cube.texture, texture_blit); - face.watcher->Validate(); - } - } - - return cube; -} - -SurfaceSurfaceRect_Tuple RasterizerCache::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const Common::Rectangle& viewport_rect) { - const auto& regs = Pica::g_state.regs; - const auto& config = regs.framebuffer.framebuffer; - - // Update resolution_scale_factor and reset cache if changed - const bool resolution_scale_changed = - resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); - const bool texture_filter_changed = - VideoCore::g_texture_filter_update_requested.exchange(false) && - 
texture_filterer->Reset(Settings::values.texture_filter_name.GetValue(), - VideoCore::GetResolutionScaleFactor()); - - if (resolution_scale_changed || texture_filter_changed) { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - texture_cube_cache.clear(); - } - - Common::Rectangle viewport_clamped{ - static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), - static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), - static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), - static_cast( - std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; - - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.GetWidth(); - color_params.height = config.GetHeight(); - SurfaceParams depth_params = color_params; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); - color_params.UpdateParams(); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); - depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - Common::Rectangle color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - 
std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - - Common::Rectangle depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - - Common::Rectangle fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || - color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); - } - } else if (color_surface != nullptr) { - fb_rect = color_rect; - } else if (depth_surface != nullptr) { - fb_rect = depth_rect; - } - - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - color_surface->InvalidateAllWatcher(); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - depth_surface->InvalidateAllWatcher(); - } - - return std::make_tuple(color_surface, depth_surface, fb_rect); -} - -Surface RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - SurfaceParams params; - params.addr = config.GetStartAddress(); - params.end = config.GetEndAddress(); - params.size = params.end - params.addr; - params.type = SurfaceType::Fill; - params.res_scale = std::numeric_limits::max(); - - Surface new_surface = std::make_shared(params, *this, runtime); - - std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); - if (config.fill_32bit) { - new_surface->fill_size = 4; - } else if (config.fill_24bit) { - new_surface->fill_size = 
3; - } else { - new_surface->fill_size = 2; - } - - RegisterSurface(new_surface); - return new_surface; -} - -SurfaceRect_Tuple RasterizerCache::GetTexCopySurface(const SurfaceParams& params) { - Common::Rectangle rect{}; - - Surface match_surface = FindMatch( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; - match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCache::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (const auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { - if (size == 0) [[unlikely]] { - return; - } - - const SurfaceInterval validate_interval(addr, addr + size); - if (surface->type == SurfaceType::Fill) { - 
// Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - auto validate_regions = surface->invalid_regions & validate_interval; - auto NotifyValidated = [&](SurfaceInterval interval) { - surface->invalid_regions.erase(interval); - validate_regions.erase(interval); - }; - - while (true) { - const auto it = validate_regions.begin(); - if (it == validate_regions.end()) { - break; - } - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = surface->FromInterval(interval); - - Surface copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - NotifyValidated(copy_interval); - continue; - } - - // Try to find surface in cache with different format - // that can can be reinterpreted to the requested format. - if (ValidateByReinterpretation(surface, params, interval)) { - NotifyValidated(interval); - continue; - } - // Could not find a matching reinterpreter, check if we need to implement a - // reinterpreter - if (NoUnimplementedReinterpretations(surface, params, interval) && - !IntervalHasInvalidPixelFormat(params, interval)) { - // No surfaces were found in the cache that had a matching bit-width. - // If the region was created entirely on the GPU, - // assume it was a developer mistake and skip flushing. - if (boost::icl::contains(dirty_regions, interval)) { - LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " - "invalid. 
Skipping validation"); - validate_regions.erase(interval); - continue; - } - } - - // Load data from 3DS memory - FlushRegion(params.addr, params.size); - UploadSurface(surface, interval); - NotifyValidated(params.GetInterval()); - } -} - MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Load", MP_RGB(128, 192, 64)); -void RasterizerCache::UploadSurface(const Surface& surface, const SurfaceInterval& interval) { - const SurfaceParams info = surface->FromInterval(interval); - const u32 load_start = info.addr; - const u32 load_end = info.end; - ASSERT(load_start >= surface->addr && load_end <= surface->end); - - const StagingBuffer& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); - auto source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); - if (!source_ptr) [[unlikely]] { - return; - } - - const auto start_offset = load_start - surface->addr; - const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); - const auto upload_size = static_cast(upload_data.size()); - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); - if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); - } else { - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); - } - } else { - UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); - } - - surface->UploadTexture(surface->GetSubRect(info), staging); -} - MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); -void RasterizerCache::DownloadSurface(const Surface& surface, const SurfaceInterval& interval) { - 
const u32 flush_start = boost::icl::first(interval); - const u32 flush_end = boost::icl::last_next(interval); - ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - const StagingBuffer& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadTexture(surface->GetSubRect(params), staging); - } - - auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); - if (!dest_ptr) [[unlikely]] { - return; - } - - const auto start_offset = flush_start - surface->addr; - const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - const auto download_size = static_cast(download_dest.size()); - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); - - if (surface->type == SurfaceType::Fill) { - const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); - const u32 backup_bytes = start_offset % surface->fill_size; - std::array backup_data; - if (backup_bytes) { - std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); - } - - for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { - std::memcpy(&dest_ptr[offset], &surface->fill_data[0], - std::min(surface->fill_size, download_size - offset)); - } - - if (backup_bytes) - std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto download_data = staging.mapped.subspan(start_offset, download_size); - if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(download_data, download_dest); - } else { - std::memcpy(download_dest.data(), download_data.data(), 
download_size); - } - } else { - SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); - } -} - - -bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval) { - static constexpr std::array all_formats{ - PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, - PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, - PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, - PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, - PixelFormat::D24S8, - }; - bool implemented = true; - for (PixelFormat format : all_formats) { - if (GetFormatBpp(format) == surface->GetFormatBpp()) { - params.pixel_format = format; - // This could potentially be expensive, - // although experimentally it hasn't been too bad - Surface test_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (test_surface != nullptr) { - LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", - PixelFormatAsString(format), - PixelFormatAsString(surface->pixel_format)); - implemented = false; - } - } - } - return implemented; -} - -bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, - const SurfaceInterval& interval) { - params.pixel_format = PixelFormat::Invalid; - for (const auto& set : RangeFromInterval(surface_cache, interval)) - for (const auto& surface : set.second) - if (surface->pixel_format == PixelFormat::Invalid) { - LOG_DEBUG(Render_OpenGL, "Surface {:#x} found with invalid pixel format", - surface->addr); - return true; - } - return false; -} - -bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval) { - const PixelFormat dst_format = surface->pixel_format; - const SurfaceType type = GetFormatType(dst_format); - - for (auto& reinterpreter : - 
format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) { - - params.pixel_format = reinterpreter->GetSourceFormat(); - Surface reinterpret_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - - if (reinterpret_surface != nullptr) { - auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); - auto reinterpret_params = surface->FromInterval(reinterpret_interval); - auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); - auto dest_rect = surface->GetScaledSubRect(reinterpret_params); - - if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 && - surface->res_scale == resolution_scale_factor) { - // The destination surface is either a framebuffer, or a filtered texture. - // Create an intermediate surface to convert to before blitting to the - // destination. - const u32 width = dest_rect.GetHeight() / resolution_scale_factor; - const u32 height = dest_rect.GetWidth() / resolution_scale_factor; - const Common::Rectangle tmp_rect{0, width, height, 0}; - - OGLTexture tmp_tex = AllocateSurfaceTexture(dst_format, height, width); - reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, tmp_tex, - tmp_rect); - - if (!texture_filterer->Filter(tmp_tex, tmp_rect, surface->texture, dest_rect, type)) { - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {0, 0}, - .end = {width, height} - }, - .dst_region = Region2D{ - .start = {dest_rect.left, dest_rect.bottom}, - .end = {dest_rect.right, dest_rect.top} - } - }; - - runtime.BlitTextures(tmp_tex, surface->texture, texture_blit); - } - - } else { - reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, - dest_rect); - } - - return true; - } - } - - return false; -} - -void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { - std::lock_guard 
lock{mutex}; - - if (size == 0) [[unlikely]] { - return; - } - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to - // access that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - DownloadSurface(surface, interval); - flushed_intervals += interval; - } - - // Reset dirty regions - dirty_regions -= flushed_intervals; -} - -void RasterizerCache::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); -} - -void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { - std::lock_guard lock{mutex}; - - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); - } - - for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (const auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - 
remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - cached_surface->InvalidateAllWatcher(); - - // If the surface has no salvageable data it should be removed from the cache to avoid - // clogging the data structure - if (cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } - } - - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (const auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); - } - - remove_surfaces.clear(); -} - -Surface RasterizerCache::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared(params, *this, runtime); - surface->invalid_regions.insert(surface->GetInterval()); - - // Allocate surface texture - surface->texture = - AllocateSurfaceTexture(params.pixel_format, surface->GetScaledWidth(), surface->GetScaledHeight()); - - return surface; -} - -void RasterizerCache::RegisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCache::UnregisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (!surface->registered) { - return; - } - surface->registered = false; - rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, -1); - 
surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -// Allocate an uninitialized texture of appropriate size and format for the surface -OGLTexture RasterizerCache::AllocateSurfaceTexture(PixelFormat format, u32 width, u32 height) { - const FormatTuple& tuple = GetFormatTuple(format); - auto recycled_tex = host_texture_recycler.find({format, width, height}); - if (recycled_tex != host_texture_recycler.end()) { - OGLTexture texture = std::move(recycled_tex->second); - host_texture_recycler.erase(recycled_tex); - return texture; - } - - const GLsizei levels = static_cast(std::log2(std::max(width, height))) + 1; - - OGLTexture texture; - texture.Create(); - texture.Allocate(GL_TEXTURE_2D, levels, tuple.internal_format, width, height); - - return texture; -} - -} // namespace OpenGL +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 273f4e1a9..35154e80e 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -4,52 +4,81 @@ #pragma once #include -#include "video_core/rasterizer_cache/cached_surface.h" +#include +#include +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "video_core/pica_state.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/texture/texture_decode.h" +#include "video_core/video_core.h" namespace VideoCore { -class RasterizerAccelerated; + +inline auto RangeFromInterval(auto& map, SurfaceInterval interval) { + return boost::make_iterator_range(map.equal_range(interval)); } -namespace OpenGL { - -// Declare rasterizer interval types -using SurfaceSet = std::set; -using SurfaceMap = - boost::icl::interval_map; -using SurfaceCache = - 
boost::icl::interval_map; - -static_assert(std::is_same() && - std::is_same(), - "Incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple>; -using SurfaceSurfaceRect_Tuple = std::tuple>; - enum class ScaleMatch { - Exact, // Only accept same res scale - Upscale, // Only allow higher scale than params - Ignore // Accept every scaled res + Exact, ///< Only accept same res scale + Upscale, ///< Only allow higher scale than params + Ignore ///< Accept every scaled res }; -class Driver; -class TextureDownloaderES; -class TextureFilterer; -class FormatReinterpreterOpenGL; +enum class MatchFlags { + Invalid = 1, ///< Surface is allowed to be only partially valid + Exact = 1 << 1, ///< Surface perfectly matches params + SubRect = 1 << 2, ///< Surface encompasses params + Copy = 1 << 3, ///< Surface that can be used as a copy source + Expand = 1 << 4, ///< Surface that can expand params + TexCopy = 1 << 5 ///< Surface that will match a display transfer "texture copy" parameters +}; +DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); + +class RasterizerAccelerated; + +template class RasterizerCache : NonCopyable { public: - RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, Driver& driver); - ~RasterizerCache(); + using TextureRuntime = typename T::Runtime; + using CachedSurface = typename T::Surface; + using Watcher = SurfaceWatcher; + + /// Declare rasterizer interval types + using Surface = std::shared_ptr; + using SurfaceSet = std::set; + using SurfaceMap = + boost::icl::interval_map; + using SurfaceCache = + boost::icl::interval_map; + + static_assert(std::is_same() && + std::is_same(), + "Incorrect interval types"); + + using SurfaceRect_Tuple = std::tuple>; + using SurfaceSurfaceRect_Tuple = std::tuple>; + +public: + RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, TextureRuntime& runtime); + ~RasterizerCache() = default; + + /// Get the best surface match (and its match type) for the given flags + template + Surface FindMatch(const 
SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + std::optional validate_interval = std::nullopt); /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, - const Surface& dst_surface, const Common::Rectangle& dst_rect); + bool BlitSurfaces(const Surface& src_surface, Common::Rectangle src_rect, + const Surface& dst_surface, Common::Rectangle dst_rect); /// Copy one surface's region to another void CopySurface(const Surface& src_surface, const Surface& dst_surface, @@ -69,7 +98,7 @@ public: Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); /// Get a texture cube based on the texture configuration - const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config); + const Surface& GetTextureCube(const TextureCubeConfig& config); /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, @@ -90,12 +119,6 @@ public: /// Flush all cached resources tracked by this cache manager void FlushAll(); - // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead - // in the driver - // this must be placed above the surface_cache to ensure all cached surfaces are destroyed - // before destroying the recycler - std::unordered_multimap host_texture_recycler; - private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); @@ -103,25 +126,24 @@ private: void ValidateSurface(const Surface& surface, PAddr addr, u32 size); /// Copies pixel data in interval from the guest VRAM to the host GPU surface - void UploadSurface(const Surface& surface, const SurfaceInterval& interval); + void UploadSurface(const Surface& surface, SurfaceInterval interval); /// Copies pixel data in interval from the host GPU surface to the guest VRAM - void DownloadSurface(const Surface& 
surface, const SurfaceInterval& interval); + void DownloadSurface(const Surface& surface, SurfaceInterval interval); /// Returns false if there is a surface in the cache at the interval with the same bit-width, - bool NoUnimplementedReinterpretations(const OpenGL::Surface& surface, - OpenGL::SurfaceParams& params, - const OpenGL::SurfaceInterval& interval); + bool NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, + SurfaceInterval interval); /// Return true if a surface with an invalid pixel format exists at the interval - bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); + bool IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval); /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation - bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, - const SurfaceInterval& interval); + bool ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, + SurfaceInterval interval); /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); + Surface CreateSurface(SurfaceParams& params); /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -129,24 +151,1115 @@ private: /// Remove surface from the cache void UnregisterSurface(const Surface& surface); +private: VideoCore::RasterizerAccelerated& rasterizer; - TextureRuntime runtime; + TextureRuntime& runtime; SurfaceCache surface_cache; SurfaceMap dirty_regions; SurfaceSet remove_surfaces; - u16 resolution_scale_factor; - std::unordered_map texture_cube_cache; - + std::unordered_map texture_cube_cache; std::recursive_mutex mutex; - -public: - OGLTexture AllocateSurfaceTexture(PixelFormat format, u32 width, u32 height); - - std::unique_ptr texture_filterer; - std::unique_ptr format_reinterpreter; - std::unique_ptr texture_downloader_es; }; -} // namespace OpenGL +template 
+RasterizerCache::RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, TextureRuntime& runtime) + : rasterizer(rasterizer), runtime{runtime} { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); +} + +template +template +auto RasterizerCache::FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + std::optional validate_interval) -> Surface { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; + + for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (const auto& surface : pair.second) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + // validity will be checked in GetCopyableInterval + bool is_valid = + True(find_flags & MatchFlags::Copy) + ? true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (False(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (False(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > 
boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + auto copy_interval = + surface->GetCopyableInterval(params.FromInterval(*validate_interval)); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + return match_surface; +} + +MICROPROFILE_DECLARE(RasterizerCache_BlitSurface); +template +bool RasterizerCache::BlitSurfaces(const Surface& src_surface, Common::Rectangle src_rect, + const Surface& dst_surface, Common::Rectangle dst_rect) { + MICROPROFILE_SCOPE(RasterizerCache_BlitSurface); + + if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { + dst_surface->InvalidateAllWatcher(); + + const TextureBlit texture_blit = { + .surface_type = src_surface->type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + return runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); + } + + return false; +} + +MICROPROFILE_DECLARE(RasterizerCache_CopySurface); +template +void RasterizerCache::CopySurface(const Surface& 
src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + MICROPROFILE_SCOPE(RasterizerCache_CopySurface); + + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval && src_surface != dst_surface); + + const auto dst_rect = dst_surface->GetScaledSubRect(subrect_params); + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = + (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (std::size_t i = 0; i < fill_buffer.size(); i++) { + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + } + + const ClearValue clear_value = + MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data()); + + const TextureClear clear_rect = { + .surface_type = dst_surface->type, + .texture_format = dst_surface->pixel_format, + .texture_level = 0, + .rect = Rect2D{ + .offset = {dst_rect.left, dst_rect.bottom}, + .extent = {dst_rect.GetWidth(), dst_rect.GetHeight()} + } + }; + + runtime.ClearTexture(dst_surface->texture, clear_rect, clear_value); + return; + } + + if (src_surface->CanSubRect(subrect_params)) { + const auto src_rect = src_surface->GetScaledSubRect(subrect_params); + const TextureBlit texture_blit = { + .surface_type = src_surface->type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); + return; + } + + UNREACHABLE(); +} + +template +auto RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) 
-> Surface { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); + + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find + // it to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } + } + + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +template +auto RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) -> SurfaceRect_Tuple { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, Common::Rectangle{}); + } + + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the 
dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, aligned_params, + match_res_scale); + if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + + SurfaceParams new_params = *surface; + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = std::max(aligned_params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); + + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted + remove_surfaces.emplace(surface); + + surface = new_surface; + RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + 
new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); + } + + return std::make_tuple(surface, surface->GetScaledSubRect(params)); +} + +template +auto RasterizerCache::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) -> Surface { + const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + return GetTextureSurface(info, config.config.lod.max_level); +} + +template +auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level) -> Surface { + if (info.physical_address == 0) { + return nullptr; + } + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = PixelFormatFromTextureFormat(info.format); + params.res_scale = /*texture_filterer->IsNull() ?*/ 1 /*: resolution_scale_factor*/; + params.UpdateParams(); + + u32 min_width = info.width >> max_level; + u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, + min_height); + return nullptr; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(Render_OpenGL, + "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return nullptr; + } + + auto surface = GetSurface(params, ScaleMatch::Ignore, true); + if (!surface) + return nullptr; + + // Update mipmap if necessary + if (max_level != 0) { + if (max_level >= 8) { + // since PICA only supports texture size between 8 and 1024, there are at most eight + // possible mipmap levels including the base. 
+ LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + return nullptr; + } + + // Allocate more mipmap level if necessary + if (surface->max_level < max_level) { + /*if (!texture_filterer->IsNull()) { + // TODO: proper mipmap support for custom textures + runtime.GenerateMipmaps(surface->texture, max_level); + }*/ + + surface->max_level = max_level; + } + + // Blit mipmaps that have been invalidated + SurfaceParams surface_params = *surface; + for (u32 level = 1; level <= max_level; ++level) { + // In PICA all mipmap levels are stored next to each other + surface_params.addr += + surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; + surface_params.width /= 2; + surface_params.height /= 2; + surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.UpdateParams(); + + auto& watcher = surface->level_watchers[level - 1]; + if (!watcher || !watcher->Get()) { + auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); + if (level_surface) { + watcher = level_surface->CreateWatcher(); + } else { + watcher = nullptr; + } + } + + if (watcher && !watcher->IsValid()) { + auto level_surface = watcher->Get(); + if (!level_surface->invalid_regions.empty()) { + ValidateSurface(level_surface, level_surface->addr, level_surface->size); + } + + if (/*texture_filterer->IsNull()*/true) { + const auto src_rect = level_surface->GetScaledRect(); + const auto dst_rect = surface_params.GetScaledRect(); + const TextureBlit texture_blit = { + .surface_type = surface->type, + .src_level = 0, + .dst_level = level, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + runtime.BlitTextures(level_surface->texture, surface->texture, texture_blit); + } + + 
watcher->Validate(); + } + } + } + + return surface; +} + +template +auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> const Surface& { + auto& cube = texture_cube_cache[config]; + + struct Face { + Face(std::shared_ptr& watcher, PAddr address) + : watcher(watcher), address(address) {} + std::shared_ptr& watcher; + PAddr address; + }; + + const std::array faces{{ + {cube->level_watchers[0], config.px}, + {cube->level_watchers[1], config.nx}, + {cube->level_watchers[2], config.py}, + {cube->level_watchers[3], config.ny}, + {cube->level_watchers[4], config.pz}, + {cube->level_watchers[5], config.nz}, + }}; + + for (const Face& face : faces) { + if (!face.watcher || !face.watcher->Get()) { + Pica::Texture::TextureInfo info; + info.physical_address = face.address; + info.height = info.width = config.width; + info.format = config.format; + info.SetDefaultStride(); + auto surface = GetTextureSurface(info); + if (surface) { + face.watcher = surface->CreateWatcher(); + } else { + // Can occur when texture address is invalid. We mark the watcher with nullptr + // in this case and the content of the face wouldn't get updated. These are + // usually leftover setup in the texture unit and games are not supposed to draw + // using them. 
+ face.watcher = nullptr; + } + } + } + + if (cube->texture.handle == 0) { + for (const Face& face : faces) { + if (face.watcher) { + auto surface = face.watcher->Get(); + cube->res_scale = std::max(cube->res_scale, surface->res_scale); + } + } + + const u32 width = cube->res_scale * config.width; + cube->texture = runtime.AllocateCubeMap(width, PixelFormatFromTextureFormat(config.format)); + } + + u32 scaled_size = cube->res_scale * config.width; + + for (std::size_t i = 0; i < faces.size(); i++) { + const Face& face = faces[i]; + if (face.watcher && !face.watcher->IsValid()) { + auto surface = face.watcher->Get(); + if (!surface->invalid_regions.empty()) { + ValidateSurface(surface, surface->addr, surface->size); + } + + const auto src_rect = surface->GetScaledRect(); + const TextureBlit texture_blit = { + .surface_type = SurfaceType::Color, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = static_cast(i), + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {0, 0}, + .end = {scaled_size, scaled_size} + } + }; + + runtime.BlitTextures(surface->texture, cube->texture, texture_blit); + face.watcher->Validate(); + } + } + + return cube; +} + +template +auto RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const Common::Rectangle& viewport_rect) + -> SurfaceSurfaceRect_Tuple { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // Update resolution_scale_factor and reset cache if changed + const bool resolution_scale_changed = + resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); + const bool texture_filter_changed = + /*VideoCore::g_texture_filter_update_requested.exchange(false) && + texture_filterer->Reset(Settings::values.texture_filter_name, + VideoCore::GetResolutionScaleFactor())*/false; + + if (resolution_scale_changed || texture_filter_changed) { + 
resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + texture_cube_cache.clear(); + } + + Common::Rectangle viewport_clamped{ + static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + Common::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + Common::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if 
(using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + Common::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + color_surface->InvalidateAllWatcher(); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + depth_surface->InvalidateAllWatcher(); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); +} + +template +auto RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) -> Surface { + SurfaceParams params; + params.addr = config.GetStartAddress(); + params.end = config.GetEndAddress(); + params.size = params.end - params.addr; + params.type = SurfaceType::Fill; + params.res_scale = std::numeric_limits::max(); + + Surface new_surface = std::make_shared(params, runtime); + + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; + } + + RegisterSurface(new_surface); + return new_surface; +} + +template +auto 
RasterizerCache::GetTexCopySurface(const SurfaceParams& params) -> SurfaceRect_Tuple { + Common::Rectangle rect{}; + + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect; + if (params.width != params.stride) { + const u32 tiled_size = match_surface->is_tiled ? 8 : 1; + match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; + } else { + match_subrect = match_surface->FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); + } + + rect = match_surface->GetScaledSubRect(match_subrect); + } + + return std::make_tuple(match_surface, rect); +} + +template +void RasterizerCache::DuplicateSurface(const Surface& src_surface, const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, + dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (const auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +template +void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval validate_interval(addr, addr + size); + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + 
ASSERT(surface->IsRegionValid(validate_interval)); + return; + } + + auto validate_regions = surface->invalid_regions & validate_interval; + auto NotifyValidated = [&](SurfaceInterval interval) { + surface->invalid_regions.erase(interval); + validate_regions.erase(interval); + }; + + while (true) { + const auto it = validate_regions.begin(); + if (it == validate_regions.end()) { + break; + } + + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); + + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = copy_surface->GetCopyableInterval(params); + CopySurface(copy_surface, surface, copy_interval); + NotifyValidated(copy_interval); + continue; + } + + // Try to find surface in cache with different format + // that can can be reinterpreted to the requested format. + if (ValidateByReinterpretation(surface, params, interval)) { + NotifyValidated(interval); + continue; + } + // Could not find a matching reinterpreter, check if we need to implement a + // reinterpreter + if (NoUnimplementedReinterpretations(surface, params, interval) && + !IntervalHasInvalidPixelFormat(params, interval)) { + // No surfaces were found in the cache that had a matching bit-width. + // If the region was created entirely on the GPU, + // assume it was a developer mistake and skip flushing. + if (boost::icl::contains(dirty_regions, interval)) { + LOG_INFO(Render_OpenGL, "Region created fully on GPU and reinterpretation is " + "invalid. 
Skipping validation"); + validate_regions.erase(interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + UploadSurface(surface, interval); + NotifyValidated(params.GetInterval()); + } +} + +MICROPROFILE_DECLARE(RasterizerCache_SurfaceLoad); +template +void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval interval) { + const SurfaceParams info = surface->FromInterval(interval); + const u32 load_start = info.addr; + const u32 load_end = info.end; + ASSERT(load_start >= surface->addr && load_end <= surface->end); + + const auto& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); + MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); + if (!source_ptr) [[unlikely]] { + return; + } + + const u32 start_offset = load_start - surface->addr; + const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); + const u32 upload_size = static_cast(upload_data.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); + + if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); + /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); + } else { + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + }*/ + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + } else { + UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); + } + + surface->UploadTexture(surface->GetSubRect(info), staging); +} + +MICROPROFILE_DECLARE(RasterizerCache_SurfaceFlush); +template +void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval interval) { + const u32 flush_start = 
boost::icl::first(interval); + const u32 flush_end = boost::icl::last_next(interval); + ASSERT(flush_start >= surface->addr && flush_end <= surface->end); + + const auto& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadTexture(surface->GetSubRect(params), staging); + } + + MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); + if (!dest_ptr) [[unlikely]] { + return; + } + + const auto start_offset = flush_start - surface->addr; + const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); + const auto download_size = static_cast(download_dest.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); + + if (surface->type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); + const u32 backup_bytes = start_offset % surface->fill_size; + std::array backup_data; + if (backup_bytes) { + std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); + } + + for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { + std::memcpy(&dest_ptr[offset], &surface->fill_data[0], + std::min(surface->fill_size, download_size - offset)); + } + + if (backup_bytes) + std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto download_data = staging.mapped.subspan(start_offset, download_size); + /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(download_data, download_dest); + } else { + std::memcpy(download_dest.data(), download_data.data(), download_size); + }*/ + 
std::memcpy(download_dest.data(), download_data.data(), download_size); + } else { + SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); + } +} + +template +bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, + SurfaceInterval interval) { + static constexpr std::array all_formats{ + PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, + PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, + PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, + PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, + PixelFormat::D24S8, + }; + bool implemented = true; + for (PixelFormat format : all_formats) { + if (GetFormatBpp(format) == surface->GetFormatBpp()) { + params.pixel_format = format; + // This could potentially be expensive, + // although experimentally it hasn't been too bad + Surface test_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (test_surface != nullptr) { + LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", + PixelFormatAsString(format), + PixelFormatAsString(surface->pixel_format)); + implemented = false; + } + } + } + return implemented; +} + +template +bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval) { + params.pixel_format = PixelFormat::Invalid; + for (const auto& set : RangeFromInterval(surface_cache, interval)) + for (const auto& surface : set.second) + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_DEBUG(Render_OpenGL, "Surface {:#x} found with invalid pixel format", + surface->addr); + return true; + } + return false; +} + +template +bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, + SurfaceInterval interval) { + /*const PixelFormat dst_format = surface->pixel_format; + const SurfaceType type = GetFormatType(dst_format); + + 
for (auto& reinterpreter : + format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) { + + params.pixel_format = reinterpreter->GetSourceFormat(); + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + + if (reinterpret_surface != nullptr) { + auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + auto reinterpret_params = surface->FromInterval(reinterpret_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); + auto dest_rect = surface->GetScaledSubRect(reinterpret_params); + + if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 && + surface->res_scale == resolution_scale_factor) { + // The destination surface is either a framebuffer, or a filtered texture. + // Create an intermediate surface to convert to before blitting to the + // destination. + const u32 width = dest_rect.GetHeight() / resolution_scale_factor; + const u32 height = dest_rect.GetWidth() / resolution_scale_factor; + const Common::Rectangle tmp_rect{0, width, height, 0}; + + OGLTexture tmp_tex = AllocateSurfaceTexture(dst_format, height, width); + reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, tmp_tex, + tmp_rect); + + if (!texture_filterer->Filter(tmp_tex, tmp_rect, surface->texture, dest_rect, type)) { + const TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {0, 0}, + .end = {width, height} + }, + .dst_region = Region2D{ + .start = {dest_rect.left, dest_rect.bottom}, + .end = {dest_rect.right, dest_rect.top} + } + }; + + runtime.BlitTextures(tmp_tex, surface->texture, texture_blit); + } + + } else { + reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, + dest_rect); + } + + return true; + } + }*/ + + return false; +} + +template +void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface 
flush_surface) { + std::lock_guard lock{mutex}; + + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + DownloadSurface(surface, interval); + flushed_intervals += interval; + } + + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + +template +void RasterizerCache::FlushAll() { + FlushRegion(0, 0xFFFFFFFF); +} + +template +void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval invalid_interval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); + region_owner->invalid_regions.erase(invalid_interval); + } + + for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (const auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, 
cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + cached_surface->InvalidateAllWatcher(); + + // If the surface has no salvageable data it should be removed from the cache to avoid + // clogging the data structure + if (cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } + } + } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (const auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +template +auto RasterizerCache::CreateSurface(SurfaceParams& params) -> Surface { + Surface surface = std::make_shared(params, runtime); + surface->invalid_regions.insert(surface->GetInterval()); + + // Allocate surface texture + surface->texture = + runtime.Allocate2D(surface->GetScaledWidth(), surface->GetScaledHeight(), params.pixel_format); + + return surface; +} + +template +void RasterizerCache::RegisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +template +void RasterizerCache::UnregisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (!surface->registered) { + return; + } + surface->registered = false; + 
rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h new file mode 100644 index 000000000..afe629e98 --- /dev/null +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -0,0 +1,211 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/rasterizer_cache/surface_params.h" + +namespace VideoCore { + +using SurfaceRegions = boost::icl::interval_set; + +/** + * A watcher that notifies whether a cached surface has been changed. This is useful for caching + * surface collection objects, including texture cube and mipmap. + */ +template +class SurfaceWatcher { +public: + explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} + + /// Checks whether the surface has been changed. + bool IsValid() const { + return !surface.expired() && valid; + } + + /// Marks that the content of the referencing surface has been updated to the watcher user. + void Validate() { + ASSERT(!surface.expired()); + valid = true; + } + + /// Gets the referencing surface. 
Returns null if the surface has been destroyed + std::shared_ptr Get() const { + return surface.lock(); + } + +public: + std::weak_ptr surface; + bool valid = false; +}; + +template +class SurfaceBase : public SurfaceParams, public std::enable_shared_from_this { + using Watcher = SurfaceWatcher; +public: + SurfaceBase(SurfaceParams& params) : SurfaceParams{params} {} + virtual ~SurfaceBase() = default; + + /// Returns true when this surface can be used to fill the fill_interval of dest_surface + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + + /// Returns true when copy_interval of dest_surface can be validated by copying from this surface + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + /// Returns the region of the biggest valid rectangle within interval + SurfaceInterval GetCopyableInterval(const SurfaceParams& params) const; + + /// Creates a surface watcher linked to this surface + std::shared_ptr CreateWatcher(); + + /// Invalidates all watchers linked to this surface + void InvalidateAllWatcher(); + + /// Removes any linked watchers from this surface + void UnlinkAllWatcher(); + + /// Returns true when the region denoted by interval is valid + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + /// Returns true when the entire surface is invalid + bool IsSurfaceFullyInvalid() const { + auto interval = GetInterval(); + return *invalid_regions.equal_range(interval).first == interval; + } + +public: + bool registered = false; + bool is_texture_cube = false; + SurfaceRegions invalid_regions; + std::array, 7> level_watchers; + u32 max_level = 0; + std::array fill_data; + u32 fill_size = 0; + +public: + u32 watcher_count = 0; + std::array, 8> watchers; +}; + +template +bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill &&
IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +template +bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +template +SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) const { + SurfaceInterval result{}; + const u32 tile_align = params.BytesInPixels(params.is_tiled ? 
8 * 8 : 1); + const auto valid_regions = SurfaceRegions{params.GetInterval() & GetInterval()} - invalid_regions; + + for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + params.addr + Common::AlignUp(boost::icl::first(valid_interval) - params.addr, tile_align), + params.addr + Common::AlignDown(boost::icl::last_next(valid_interval) - params.addr, tile_align) + }; + + if (params.BytesInPixels(tile_align) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = params.BytesInPixels(params.stride) * (params.is_tiled ? 8 : 1); + SurfaceInterval rect_interval{ + params.addr + Common::AlignUp(boost::icl::first(aligned_interval) - params.addr, stride_bytes), + params.addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - params.addr, stride_bytes), + }; + + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + +template +auto SurfaceBase::CreateWatcher() -> std::shared_ptr { + S* derived = reinterpret_cast(this); + auto watcher = std::make_shared(std::move(derived->weak_from_this())); + watchers[watcher_count++] = watcher; + return watcher; +} + +template +void SurfaceBase::InvalidateAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + } + } +} + +template +void SurfaceBase::UnlinkAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + locked->surface.reset(); + } + } + + watchers = {}; + watcher_count = 0; +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 90f2776ea..165b60146 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -6,7 +6,7 @@ #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_params.h" -namespace OpenGL { +namespace VideoCore { SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; @@ -64,47 +64,6 @@ SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle unscale return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; } -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ?
8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); diff --git a/src/video_core/rasterizer_cache/surface_params.h b/src/video_core/rasterizer_cache/surface_params.h index 26c54e10c..8d5f75869 100644 --- a/src/video_core/rasterizer_cache/surface_params.h +++ b/src/video_core/rasterizer_cache/surface_params.h @@ -11,16 +11,13 @@ #include "common/math_util.h" #include "video_core/rasterizer_cache/pixel_format.h" -namespace OpenGL { - -class CachedSurface; -using Surface = std::shared_ptr; +namespace VideoCore { using SurfaceInterval = boost::icl::right_open_interval; class SurfaceParams { public: - // Surface match traits + /// Surface match traits bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanExpand(const SurfaceParams& expanded_surface) const; @@ -29,13 +26,10 @@ public: Common::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; Common::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - // Returns the outer rectangle containing "interval" + /// Returns the outer rectangle containing "interval" SurfaceParams FromInterval(SurfaceInterval interval) const; SurfaceInterval GetSubRectInterval(Common::Rectangle unscaled_rect) const; - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - /// Updates remaining members from the already set addr, width, height and pixel_format void UpdateParams() { if (stride == 0) { @@ -57,7 +51,7 @@ public: } u32 GetFormatBpp() const { - return OpenGL::GetFormatBpp(pixel_format); + return VideoCore::GetFormatBpp(pixel_format); } u32 GetScaledWidth() const { diff --git 
a/src/video_core/rasterizer_cache/texture_runtime.cpp b/src/video_core/rasterizer_cache/texture_runtime.cpp deleted file mode 100644 index 588515a0e..000000000 --- a/src/video_core/rasterizer_cache/texture_runtime.cpp +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/scope_exit.h" -#include "video_core/rasterizer_cache/utils.h" -#include "video_core/rasterizer_cache/texture_runtime.h" -#include "video_core/renderer_opengl/gl_driver.h" -#include "video_core/renderer_opengl/gl_state.h" - -namespace OpenGL { - -GLbitfield MakeBufferMask(SurfaceType type) { - switch (type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - return GL_COLOR_BUFFER_BIT; - case SurfaceType::Depth: - return GL_DEPTH_BUFFER_BIT; - case SurfaceType::DepthStencil: - return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - return GL_COLOR_BUFFER_BIT; -} - -TextureRuntime::TextureRuntime(Driver& driver) : driver(driver) { - read_fbo.Create(); - draw_fbo.Create(); -} - -void TextureRuntime::ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy, - PixelFormat format, std::span pixels) { - - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.ResetTexture(texture.handle); - state.draw.read_framebuffer = read_fbo.handle; - state.Apply(); - - switch (copy.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, - copy.texture_level); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - break; - case SurfaceType::Depth: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, - copy.texture_level); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - break; - case SurfaceType::DepthStencil: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, copy.texture_level); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - // TODO: Use PBO here - const FormatTuple& tuple = GetFormatTuple(format); - glReadPixels(copy.texture_offset.x, copy.texture_offset.y, - copy.texture_offset.x + copy.texture_extent.width, - copy.texture_offset.y + copy.texture_extent.height, - tuple.format, tuple.type, pixels.data() + copy.buffer_offset); -} - -bool TextureRuntime::ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - // Setup scissor rectangle according to the clear rectangle - OpenGLState state{}; - state.scissor.enabled = true; - state.scissor.x = clear.rect.offset.x; - state.scissor.y = clear.rect.offset.y; - state.scissor.width = clear.rect.extent.width; - state.scissor.height = clear.rect.extent.height; - state.draw.draw_framebuffer = draw_fbo.handle; - state.Apply(); - - switch (clear.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, - clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - state.color_mask.red_enabled = true; - state.color_mask.green_enabled = true; - state.color_mask.blue_enabled = true; - state.color_mask.alpha_enabled = true; - state.Apply(); - - glClearBufferfv(GL_COLOR, 0, value.color.AsArray()); - 
break; - case SurfaceType::Depth: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, - clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - state.depth.write_mask = GL_TRUE; - state.Apply(); - - glClearBufferfv(GL_DEPTH, 0, &value.depth); - break; - case SurfaceType::DepthStencil: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, clear.texture_level); - - state.depth.write_mask = GL_TRUE; - state.stencil.write_mask = -1; - state.Apply(); - - glClearBufferfi(GL_DEPTH_STENCIL, 0, value.depth, value.stencil); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - return true; -} - -bool TextureRuntime::CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy) { - return true; -} - -bool TextureRuntime::BlitTextures(OGLTexture& source, OGLTexture& dest, const TextureBlit& blit) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.draw.read_framebuffer = read_fbo.handle; - state.draw.draw_framebuffer = draw_fbo.handle; - state.Apply(); - - auto BindAttachment = [&blit, &source, &dest](GLenum attachment, u32 src_tex, u32 dst_tex) -> void { - const GLenum src_target = source.target == GL_TEXTURE_CUBE_MAP ? - GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.src_layer : source.target; - const GLenum dst_target = dest.target == GL_TEXTURE_CUBE_MAP ? 
- GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.dst_layer : dest.target; - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, attachment, src_target, src_tex, blit.src_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, dst_target, dst_tex, blit.dst_level); - }; - - switch (blit.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - // Bind only color - BindAttachment(GL_COLOR_ATTACHMENT0, source.handle, dest.handle); - BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, 0, 0); - break; - case SurfaceType::Depth: - // Bind only depth - BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); - BindAttachment(GL_DEPTH_ATTACHMENT, source.handle, dest.handle); - BindAttachment(GL_STENCIL_ATTACHMENT, 0, 0); - break; - case SurfaceType::DepthStencil: - // Bind to combined depth + stencil - BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); - BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, source.handle, dest.handle); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - // TODO (wwylele): use GL_NEAREST for shadow map texture - // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but - // doing linear intepolation componentwise would cause incorrect value. However, for a - // well-programmed game this code path should be rarely executed for shadow map with - // inconsistent scale. - const GLbitfield buffer_mask = MakeBufferMask(blit.surface_type); - const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST; - glBlitFramebuffer(blit.src_region.start.x, blit.src_region.start.y, - blit.src_region.end.x, blit.src_region.end.y, - blit.dst_region.start.x, blit.dst_region.start.y, - blit.dst_region.end.x, blit.dst_region.end.y, - buffer_mask, filter); - - return true; -} - -void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.texture_units[0].texture_2d = texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); - - glGenerateMipmap(GL_TEXTURE_2D); -} - -const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { - const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; - const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; - auto& search = upload ? upload_buffers : download_buffers; - - // Attempt to find a free buffer that fits the requested data - for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { - if (!upload || it->IsFree()) { - return *it; - } - } - - OGLBuffer buffer{}; - buffer.Create(); - - glBindBuffer(target, buffer.handle); - - // Allocate a new buffer and map the data to the host - std::byte* data = nullptr; - if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { - const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; - glBufferStorageEXT(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT); - data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT)); - } else if (driver.HasArbBufferStorage()) { - const GLbitfield storage = upload ? 
GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; - glBufferStorage(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT); - data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT)); - } else { - UNIMPLEMENTED(); - } - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - StagingBuffer staging = { - .buffer = std::move(buffer), - .mapped = std::span{data, size}, - .size = size - }; - - const auto& it = search.emplace(std::move(staging)); - return *it; -} - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/types.h b/src/video_core/rasterizer_cache/types.h index be85a8c01..60b8ca025 100644 --- a/src/video_core/rasterizer_cache/types.h +++ b/src/video_core/rasterizer_cache/types.h @@ -7,7 +7,7 @@ #include "common/vector_math.h" #include "video_core/rasterizer_cache/pixel_format.h" -namespace OpenGL { +namespace VideoCore { struct Offset { constexpr auto operator<=>(const Offset&) const noexcept = default; diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index ddfa369d2..c8e4ab1f2 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -13,51 +13,7 @@ #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/video_core.h" -namespace OpenGL { - -constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - -static constexpr std::array depth_format_tuples = {{ - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 - {}, - {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 -}}; - -static constexpr std::array fb_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 - {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, 
// RGB565 - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 -}}; - -// Same as above, with minor changes for OpenGL ES. Replaced -// GL_UNSIGNED_INT_8_8_8_8 with GL_UNSIGNED_BYTE and -// GL_BGR with GL_RGB -static constexpr std::array fb_format_tuples_oes = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // RGB8 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 -}}; - -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - const SurfaceType type = GetFormatType(pixel_format); - const std::size_t format_index = static_cast(pixel_format); - - if (type == SurfaceType::Color) { - ASSERT(format_index < fb_format_tuples.size()); - return (GLES ? fb_format_tuples_oes : fb_format_tuples)[format_index]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - const std::size_t tuple_idx = format_index - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - return depth_format_tuples[tuple_idx]; - } - - return tex_tuple; -} +namespace VideoCore { void SwizzleTexture(const SurfaceParams& params, u32 start_offset, std::span source_linear, std::span dest_tiled) { diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index e66016e14..1f749ae24 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -9,15 +9,7 @@ #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/rasterizer_cache/types.h" -namespace OpenGL { - -struct FormatTuple { - int internal_format; - u32 format; - u32 type; -}; - -const FormatTuple& GetFormatTuple(PixelFormat pixel_format); +namespace VideoCore { struct HostTextureTag { PixelFormat format{}; @@ -70,15 +62,15 @@ void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, namespace std { template <> -struct hash { - std::size_t 
operator()(const OpenGL::HostTextureTag& tag) const noexcept { +struct hash { + std::size_t operator()(const VideoCore::HostTextureTag& tag) const noexcept { return tag.Hash(); } }; template <> -struct hash { - std::size_t operator()(const OpenGL::TextureCubeConfig& config) const noexcept { +struct hash { + std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept { return config.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 437675c46..d5202169f 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -60,8 +60,8 @@ void main() { vao.Create(); } - PixelFormat GetSourceFormat() const override { - return PixelFormat::RGBA4; + VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::RGBA4; } void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, @@ -170,8 +170,8 @@ void main() { } } - PixelFormat GetSourceFormat() const override { - return PixelFormat::D24S8; + VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::D24S8; } void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, @@ -246,18 +246,18 @@ FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { const std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; const std::string_view version{reinterpret_cast(glGetString(GL_VERSION))}; - auto Register = [this](PixelFormat dest, std::unique_ptr&& obj) { + auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { const u32 dst_index = static_cast(dest); return reinterpreters[dst_index].push_back(std::move(obj)); }; - Register(PixelFormat::RGBA8, std::make_unique()); + Register(VideoCore::PixelFormat::RGBA8, std::make_unique()); LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); - 
Register(PixelFormat::RGB5A1, std::make_unique()); + Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } -auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(PixelFormat dst_format) +auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(VideoCore::PixelFormat dst_format) -> const ReinterpreterList& { return reinterpreters[static_cast(dst_format)]; } diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h index 9e1cd06db..0e9cc61e7 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.h @@ -22,7 +22,7 @@ public: virtual ~FormatReinterpreterBase() = default; - virtual PixelFormat GetSourceFormat() const = 0; + virtual VideoCore::PixelFormat GetSourceFormat() const = 0; virtual void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, const OGLTexture& dst_tex, Common::Rectangle dst_rect) = 0; @@ -38,10 +38,10 @@ public: FormatReinterpreterOpenGL(); ~FormatReinterpreterOpenGL() = default; - const ReinterpreterList& GetPossibleReinterpretations(PixelFormat dst_format); + const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dst_format); private: - std::array reinterpreters; + std::array reinterpreters; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 42597138e..324b1dd32 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -39,7 +39,8 @@ static bool IsVendorIntel() { #endif RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver) - : driver(driver), is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), + : driver{driver}, runtime{driver}, res_cache{*this, runtime}, + is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, 
is_amd), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), @@ -526,8 +527,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { regs.rasterizer.viewport_corner.y // bottom }; - Surface color_surface; - Surface depth_surface; + RasterizerCache::Surface color_surface, depth_surface; Common::Rectangle surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); @@ -638,7 +638,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const auto BindCubeFace = [&](GLuint& target, Pica::TexturingRegs::CubeFace face, Pica::Texture::TextureInfo& info) { info.physical_address = regs.texturing.GetCubePhysicalAddress(face); - Surface surface = res_cache.GetTextureSurface(info); + auto surface = res_cache.GetTextureSurface(info); if (surface != nullptr) { CheckBarrier(target = surface->texture.handle); @@ -657,7 +657,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; switch (texture.config.type.Value()) { case TextureType::Shadow2D: { - Surface surface = res_cache.GetTextureSurface(texture); + auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); } else { @@ -677,23 +677,26 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { BindCubeFace(state.image_shadow_texture_nz, CubeFace::NegativeZ, info); continue; } - case TextureType::TextureCube: + case TextureType::TextureCube: { using CubeFace = Pica::TexturingRegs::CubeFace; - TextureCubeConfig config; - config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); - config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); - config.py = 
regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); - config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); - config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); - config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); - config.width = texture.config.width; - config.format = texture.format; + const VideoCore::TextureCubeConfig config = { + .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), + .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), + .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), + .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), + .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), + .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), + .width = texture.config.width, + .format = texture.format + }; + state.texture_cube_unit.texture_cube = - res_cache.GetTextureCube(config).texture.handle; + res_cache.GetTextureCube(config)->texture.handle; texture_cube_sampler.SyncWithConfig(texture.config); state.texture_units[texture_index].texture_2d = 0; continue; // Texture unit 0 setup finished. Continue to next unit + } default: break; } @@ -702,7 +705,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } texture_samplers[texture_index].SyncWithConfig(texture.config); - Surface surface = res_cache.GetTextureSurface(texture); + auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { CheckBarrier(state.texture_units[texture_index].texture_2d = surface->texture.handle); @@ -721,19 +724,15 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } } + // The game is trying to use a surface as a texture and framebuffer at the same time + // which causes unpredictable behavior on the host. + // Making a copy to sample from eliminates this issue and seems to be fairly cheap. 
OGLTexture temp_tex; if (need_duplicate_texture) { - const auto& tuple = GetFormatTuple(color_surface->pixel_format); - const GLsizei levels = color_surface->max_level + 1; + temp_tex = runtime.Allocate2D(color_surface->GetScaledWidth(), color_surface->GetScaledHeight(), + color_surface->pixel_format); - // The game is trying to use a surface as a texture and framebuffer at the same time - // which causes unpredictable behavior on the host. - // Making a copy to sample from eliminates this issue and seems to be fairly cheap. - temp_tex.Create(); - temp_tex.Allocate(GL_TEXTURE_2D, levels, tuple.internal_format, - color_surface->GetScaledWidth(), color_surface->GetScaledHeight()); - - temp_tex.CopyFrom(color_surface->texture, GL_TEXTURE_2D, levels, + temp_tex.CopyFrom(color_surface->texture, GL_TEXTURE_2D, color_surface->max_level + 1, color_surface->GetScaledWidth(), color_surface->GetScaledHeight()); for (auto& unit : state.texture_units) { @@ -1364,40 +1363,37 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); src_params.width = config.output_width; src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.input_format); src_params.UpdateParams(); - SurfaceParams dst_params; + VideoCore::SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 : config.output_width.Value(); dst_params.height = config.scaling == config.ScaleXY ? 
config.output_height.Value() / 2 : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.output_format); dst_params.UpdateParams(); - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); if (src_surface == nullptr) return false; dst_params.res_scale = src_surface->res_scale; - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); - if (dst_surface == nullptr) + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, false); + if (dst_surface == nullptr) { return false; + } if (src_surface->is_tiled != dst_surface->is_tiled) std::swap(src_rect.top, src_rect.bottom); @@ -1444,7 +1440,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon return false; } - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); src_params.stride = input_width + input_gap; // stride in bytes src_params.width = input_width; // width in bytes @@ -1452,9 +1448,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; src_params.end = src_params.addr + src_params.size; - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + auto [src_surface, src_rect] = res_cache.GetTexCopySurface(src_params); if (src_surface == nullptr) { return false; } @@ -1466,7 +1460,7 
@@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon return false; } - SurfaceParams dst_params = *src_surface; + VideoCore::SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = src_rect.GetWidth() / src_surface->res_scale; dst_params.stride = dst_params.width + src_surface->PixelsInBytes( @@ -1477,15 +1471,13 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon // Since we are going to invalidate the gap if there is one, we will have to load it first const bool load_gap = output_gap != 0; - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap); if (dst_surface == nullptr) { return false; } - if (dst_surface->type == SurfaceType::Texture) { + if (dst_surface->type == VideoCore::SurfaceType::Texture) { return false; } @@ -1498,7 +1490,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon } bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - Surface dst_surface = res_cache.GetFillSurface(config); + auto dst_surface = res_cache.GetFillSurface(config); if (dst_surface == nullptr) return false; @@ -1514,19 +1506,17 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = framebuffer_addr; src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = 
VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); src_params.UpdateParams(); - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + const auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1cb2489fc..eaf1751a3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -6,10 +6,10 @@ #include "common/vector_math.h" #include "core/hw/gpu.h" #include "video_core/pica_types.h" -#include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/regs_lighting.h" #include "video_core/regs_texturing.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -252,6 +252,7 @@ private: OpenGLState state; GLuint default_texture; + TextureRuntime runtime; RasterizerCache res_cache; std::vector vertex_batch; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp new file mode 100644 index 000000000..2c731982f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -0,0 +1,490 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/scope_exit.h" +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" +#include "video_core/renderer_opengl/gl_driver.h" +#include "video_core/renderer_opengl/gl_state.h" + +namespace OpenGL { + +constexpr FormatTuple DEFAULT_TUPLE = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + +static constexpr std::array DEPTH_TUPLES = { + FormatTuple{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + FormatTuple{}, + FormatTuple{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + FormatTuple{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 +}; + +static constexpr std::array COLOR_TUPLES = { + FormatTuple{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 + FormatTuple{GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 + FormatTuple{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + FormatTuple{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 +}; + +static constexpr std::array COLOR_TUPLES_OES = { + FormatTuple{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + FormatTuple{GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // RGB8 + FormatTuple{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + FormatTuple{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 +}; + +GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return GL_COLOR_BUFFER_BIT; + case VideoCore::SurfaceType::Depth: + return GL_DEPTH_BUFFER_BIT; + case VideoCore::SurfaceType::DepthStencil: + return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return GL_COLOR_BUFFER_BIT; +} + +TextureRuntime::TextureRuntime(Driver& driver) : driver(driver) { + 
read_fbo.Create(); + draw_fbo.Create(); +} + +const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { + const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; + const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; + auto& search = upload ? upload_buffers : download_buffers; + + // Attempt to find a free buffer that fits the requested data + for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { + if (!upload || it->IsFree()) { + return *it; + } + } + + OGLBuffer buffer{}; + buffer.Create(); + + glBindBuffer(target, buffer.handle); + + // Allocate a new buffer and map the data to the host + std::byte* data = nullptr; + if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { + const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; + glBufferStorageEXT(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT)); + } else if (driver.HasArbBufferStorage()) { + const GLbitfield storage = upload ? 
GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; + glBufferStorage(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT)); + } else { + UNIMPLEMENTED(); + } + + glBindBuffer(target, 0); + + StagingBuffer staging = { + .buffer = std::move(buffer), + .mapped = std::span{data, size}, + .size = size + }; + + const auto& it = search.emplace(std::move(staging)); + return *it; +} + +const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) { + const auto type = GetFormatType(pixel_format); + const std::size_t format_index = static_cast(pixel_format); + + if (type == VideoCore::SurfaceType::Color) { + ASSERT(format_index < COLOR_TUPLES.size()); + return (driver.IsOpenGLES() ? COLOR_TUPLES_OES : COLOR_TUPLES)[format_index]; + } else if (type == VideoCore::SurfaceType::Depth || + type == VideoCore::SurfaceType::DepthStencil) { + const std::size_t tuple_idx = format_index - 14; + ASSERT(tuple_idx < DEPTH_TUPLES.size()); + return DEPTH_TUPLES[tuple_idx]; + } + + return DEFAULT_TUPLE; +} + +OGLTexture TextureRuntime::Allocate2D(u32 width, u32 height, VideoCore::PixelFormat format) { + const auto& tuple = GetFormatTuple(format); + auto recycled_tex = texture2d_recycler.find({format, width, height}); + if (recycled_tex != texture2d_recycler.end()) { + OGLTexture texture = std::move(recycled_tex->second); + texture2d_recycler.erase(recycled_tex); + return texture; + } + + // Allocate the 2D texture + OGLTexture texture{}; + texture.Create(); + texture.Allocate(GL_TEXTURE_2D, std::bit_width(std::max(width, height)), + tuple.internal_format, width, height); + + return texture; +} + +OGLTexture TextureRuntime::AllocateCubeMap(u32 width, VideoCore::PixelFormat format) { + const auto& tuple = GetFormatTuple(format); + + // Allocate the cube texture + OGLTexture texture{}; + texture.Create(); + 
texture.Allocate(GL_TEXTURE_CUBE_MAP, std::bit_width(width), + tuple.internal_format, width, width); + + return texture; +} + +void TextureRuntime::ReadTexture(OGLTexture& texture, const VideoCore::BufferTextureCopy& copy, + VideoCore::PixelFormat format) { + + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.ResetTexture(texture.handle); + state.draw.read_framebuffer = read_fbo.handle; + state.Apply(); + + switch (copy.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, + copy.texture_level); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + break; + case VideoCore::SurfaceType::Depth: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, + copy.texture_level); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + break; + case VideoCore::SurfaceType::DepthStencil: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, copy.texture_level); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + const FormatTuple& tuple = GetFormatTuple(format); + glReadPixels(copy.texture_offset.x, copy.texture_offset.y, + copy.texture_extent.width, // glReadPixels takes a width/height, not end coordinates + copy.texture_extent.height, + tuple.format, tuple.type, + reinterpret_cast(copy.buffer_offset)); +} + +bool TextureRuntime::ClearTexture(OGLTexture& texture, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value) { + OpenGLState prev_state = 
OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + // Setup scissor rectangle according to the clear rectangle + OpenGLState state{}; + state.scissor.enabled = true; + state.scissor.x = clear.rect.offset.x; + state.scissor.y = clear.rect.offset.y; + state.scissor.width = clear.rect.extent.width; + state.scissor.height = clear.rect.extent.height; + state.draw.draw_framebuffer = draw_fbo.handle; + state.Apply(); + + switch (clear.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, + clear.texture_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + state.color_mask.red_enabled = true; + state.color_mask.green_enabled = true; + state.color_mask.blue_enabled = true; + state.color_mask.alpha_enabled = true; + state.Apply(); + + glClearBufferfv(GL_COLOR, 0, value.color.AsArray()); + break; + case VideoCore::SurfaceType::Depth: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, + clear.texture_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + state.depth.write_mask = GL_TRUE; + state.Apply(); + + glClearBufferfv(GL_DEPTH, 0, &value.depth); + break; + case VideoCore::SurfaceType::DepthStencil: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, clear.texture_level); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + + glClearBufferfi(GL_DEPTH_STENCIL, 0, value.depth, value.stencil); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + 
return true; +} + +bool TextureRuntime::CopyTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureCopy& copy) { + return true; +} + +bool TextureRuntime::BlitTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.draw.read_framebuffer = read_fbo.handle; + state.draw.draw_framebuffer = draw_fbo.handle; + state.Apply(); + + auto BindAttachment = [&blit, &source, &dest](GLenum attachment, u32 src_tex, u32 dst_tex) -> void { + const GLenum src_target = source.target == GL_TEXTURE_CUBE_MAP ? + GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.src_layer : source.target; + const GLenum dst_target = dest.target == GL_TEXTURE_CUBE_MAP ? + GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.dst_layer : dest.target; + + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, attachment, src_target, src_tex, blit.src_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, dst_target, dst_tex, blit.dst_level); + }; + + switch (blit.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + // Bind only color + BindAttachment(GL_COLOR_ATTACHMENT0, source.handle, dest.handle); + BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, 0, 0); + break; + case VideoCore::SurfaceType::Depth: + // Bind only depth + BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); + BindAttachment(GL_DEPTH_ATTACHMENT, source.handle, dest.handle); + BindAttachment(GL_STENCIL_ATTACHMENT, 0, 0); + break; + case VideoCore::SurfaceType::DepthStencil: + // Bind to combined depth + stencil + BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); + BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, source.handle, dest.handle); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + // TODO (wwylele): use GL_NEAREST for shadow map texture + // Note: shadow map is treated as RGBA8 format in PICA, as well as in the 
rasterizer cache, but + // doing linear interpolation componentwise would cause incorrect value. However, for a + // well-programmed game this code path should be rarely executed for shadow map with + // inconsistent scale. + const GLbitfield buffer_mask = MakeBufferMask(blit.surface_type); + const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST; + glBlitFramebuffer(blit.src_region.start.x, blit.src_region.start.y, + blit.src_region.end.x, blit.src_region.end.y, + blit.dst_region.start.x, blit.dst_region.start.y, + blit.dst_region.end.x, blit.dst_region.end.y, + buffer_mask, filter); + + return true; +} + +void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); + + glGenerateMipmap(GL_TEXTURE_2D); +} + +MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadTexture(Common::Rectangle rect, const StagingBuffer& staging) { + MICROPROFILE_SCOPE(RasterizerCache_TextureUL); + + const FormatTuple& tuple = runtime.GetFormatTuple(pixel_format); + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); + + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + unscaled_tex = runtime.Allocate2D(rect.GetWidth(), rect.GetHeight(), pixel_format); + target_tex = unscaled_tex.handle; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = 
cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = target_tex; + cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + reinterpret_cast(buffer_offset)); + + staging.Lock(); + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + const Common::Rectangle from_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; + /*if (!owner.texture_filterer->Filter(unscaled_tex, from_rect, texture, scaled_rect, type)) { + const TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_region = Region2D{ + .start = {0, 0}, + .end = {width, height} + }, + .dst_region = Region2D{ + .start = {rect.left, rect.bottom}, + .end = {rect.right, rect.top} + } + }; + + runtime.BlitTextures(unscaled_tex, texture, texture_blit); + }*/ + } + + InvalidateAllWatcher(); +} + +MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); +void CachedSurface::DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging) { + MICROPROFILE_SCOPE(RasterizerCache_TextureDL); + + const FormatTuple& tuple = runtime.GetFormatTuple(pixel_format); + + OpenGLState state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + // Ensure no bad 
interactions with GL_PACK_ALIGNMENT + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); + const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + OGLTexture unscaled_tex = runtime.Allocate2D(rect.GetWidth(), rect.GetHeight(), pixel_format); + + const VideoCore::TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_region = VideoCore::Region2D{ + .start = {scaled_rect.left, scaled_rect.bottom}, + .end = {scaled_rect.right, scaled_rect.top} + }, + .dst_region = VideoCore::Region2D{ + .start = {0, 0}, + .end = {rect.GetWidth(), rect.GetHeight()} + } + }; + + // Blit scaled texture to the unscaled one + runtime.BlitTextures(texture, unscaled_tex, texture_blit); + + state.texture_units[0].texture_2d = unscaled_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + + /*if (GLES) { + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect.GetHeight(), rect.GetWidth(), + reinterpret_cast(buffer_offset)); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); + }*/ + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); + } else { + const u32 download_size = width * height * GetBytesPerPixel(pixel_format); + const VideoCore::BufferTextureCopy texture_download = { + .buffer_offset = buffer_offset, + .buffer_size = download_size, + .buffer_row_length = stride, + .buffer_height = height, + .surface_type = type, + .texture_level = 0, + .texture_offset = {rect.left, 
rect.bottom}, // ReadTexture passes .x/.y to glReadPixels as x/y + .texture_extent = {rect.GetWidth(), rect.GetHeight()} + }; + + runtime.ReadTexture(texture, texture_download, pixel_format); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); // unbind the pack buffer bound for this download + glPixelStorei(GL_PACK_ROW_LENGTH, 0); +} + +} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h similarity index 52% rename from src/video_core/rasterizer_cache/texture_runtime.h rename to src/video_core/renderer_opengl/gl_texture_runtime.h index d7758cef1..7eaf5efa4 100644 --- a/src/video_core/rasterizer_cache/texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -5,12 +5,18 @@ #pragma once #include #include +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/rasterizer_cache/types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -struct FormatTuple; +struct FormatTuple { + GLint internal_format; + GLenum format; + GLenum type; +}; struct StagingBuffer { OGLBuffer buffer{}; @@ -50,6 +56,7 @@ class Driver; * Separating this into a class makes it easier to abstract graphics API code */ class TextureRuntime { + friend class CachedSurface; public: TextureRuntime(Driver& driver); ~TextureRuntime() = default; @@ -57,18 +64,28 @@ public: /// Maps an internal staging buffer of the provided size of pixel uploads/downloads const StagingBuffer& FindStaging(u32 size, bool upload); + /// Returns the OpenGL format tuple associated with the provided pixel format + const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + + /// Allocates a 2D OpenGL texture with the specified dimensions and format + OGLTexture Allocate2D(u32 width, u32 height, VideoCore::PixelFormat format); + + /// Allocates an OpenGL cube map texture with the specified dimensions and format + OGLTexture AllocateCubeMap(u32 width, VideoCore::PixelFormat format); + + /// Copies the GPU 
pixel data to the provided pixels buffer - void ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy, - PixelFormat format, std::span pixels); + void ReadTexture(OGLTexture& texture, const VideoCore::BufferTextureCopy& copy, + VideoCore::PixelFormat format); /// Fills the rectangle of the texture with the clear value provided - bool ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value); + bool ClearTexture(OGLTexture& texture, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value); /// Copies a rectangle of src_tex to another rectange of dst_rect - bool CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy); + bool CopyTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureCopy& copy); /// Blits a rectangle of src_tex to another rectange of dst_rect - bool BlitTextures(OGLTexture& source, OGLTexture& dest, const TextureBlit& blit); + bool BlitTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureBlit& blit); /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(OGLTexture& texture, u32 max_level); @@ -76,8 +93,37 @@ public: private: Driver& driver; OGLFramebuffer read_fbo, draw_fbo; + std::unordered_multimap texture2d_recycler; + + // Staging buffers stored in increasing size std::multiset upload_buffers; std::multiset download_buffers; }; +class CachedSurface : public VideoCore::SurfaceBase { +public: + CachedSurface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime} {} + ~CachedSurface() override = default; + + /// Uploads pixel data in staging to a rectangle region of the surface texture + void UploadTexture(Common::Rectangle rect, const StagingBuffer& staging); + + /// Downloads pixel data to staging from a rectangle region of the surface texture + void DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging); + +private: + TextureRuntime& runtime; + 
+public: + OGLTexture texture{}; +}; + +struct Traits { + using Runtime = TextureRuntime; + using Surface = CachedSurface; +}; + +using RasterizerCache = VideoCore::RasterizerCache; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/texture_downloader_es.cpp b/src/video_core/renderer_opengl/texture_downloader_es.cpp index 4d2339801..fe4c47bad 100644 --- a/src/video_core/renderer_opengl/texture_downloader_es.cpp +++ b/src/video_core/renderer_opengl/texture_downloader_es.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/texture_downloader_es.h" #include "shaders/depth_to_color.frag" @@ -16,6 +17,17 @@ namespace OpenGL { +static constexpr std::array DEPTH_TUPLES_HACK = { + FormatTuple{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + FormatTuple{}, + FormatTuple{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + FormatTuple{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 +}; + +const FormatTuple& GetFormatTuple(VideoCore::PixelFormat format) { + return DEPTH_TUPLES_HACK[static_cast<std::size_t>(format) - 14]; // table starts at D16; same -14 rebase as TextureRuntime::GetFormatTuple +} + /** * Self tests for the texture downloader */ @@ -75,13 +87,13 @@ void TextureDownloaderES::Test() { } }; LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting"); - test(GetFormatTuple(PixelFormat::D24S8), std::vector{}, 4096, + test(GetFormatTuple(VideoCore::PixelFormat::D24S8), std::vector{}, 4096, [](std::size_t idx) { return static_cast((idx << 8) | (idx & 0xFF)); }); LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting"); - test(GetFormatTuple(PixelFormat::D24), std::vector{}, 4096, + test(GetFormatTuple(VideoCore::PixelFormat::D24), std::vector{}, 4096, [](std::size_t idx) { return static_cast(idx << 8); }); LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test 
starting"); - test(GetFormatTuple(PixelFormat::D16), std::vector{}, 256, + test(GetFormatTuple(VideoCore::PixelFormat::D16), std::vector{}, 256, [](std::size_t idx) { return static_cast(idx); }); cur_state.Apply(); diff --git a/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp b/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp index 264a3a767..d5b0859ca 100644 --- a/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp +++ b/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp @@ -62,10 +62,10 @@ bool TextureFilterer::IsNull() const { bool TextureFilterer::Filter(const OGLTexture& src_tex, Common::Rectangle src_rect, const OGLTexture& dst_tex, Common::Rectangle dst_rect, - SurfaceType type) { + VideoCore::SurfaceType type) { // Depth/Stencil texture filtering is not supported for now - if (IsNull() || (type != SurfaceType::Color && type != SurfaceType::Texture)) { + if (IsNull() || (type != VideoCore::SurfaceType::Color && type != VideoCore::SurfaceType::Texture)) { return false; } diff --git a/src/video_core/renderer_opengl/texture_filters/texture_filterer.h b/src/video_core/renderer_opengl/texture_filters/texture_filterer.h index ca3fec4f7..b9f5e0634 100644 --- a/src/video_core/renderer_opengl/texture_filters/texture_filterer.h +++ b/src/video_core/renderer_opengl/texture_filters/texture_filterer.h @@ -27,7 +27,8 @@ public: // Returns true if the texture was able to be filtered bool Filter(const OGLTexture& src_tex, Common::Rectangle src_rect, - const OGLTexture& dst_tex, Common::Rectangle dst_rect, SurfaceType type); + const OGLTexture& dst_tex, Common::Rectangle dst_rect, + VideoCore::SurfaceType type); static std::vector GetFilterNames();