diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index fe3566aa7..bef0b3fc0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -26,19 +26,16 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h - rasterizer_cache/cached_surface.cpp - rasterizer_cache/cached_surface.h rasterizer_cache/morton_swizzle.h rasterizer_cache/pixel_format.h rasterizer_cache/rasterizer_cache.cpp rasterizer_cache/rasterizer_cache.h + rasterizer_cache/surface_base.h rasterizer_cache/types.h rasterizer_cache/utils.cpp rasterizer_cache/utils.h rasterizer_cache/surface_params.cpp rasterizer_cache/surface_params.h - rasterizer_cache/texture_runtime.cpp - rasterizer_cache/texture_runtime.h renderer_opengl/frame_dumper_opengl.cpp renderer_opengl/frame_dumper_opengl.h renderer_opengl/gl_driver.cpp @@ -61,6 +58,8 @@ add_library(video_core STATIC renderer_opengl/gl_state.h renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_stream_buffer.h + renderer_opengl/gl_texture_runtime.cpp + renderer_opengl/gl_texture_runtime.h renderer_opengl/gl_vars.cpp renderer_opengl/gl_vars.h renderer_opengl/pica_to_gl.h diff --git a/src/video_core/rasterizer_cache/cached_surface.cpp b/src/video_core/rasterizer_cache/cached_surface.cpp deleted file mode 100644 index 201fa5a09..000000000 --- a/src/video_core/rasterizer_cache/cached_surface.cpp +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "video_core/rasterizer_cache/cached_surface.h" -#include "video_core/rasterizer_cache/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/renderer_opengl/texture_downloader_es.h" -#include "video_core/renderer_opengl/texture_filters/texture_filterer.h" - -namespace OpenGL { - -CachedSurface::~CachedSurface() { - if (texture.handle) { - const auto tag = HostTextureTag{pixel_format, GetScaledWidth(), GetScaledHeight()}; - owner.host_texture_recycler.emplace(tag, std::move(texture)); - } -} - -MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadTexture(Common::Rectangle rect, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureUL); - - // Load data from memory to the surface - GLint x0 = static_cast(rect.left); - GLint y0 = static_cast(rect.bottom); - std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); - - GLuint target_tex = texture.handle; - - // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface - OGLTexture unscaled_tex; - if (res_scale != 1) { - x0 = 0; - y0 = 0; - - unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight()); - target_tex = unscaled_tex.handle; - } - - OpenGLState cur_state = OpenGLState::GetCurState(); - - GLuint old_tex = cur_state.texture_units[0].texture_2d; - cur_state.texture_units[0].texture_2d = target_tex; - cur_state.Apply(); - - const FormatTuple& tuple = GetFormatTuple(pixel_format); - - // Ensure no bad interactions with GL_UNPACK_ALIGNMENT - ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); - - glActiveTexture(GL_TEXTURE0); - 
glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), - static_cast(rect.GetHeight()), tuple.format, tuple.type, - reinterpret_cast(buffer_offset)); - - staging.Lock(); - - cur_state.texture_units[0].texture_2d = old_tex; - cur_state.Apply(); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left *= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - const Common::Rectangle from_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; - if (!owner.texture_filterer->Filter(unscaled_tex, from_rect, texture, scaled_rect, type)) { - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_region = Region2D{ - .start = {0, 0}, - .end = {width, height} - }, - .dst_region = Region2D{ - .start = {rect.left, rect.bottom}, - .end = {rect.right, rect.top} - } - }; - - runtime.BlitTextures(unscaled_tex, texture, texture_blit); - } - } - - InvalidateAllWatcher(); -} - -MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); -void CachedSurface::DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging) { - MICROPROFILE_SCOPE(RasterizerCache_TextureDL); - - OpenGLState state = OpenGLState::GetCurState(); - OpenGLState prev_state = state; - SCOPE_EXIT({ prev_state.Apply(); }); - - // Ensure no bad interactions with GL_PACK_ALIGNMENT - ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); - glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); - const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); - - // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush - if (res_scale != 1) { - auto scaled_rect = rect; - scaled_rect.left 
*= res_scale; - scaled_rect.top *= res_scale; - scaled_rect.right *= res_scale; - scaled_rect.bottom *= res_scale; - - OGLTexture unscaled_tex = owner.AllocateSurfaceTexture(pixel_format, rect.GetWidth(), rect.GetHeight()); - - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_region = Region2D{ - .start = {scaled_rect.left, scaled_rect.bottom}, - .end = {scaled_rect.right, scaled_rect.top} - }, - .dst_region = Region2D{ - .start = {0, 0}, - .end = {rect.GetWidth(), rect.GetHeight()} - } - }; - - // Blit scaled texture to the unscaled one - runtime.BlitTextures(texture, unscaled_tex, texture_blit); - - state.texture_units[0].texture_2d = unscaled_tex.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - - const FormatTuple& tuple = GetFormatTuple(pixel_format); - if (GLES) { - owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, - rect.GetHeight(), rect.GetWidth(), - reinterpret_cast(buffer_offset)); - } else { - glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); - } - } else { - const u32 download_size = width * height * GetBytesPerPixel(pixel_format); - const BufferTextureCopy texture_download = { - .buffer_offset = buffer_offset, - .buffer_size = download_size, - .buffer_row_length = stride, - .buffer_height = height, - .surface_type = type, - .texture_level = 0, - .texture_offset = {rect.bottom, rect.left}, - .texture_extent = {rect.GetWidth(), rect.GetHeight()} - }; - - runtime.ReadTexture(texture, texture_download, pixel_format, staging.mapped); - } - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); -} - -bool CachedSurface::CanFill(const SurfaceParams& dest_surface, - SurfaceInterval fill_interval) const { - if (type == SurfaceType::Fill && IsRegionValid(fill_interval) && - boost::icl::first(fill_interval) >= addr && - boost::icl::last_next(fill_interval) <= end && // dest_surface is within our 
fill range - dest_surface.FromInterval(fill_interval).GetInterval() == - fill_interval) { // make sure interval is a rectangle in dest surface - if (fill_size * 8 != dest_surface.GetFormatBpp()) { - // Check if bits repeat for our fill_size - const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); - std::vector fill_test(fill_size * dest_bytes_per_pixel); - - for (u32 i = 0; i < dest_bytes_per_pixel; ++i) - std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); - - for (u32 i = 0; i < fill_size; ++i) - if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], - dest_bytes_per_pixel) != 0) - return false; - - if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) - return false; - } - return true; - } - return false; -} - -bool CachedSurface::CanCopy(const SurfaceParams& dest_surface, - SurfaceInterval copy_interval) const { - SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == copy_interval); - if (CanSubRect(subrect_params)) - return true; - - if (CanFill(dest_surface, copy_interval)) - return true; - - return false; -} - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/cached_surface.h b/src/video_core/rasterizer_cache/cached_surface.h deleted file mode 100644 index 66152b52a..000000000 --- a/src/video_core/rasterizer_cache/cached_surface.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once -#include "common/assert.h" -#include "core/custom_tex_cache.h" -#include "video_core/rasterizer_cache/surface_params.h" -#include "video_core/rasterizer_cache/texture_runtime.h" - -namespace OpenGL { - -using SurfaceRegions = boost::icl::interval_set; - -/** - * A watcher that notifies whether a cached surface has been changed. 
This is useful for caching - * surface collection objects, including texture cube and mipmap. - */ -class SurfaceWatcher { - friend class CachedSurface; - -public: - explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} - - /// Checks whether the surface has been changed. - bool IsValid() const { - return !surface.expired() && valid; - } - - /// Marks that the content of the referencing surface has been updated to the watcher user. - void Validate() { - ASSERT(!surface.expired()); - valid = true; - } - - /// Gets the referencing surface. Returns null if the surface has been destroyed - Surface Get() const { - return surface.lock(); - } - -private: - std::weak_ptr surface; - bool valid = false; -}; - -class RasterizerCache; -class StagingBuffer; - -class CachedSurface : public SurfaceParams, public std::enable_shared_from_this { -public: - CachedSurface(SurfaceParams params, RasterizerCache& owner, TextureRuntime& runtime) - : SurfaceParams(params), owner(owner), runtime(runtime) {} - ~CachedSurface(); - - /// Upload/Download data in gl_buffer in/to this surface's texture - void UploadTexture(Common::Rectangle rect, const StagingBuffer& staging); - void DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging); - - bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; - bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; - - bool IsRegionValid(SurfaceInterval interval) const { - return (invalid_regions.find(interval) == invalid_regions.end()); - } - - bool IsSurfaceFullyInvalid() const { - auto interval = GetInterval(); - return *invalid_regions.equal_range(interval).first == interval; - } - - std::shared_ptr CreateWatcher() { - auto watcher = std::make_shared(weak_from_this()); - watchers[watcher_count++] = watcher; - return watcher; - } - - void InvalidateAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - 
locked->valid = false; - } - } - } - - void UnlinkAllWatcher() { - for (const auto& watcher : watchers) { - if (auto locked = watcher.lock()) { - locked->valid = false; - locked->surface.reset(); - } - } - - watchers = {}; - watcher_count = 0; - } - -public: - bool registered = false; - SurfaceRegions invalid_regions; - - // Number of bytes to read from fill_data - u32 fill_size = 0; - std::array fill_data; - OGLTexture texture; - - std::array, 7> level_watchers; - u32 max_level = 0; - -private: - RasterizerCache& owner; - TextureRuntime& runtime; - u32 watcher_count = 0; - std::array, 8> watchers; -}; - -struct CachedTextureCube { - OGLTexture texture; - u16 res_scale = 1; - std::shared_ptr px; - std::shared_ptr nx; - std::shared_ptr py; - std::shared_ptr ny; - std::shared_ptr pz; - std::shared_ptr nz; -}; - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index 01a87317a..7066c53a9 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -9,11 +9,10 @@ #include "common/alignment.h" #include "common/color.h" #include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/renderer_opengl/gl_vars.h" #include "video_core/texture/etc1.h" #include "video_core/utils.h" -namespace OpenGL { +namespace VideoCore { template inline T MakeInt(const std::byte* bytes) { @@ -46,14 +45,6 @@ inline void DecodePixel(const std::byte* source, std::byte* dest) { const u8 ia4 = static_cast(source[0]); std::memset(dest, Color::Convert4To8(ia4 >> 4), 3); dest[3] = std::byte{Color::Convert4To8(ia4 & 0xF)}; - } else if (format == PixelFormat::RGBA8 && GLES) { - const u32 abgr = MakeInt(source); - const u32 rgba = std::byteswap(abgr); - std::memcpy(dest, &rgba, sizeof(u32)); - } else if (format == PixelFormat::RGB8 && GLES) { - dest[0] = source[2]; - dest[1] = source[1]; - dest[2] = source[0]; } else { std::memcpy(dest, source, 
bytes_per_pixel); } @@ -111,13 +102,6 @@ inline void EncodePixel(const std::byte* source, std::byte* dest) { if constexpr (format == PixelFormat::D24S8) { const u32 s8d24 = std::rotr(MakeInt(source), 8); std::memcpy(dest, &s8d24, sizeof(u32)); - } else if (format == PixelFormat::RGBA8 && GLES) { - const u32 abgr = std::byteswap(MakeInt(source)); - std::memcpy(dest, &abgr, sizeof(u32)); - } else if (format == PixelFormat::RGB8 && GLES) { - dest[0] = source[2]; - dest[1] = source[1]; - dest[2] = source[0]; } else { std::memcpy(dest, source, bytes_per_pixel); } diff --git a/src/video_core/rasterizer_cache/pixel_format.h b/src/video_core/rasterizer_cache/pixel_format.h index e9a0b7cd0..b8ee07753 100644 --- a/src/video_core/rasterizer_cache/pixel_format.h +++ b/src/video_core/rasterizer_cache/pixel_format.h @@ -8,7 +8,7 @@ #include "video_core/regs_framebuffer.h" #include "video_core/regs_texturing.h" -namespace OpenGL { +namespace VideoCore { constexpr u32 PIXEL_FORMAT_COUNT = 18; diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.cpp b/src/video_core/rasterizer_cache/rasterizer_cache.cpp index f4e0a95f6..6f2436350 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.cpp +++ b/src/video_core/rasterizer_cache/rasterizer_cache.cpp @@ -2,1149 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
-#include -#include -#include "common/alignment.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/pica_state.h" -#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_cache/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_format_reinterpreter.h" -#include "video_core/renderer_opengl/gl_vars.h" -#include "video_core/renderer_opengl/texture_downloader_es.h" -#include "video_core/renderer_opengl/texture_filters/texture_filterer.h" -namespace OpenGL { - -template -static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -enum class MatchFlags { - Invalid = 1, ///< Surface is allowed to be only partially valid - Exact = 1 << 1, ///< Surface perfectly matches params - SubRect = 1 << 2, ///< Surface encompasses params - Copy = 1 << 3, ///< Surface that can be used as a copy source - Expand = 1 << 4, ///< Surface that can expand params - TexCopy = 1 << 5 ///< Surface that will match a display transfer "texture copy" parameters -}; - -DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); - -/// Get the best surface match (and its match type) for the given flags -template -static Surface FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, - ScaleMatch match_scale_type, - std::optional validate_interval = std::nullopt) { - Surface match_surface = nullptr; - bool match_valid = false; - u32 match_scale = 0; - SurfaceInterval match_interval{}; - - for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { - for (const auto& surface : pair.second) { - const bool res_scale_matched = match_scale_type == ScaleMatch::Exact - ? (params.res_scale == surface->res_scale) - : (params.res_scale <= surface->res_scale); - // validity will be checked in GetCopyableInterval - bool is_valid = - True(find_flags & MatchFlags::Copy) - ? 
true - : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); - - if (False(find_flags & MatchFlags::Invalid) && !is_valid) - continue; - - auto IsMatch_Helper = [&](auto check_type, auto match_fn) { - if (False(find_flags & check_type)) - return; - - bool matched; - SurfaceInterval surface_interval; - std::tie(matched, surface_interval) = match_fn(); - if (!matched) - return; - - if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && - surface->type != SurfaceType::Fill) - return; - - // Found a match, update only if this is better than the previous one - auto UpdateMatch = [&] { - match_surface = surface; - match_valid = is_valid; - match_scale = surface->res_scale; - match_interval = surface_interval; - }; - - if (surface->res_scale > match_scale) { - UpdateMatch(); - return; - } else if (surface->res_scale < match_scale) { - return; - } - - if (is_valid && !match_valid) { - UpdateMatch(); - return; - } else if (is_valid != match_valid) { - return; - } - - if (boost::icl::length(surface_interval) > boost::icl::length(match_interval)) { - UpdateMatch(); - } - }; - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - ASSERT(validate_interval); - auto copy_interval = - params.FromInterval(*validate_interval).GetCopyableInterval(surface); - bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && - surface->CanCopy(params, copy_interval); - return std::make_pair(matched, copy_interval); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanExpand(params), surface->GetInterval()); - }); - IsMatch_Helper(std::integral_constant{}, [&] { - return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); - }); - 
} - } - return match_surface; -} - -RasterizerCache::RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, Driver& driver) - : rasterizer(rasterizer), runtime(driver) { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - texture_filterer = std::make_unique( - Settings::values.texture_filter_name.GetValue(), resolution_scale_factor); - format_reinterpreter = std::make_unique(); - texture_downloader_es = std::make_unique(false); -} - -RasterizerCache::~RasterizerCache() = default; - -MICROPROFILE_DEFINE(RasterizerCache_BlitSurface, "RasterizerCache", "BlitSurface", - MP_RGB(128, 192, 64)); -bool RasterizerCache::BlitSurfaces(const Surface& src_surface, - const Common::Rectangle& src_rect, - const Surface& dst_surface, - const Common::Rectangle& dst_rect) { - MICROPROFILE_SCOPE(RasterizerCache_BlitSurface); - - if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { - dst_surface->InvalidateAllWatcher(); - - const TextureBlit texture_blit = { - .surface_type = src_surface->type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - return runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); - } - - return false; -} +namespace VideoCore { +MICROPROFILE_DEFINE(RasterizerCache_BlitSurface, "RasterizerCache", "BlitSurface", MP_RGB(128, 192, 64)); MICROPROFILE_DEFINE(RasterizerCache_CopySurface, "RasterizerCache", "CopySurface", MP_RGB(128, 192, 64)); -void RasterizerCache::CopySurface(const Surface& src_surface, const Surface& dst_surface, - SurfaceInterval copy_interval) { - MICROPROFILE_SCOPE(RasterizerCache_CopySurface); - - SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); - ASSERT(subrect_params.GetInterval() == 
copy_interval && src_surface != dst_surface); - - const auto dst_rect = dst_surface->GetScaledSubRect(subrect_params); - if (src_surface->type == SurfaceType::Fill) { - // FillSurface needs a 4 bytes buffer - const u32 fill_offset = - (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; - std::array fill_buffer; - - u32 fill_buff_pos = fill_offset; - for (std::size_t i = 0; i < fill_buffer.size(); i++) { - fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; - } - - const ClearValue clear_value = - MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data()); - - const TextureClear clear_rect = { - .surface_type = dst_surface->type, - .texture_format = dst_surface->pixel_format, - .texture_level = 0, - .rect = Rect2D{ - .offset = {dst_rect.left, dst_rect.bottom}, - .extent = {dst_rect.GetWidth(), dst_rect.GetHeight()} - } - }; - - runtime.ClearTexture(dst_surface->texture, clear_rect, clear_value); - return; - } - - if (src_surface->CanSubRect(subrect_params)) { - const auto src_rect = src_surface->GetScaledSubRect(subrect_params); - const TextureBlit texture_blit = { - .surface_type = src_surface->type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); - return; - } - - UNREACHABLE(); -} - - -Surface RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return nullptr; - } - // Use GetSurfaceSubRect instead - ASSERT(params.width == params.stride); - - ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); - - 
// Check for an exact match in existing surfaces - Surface surface = - FindMatch(surface_cache, params, match_res_scale); - - if (surface == nullptr) { - u16 target_res_scale = params.res_scale; - if (match_res_scale != ScaleMatch::Exact) { - // This surface may have a subrect of another surface with a higher res_scale, find - // it to adjust our params - SurfaceParams find_params = params; - Surface expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - // Keep res_scale when reinterpreting d24s8 -> rgba8 - if (params.pixel_format == PixelFormat::RGBA8) { - find_params.pixel_format = PixelFormat::D24S8; - expandable = FindMatch( - surface_cache, find_params, match_res_scale); - if (expandable != nullptr && expandable->res_scale > target_res_scale) { - target_res_scale = expandable->res_scale; - } - } - } - SurfaceParams new_params = params; - new_params.res_scale = target_res_scale; - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - - if (load_if_create) { - ValidateSurface(surface, params.addr, params.size); - } - - return surface; -} - -SurfaceRect_Tuple RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, - ScaleMatch match_res_scale, - bool load_if_create) { - if (params.addr == 0 || params.height * params.width == 0) { - return std::make_tuple(nullptr, Common::Rectangle{}); - } - - // Attempt to find encompassing surface - Surface surface = FindMatch(surface_cache, params, - match_res_scale); - - // Check if FindMatch failed because of res scaling - // If that's the case create a new surface with - // the dimensions of the lower res_scale surface - // to suggest it should not be used again - if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { - surface = FindMatch(surface_cache, params, - ScaleMatch::Ignore); - if (surface != nullptr) { - SurfaceParams new_params = 
*surface; - new_params.res_scale = params.res_scale; - - surface = CreateSurface(new_params); - RegisterSurface(surface); - } - } - - SurfaceParams aligned_params = params; - if (params.is_tiled) { - aligned_params.height = Common::AlignUp(params.height, 8); - aligned_params.width = Common::AlignUp(params.width, 8); - aligned_params.stride = Common::AlignUp(params.stride, 8); - aligned_params.UpdateParams(); - } - - // Check for a surface we can expand before creating a new one - if (surface == nullptr) { - surface = FindMatch(surface_cache, aligned_params, - match_res_scale); - if (surface != nullptr) { - aligned_params.width = aligned_params.stride; - aligned_params.UpdateParams(); - - SurfaceParams new_params = *surface; - new_params.addr = std::min(aligned_params.addr, surface->addr); - new_params.end = std::max(aligned_params.end, surface->end); - new_params.size = new_params.end - new_params.addr; - new_params.height = - new_params.size / aligned_params.BytesInPixels(aligned_params.stride); - ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); - - Surface new_surface = CreateSurface(new_params); - DuplicateSurface(surface, new_surface); - - // Delete the expanded surface, this can't be done safely yet - // because it may still be in use - surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted - remove_surfaces.emplace(surface); - - surface = new_surface; - RegisterSurface(new_surface); - } - } - - // No subrect found - create and return a new surface - if (surface == nullptr) { - SurfaceParams new_params = aligned_params; - // Can't have gaps in a surface - new_params.width = aligned_params.stride; - new_params.UpdateParams(); - // GetSurface will create the new surface and possibly adjust res_scale if necessary - surface = GetSurface(new_params, match_res_scale, load_if_create); - } else if (load_if_create) { - ValidateSurface(surface, aligned_params.addr, aligned_params.size); - } - - return 
std::make_tuple(surface, surface->GetScaledSubRect(params)); -} - -Surface RasterizerCache::GetTextureSurface( - const Pica::TexturingRegs::FullTextureConfig& config) { - Pica::Texture::TextureInfo info = - Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); - return GetTextureSurface(info, config.config.lod.max_level); -} - -Surface RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, - u32 max_level) { - if (info.physical_address == 0) { - return nullptr; - } - - SurfaceParams params; - params.addr = info.physical_address; - params.width = info.width; - params.height = info.height; - params.is_tiled = true; - params.pixel_format = PixelFormatFromTextureFormat(info.format); - params.res_scale = texture_filterer->IsNull() ? 1 : resolution_scale_factor; - params.UpdateParams(); - - u32 min_width = info.width >> max_level; - u32 min_height = info.height >> max_level; - if (min_width % 8 != 0 || min_height % 8 != 0) { - LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, - min_height); - return nullptr; - } - if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { - LOG_CRITICAL(Render_OpenGL, - "Texture size ({}x{}) does not support required mipmap level ({})", - params.width, params.height, max_level); - return nullptr; - } - - auto surface = GetSurface(params, ScaleMatch::Ignore, true); - if (!surface) - return nullptr; - - // Update mipmap if necessary - if (max_level != 0) { - if (max_level >= 8) { - // since PICA only supports texture size between 8 and 1024, there are at most eight - // possible mipmap levels including the base. 
- LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); - return nullptr; - } - - // Allocate more mipmap level if necessary - if (surface->max_level < max_level) { - if (!texture_filterer->IsNull()) { - // TODO: proper mipmap support for custom textures - runtime.GenerateMipmaps(surface->texture, max_level); - } - - surface->max_level = max_level; - } - - // Blit mipmaps that have been invalidated - SurfaceParams surface_params = *surface; - for (u32 level = 1; level <= max_level; ++level) { - // In PICA all mipmap levels are stored next to each other - surface_params.addr += - surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; - surface_params.width /= 2; - surface_params.height /= 2; - surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it - surface_params.UpdateParams(); - - auto& watcher = surface->level_watchers[level - 1]; - if (!watcher || !watcher->Get()) { - auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); - if (level_surface) { - watcher = level_surface->CreateWatcher(); - } else { - watcher = nullptr; - } - } - - if (watcher && !watcher->IsValid()) { - auto level_surface = watcher->Get(); - if (!level_surface->invalid_regions.empty()) { - ValidateSurface(level_surface, level_surface->addr, level_surface->size); - } - - if (texture_filterer->IsNull()) { - const auto src_rect = level_surface->GetScaledRect(); - const auto dst_rect = surface_params.GetScaledRect(); - const TextureBlit texture_blit = { - .surface_type = surface->type, - .src_level = 0, - .dst_level = level, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {dst_rect.left, dst_rect.bottom}, - .end = {dst_rect.right, dst_rect.top} - } - }; - - runtime.BlitTextures(level_surface->texture, surface->texture, texture_blit); - } - - watcher->Validate(); - } - } - 
} - - return surface; -} - -const CachedTextureCube& RasterizerCache::GetTextureCube(const TextureCubeConfig& config) { - auto& cube = texture_cube_cache[config]; - - struct Face { - Face(std::shared_ptr& watcher, PAddr address) - : watcher(watcher), address(address) {} - std::shared_ptr& watcher; - PAddr address; - }; - - const std::array faces{{ - {cube.px, config.px}, - {cube.nx, config.nx}, - {cube.py, config.py}, - {cube.ny, config.ny}, - {cube.pz, config.pz}, - {cube.nz, config.nz}, - }}; - - for (const Face& face : faces) { - if (!face.watcher || !face.watcher->Get()) { - Pica::Texture::TextureInfo info; - info.physical_address = face.address; - info.height = info.width = config.width; - info.format = config.format; - info.SetDefaultStride(); - auto surface = GetTextureSurface(info); - if (surface) { - face.watcher = surface->CreateWatcher(); - } else { - // Can occur when texture address is invalid. We mark the watcher with nullptr - // in this case and the content of the face wouldn't get updated. These are - // usually leftover setup in the texture unit and games are not supposed to draw - // using them. 
- face.watcher = nullptr; - } - } - } - - if (cube.texture.handle == 0) { - for (const Face& face : faces) { - if (face.watcher) { - auto surface = face.watcher->Get(); - cube.res_scale = std::max(cube.res_scale, surface->res_scale); - } - } - - const auto& tuple = GetFormatTuple(PixelFormatFromTextureFormat(config.format)); - const u32 width = cube.res_scale * config.width; - const GLsizei levels = static_cast(std::log2(width)) + 1; - - // Allocate the cube texture - cube.texture.Create(); - cube.texture.Allocate(GL_TEXTURE_CUBE_MAP, levels, tuple.internal_format, width, width); - } - - u32 scaled_size = cube.res_scale * config.width; - - for (std::size_t i = 0; i < faces.size(); i++) { - const Face& face = faces[i]; - if (face.watcher && !face.watcher->IsValid()) { - auto surface = face.watcher->Get(); - if (!surface->invalid_regions.empty()) { - ValidateSurface(surface, surface->addr, surface->size); - } - - const auto src_rect = surface->GetScaledRect(); - const TextureBlit texture_blit = { - .surface_type = SurfaceType::Color, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = static_cast(i), - .src_region = Region2D{ - .start = {src_rect.left, src_rect.bottom}, - .end = {src_rect.right, src_rect.top} - }, - .dst_region = Region2D{ - .start = {0, 0}, - .end = {scaled_size, scaled_size} - } - }; - - runtime.BlitTextures(surface->texture, cube.texture, texture_blit); - face.watcher->Validate(); - } - } - - return cube; -} - -SurfaceSurfaceRect_Tuple RasterizerCache::GetFramebufferSurfaces( - bool using_color_fb, bool using_depth_fb, const Common::Rectangle& viewport_rect) { - const auto& regs = Pica::g_state.regs; - const auto& config = regs.framebuffer.framebuffer; - - // Update resolution_scale_factor and reset cache if changed - const bool resolution_scale_changed = - resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); - const bool texture_filter_changed = - VideoCore::g_texture_filter_update_requested.exchange(false) && - 
texture_filterer->Reset(Settings::values.texture_filter_name.GetValue(), - VideoCore::GetResolutionScaleFactor()); - - if (resolution_scale_changed || texture_filter_changed) { - resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); - FlushAll(); - while (!surface_cache.empty()) - UnregisterSurface(*surface_cache.begin()->second.begin()); - texture_cube_cache.clear(); - } - - Common::Rectangle viewport_clamped{ - static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), - static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), - static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), - static_cast( - std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; - - // get color and depth surfaces - SurfaceParams color_params; - color_params.is_tiled = true; - color_params.res_scale = resolution_scale_factor; - color_params.width = config.GetWidth(); - color_params.height = config.GetHeight(); - SurfaceParams depth_params = color_params; - - color_params.addr = config.GetColorBufferPhysicalAddress(); - color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); - color_params.UpdateParams(); - - depth_params.addr = config.GetDepthBufferPhysicalAddress(); - depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); - depth_params.UpdateParams(); - - auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); - auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); - - // Make sure that framebuffers don't overlap if both color and depth are being used - if (using_color_fb && using_depth_fb && - boost::icl::length(color_vp_interval & depth_vp_interval)) { - LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " - "overlapping framebuffers not supported!"); - using_depth_fb = false; - } - - Common::Rectangle color_rect{}; - Surface color_surface = nullptr; - if (using_color_fb) - 
std::tie(color_surface, color_rect) = - GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); - - Common::Rectangle depth_rect{}; - Surface depth_surface = nullptr; - if (using_depth_fb) - std::tie(depth_surface, depth_rect) = - GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); - - Common::Rectangle fb_rect{}; - if (color_surface != nullptr && depth_surface != nullptr) { - fb_rect = color_rect; - // Color and Depth surfaces must have the same dimensions and offsets - if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || - color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { - color_surface = GetSurface(color_params, ScaleMatch::Exact, false); - depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); - fb_rect = color_surface->GetScaledRect(); - } - } else if (color_surface != nullptr) { - fb_rect = color_rect; - } else if (depth_surface != nullptr) { - fb_rect = depth_rect; - } - - if (color_surface != nullptr) { - ValidateSurface(color_surface, boost::icl::first(color_vp_interval), - boost::icl::length(color_vp_interval)); - color_surface->InvalidateAllWatcher(); - } - if (depth_surface != nullptr) { - ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), - boost::icl::length(depth_vp_interval)); - depth_surface->InvalidateAllWatcher(); - } - - return std::make_tuple(color_surface, depth_surface, fb_rect); -} - -Surface RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) { - SurfaceParams params; - params.addr = config.GetStartAddress(); - params.end = config.GetEndAddress(); - params.size = params.end - params.addr; - params.type = SurfaceType::Fill; - params.res_scale = std::numeric_limits::max(); - - Surface new_surface = std::make_shared(params, *this, runtime); - - std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); - if (config.fill_32bit) { - new_surface->fill_size = 4; - } else if (config.fill_24bit) { - new_surface->fill_size = 
3; - } else { - new_surface->fill_size = 2; - } - - RegisterSurface(new_surface); - return new_surface; -} - -SurfaceRect_Tuple RasterizerCache::GetTexCopySurface(const SurfaceParams& params) { - Common::Rectangle rect{}; - - Surface match_surface = FindMatch( - surface_cache, params, ScaleMatch::Ignore); - - if (match_surface != nullptr) { - ValidateSurface(match_surface, params.addr, params.size); - - SurfaceParams match_subrect; - if (params.width != params.stride) { - const u32 tiled_size = match_surface->is_tiled ? 8 : 1; - match_subrect = params; - match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; - match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; - match_subrect.height *= tiled_size; - } else { - match_subrect = match_surface->FromInterval(params.GetInterval()); - ASSERT(match_subrect.GetInterval() == params.GetInterval()); - } - - rect = match_surface->GetScaledSubRect(match_subrect); - } - - return std::make_tuple(match_surface, rect); -} - -void RasterizerCache::DuplicateSurface(const Surface& src_surface, - const Surface& dest_surface) { - ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); - - BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, - dest_surface->GetScaledSubRect(*src_surface)); - - dest_surface->invalid_regions -= src_surface->GetInterval(); - dest_surface->invalid_regions += src_surface->invalid_regions; - - SurfaceRegions regions; - for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { - if (pair.second == src_surface) { - regions += pair.first; - } - } - for (const auto& interval : regions) { - dirty_regions.set({interval, dest_surface}); - } -} - -void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { - if (size == 0) [[unlikely]] { - return; - } - - const SurfaceInterval validate_interval(addr, addr + size); - if (surface->type == SurfaceType::Fill) { - 
// Sanity check, fill surfaces will always be valid when used - ASSERT(surface->IsRegionValid(validate_interval)); - return; - } - - auto validate_regions = surface->invalid_regions & validate_interval; - auto NotifyValidated = [&](SurfaceInterval interval) { - surface->invalid_regions.erase(interval); - validate_regions.erase(interval); - }; - - while (true) { - const auto it = validate_regions.begin(); - if (it == validate_regions.end()) { - break; - } - - const auto interval = *it & validate_interval; - // Look for a valid surface to copy from - SurfaceParams params = surface->FromInterval(interval); - - Surface copy_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (copy_surface != nullptr) { - SurfaceInterval copy_interval = params.GetCopyableInterval(copy_surface); - CopySurface(copy_surface, surface, copy_interval); - NotifyValidated(copy_interval); - continue; - } - - // Try to find surface in cache with different format - // that can can be reinterpreted to the requested format. - if (ValidateByReinterpretation(surface, params, interval)) { - NotifyValidated(interval); - continue; - } - // Could not find a matching reinterpreter, check if we need to implement a - // reinterpreter - if (NoUnimplementedReinterpretations(surface, params, interval) && - !IntervalHasInvalidPixelFormat(params, interval)) { - // No surfaces were found in the cache that had a matching bit-width. - // If the region was created entirely on the GPU, - // assume it was a developer mistake and skip flushing. - if (boost::icl::contains(dirty_regions, interval)) { - LOG_DEBUG(Render_OpenGL, "Region created fully on GPU and reinterpretation is " - "invalid. 
Skipping validation"); - validate_regions.erase(interval); - continue; - } - } - - // Load data from 3DS memory - FlushRegion(params.addr, params.size); - UploadSurface(surface, interval); - NotifyValidated(params.GetInterval()); - } -} - MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Load", MP_RGB(128, 192, 64)); -void RasterizerCache::UploadSurface(const Surface& surface, const SurfaceInterval& interval) { - const SurfaceParams info = surface->FromInterval(interval); - const u32 load_start = info.addr; - const u32 load_end = info.end; - ASSERT(load_start >= surface->addr && load_end <= surface->end); - - const StagingBuffer& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); - auto source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); - if (!source_ptr) [[unlikely]] { - return; - } - - const auto start_offset = load_start - surface->addr; - const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); - const auto upload_size = static_cast(upload_data.size()); - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); - - if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); - if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); - } else { - std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); - } - } else { - UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); - } - - surface->UploadTexture(surface->GetSubRect(info), staging); -} - MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64)); -void RasterizerCache::DownloadSurface(const Surface& surface, const SurfaceInterval& interval) { - 
const u32 flush_start = boost::icl::first(interval); - const u32 flush_end = boost::icl::last_next(interval); - ASSERT(flush_start >= surface->addr && flush_end <= surface->end); - const StagingBuffer& staging = runtime.FindStaging( - surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); - if (surface->type != SurfaceType::Fill) { - SurfaceParams params = surface->FromInterval(interval); - surface->DownloadTexture(surface->GetSubRect(params), staging); - } - - auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); - if (!dest_ptr) [[unlikely]] { - return; - } - - const auto start_offset = flush_start - surface->addr; - const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); - const auto download_size = static_cast(download_dest.size()); - - MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); - - if (surface->type == SurfaceType::Fill) { - const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); - const u32 backup_bytes = start_offset % surface->fill_size; - std::array backup_data; - if (backup_bytes) { - std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); - } - - for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { - std::memcpy(&dest_ptr[offset], &surface->fill_data[0], - std::min(surface->fill_size, download_size - offset)); - } - - if (backup_bytes) - std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); - } else if (!surface->is_tiled) { - ASSERT(surface->type == SurfaceType::Color); - - const auto download_data = staging.mapped.subspan(start_offset, download_size); - if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { - Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); - } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { - Pica::Texture::ConvertBGRToRGB(download_data, download_dest); - } else { - std::memcpy(download_dest.data(), download_data.data(), 
download_size); - } - } else { - SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); - } -} - - -bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval) { - static constexpr std::array all_formats{ - PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, - PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, - PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, - PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, - PixelFormat::D24S8, - }; - bool implemented = true; - for (PixelFormat format : all_formats) { - if (GetFormatBpp(format) == surface->GetFormatBpp()) { - params.pixel_format = format; - // This could potentially be expensive, - // although experimentally it hasn't been too bad - Surface test_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - if (test_surface != nullptr) { - LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", - PixelFormatAsString(format), - PixelFormatAsString(surface->pixel_format)); - implemented = false; - } - } - } - return implemented; -} - -bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, - const SurfaceInterval& interval) { - params.pixel_format = PixelFormat::Invalid; - for (const auto& set : RangeFromInterval(surface_cache, interval)) - for (const auto& surface : set.second) - if (surface->pixel_format == PixelFormat::Invalid) { - LOG_DEBUG(Render_OpenGL, "Surface {:#x} found with invalid pixel format", - surface->addr); - return true; - } - return false; -} - -bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, - SurfaceParams& params, - const SurfaceInterval& interval) { - const PixelFormat dst_format = surface->pixel_format; - const SurfaceType type = GetFormatType(dst_format); - - for (auto& reinterpreter : - 
format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) { - - params.pixel_format = reinterpreter->GetSourceFormat(); - Surface reinterpret_surface = - FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); - - if (reinterpret_surface != nullptr) { - auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); - auto reinterpret_params = surface->FromInterval(reinterpret_interval); - auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); - auto dest_rect = surface->GetScaledSubRect(reinterpret_params); - - if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 && - surface->res_scale == resolution_scale_factor) { - // The destination surface is either a framebuffer, or a filtered texture. - // Create an intermediate surface to convert to before blitting to the - // destination. - const u32 width = dest_rect.GetHeight() / resolution_scale_factor; - const u32 height = dest_rect.GetWidth() / resolution_scale_factor; - const Common::Rectangle tmp_rect{0, width, height, 0}; - - OGLTexture tmp_tex = AllocateSurfaceTexture(dst_format, height, width); - reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, tmp_tex, - tmp_rect); - - if (!texture_filterer->Filter(tmp_tex, tmp_rect, surface->texture, dest_rect, type)) { - const TextureBlit texture_blit = { - .surface_type = type, - .src_level = 0, - .dst_level = 0, - .src_layer = 0, - .dst_layer = 0, - .src_region = Region2D{ - .start = {0, 0}, - .end = {width, height} - }, - .dst_region = Region2D{ - .start = {dest_rect.left, dest_rect.bottom}, - .end = {dest_rect.right, dest_rect.top} - } - }; - - runtime.BlitTextures(tmp_tex, surface->texture, texture_blit); - } - - } else { - reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, - dest_rect); - } - - return true; - } - } - - return false; -} - -void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface flush_surface) { - std::lock_guard 
lock{mutex}; - - if (size == 0) [[unlikely]] { - return; - } - - const SurfaceInterval flush_interval(addr, addr + size); - SurfaceRegions flushed_intervals; - - for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { - // small sizes imply that this most likely comes from the cpu, flush the entire region - // the point is to avoid thousands of small writes every frame if the cpu decides to - // access that region, anything higher than 8 you're guaranteed it comes from a service - const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; - auto& surface = pair.second; - - if (flush_surface != nullptr && surface != flush_surface) - continue; - - // Sanity check, this surface is the last one that marked this region dirty - ASSERT(surface->IsRegionValid(interval)); - - DownloadSurface(surface, interval); - flushed_intervals += interval; - } - - // Reset dirty regions - dirty_regions -= flushed_intervals; -} - -void RasterizerCache::FlushAll() { - FlushRegion(0, 0xFFFFFFFF); -} - -void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { - std::lock_guard lock{mutex}; - - if (size == 0) - return; - - const SurfaceInterval invalid_interval(addr, addr + size); - - if (region_owner != nullptr) { - ASSERT(region_owner->type != SurfaceType::Texture); - ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); - // Surfaces can't have a gap - ASSERT(region_owner->width == region_owner->stride); - region_owner->invalid_regions.erase(invalid_interval); - } - - for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { - for (const auto& cached_surface : pair.second) { - if (cached_surface == region_owner) - continue; - - // If cpu is invalidating this region we want to remove it - // to (likely) mark the memory pages as uncached - if (region_owner == nullptr && size <= 8) { - FlushRegion(cached_surface->addr, cached_surface->size, cached_surface); - 
remove_surfaces.emplace(cached_surface); - continue; - } - - const auto interval = cached_surface->GetInterval() & invalid_interval; - cached_surface->invalid_regions.insert(interval); - cached_surface->InvalidateAllWatcher(); - - // If the surface has no salvageable data it should be removed from the cache to avoid - // clogging the data structure - if (cached_surface->IsSurfaceFullyInvalid()) { - remove_surfaces.emplace(cached_surface); - } - } - } - - if (region_owner != nullptr) - dirty_regions.set({invalid_interval, region_owner}); - else - dirty_regions.erase(invalid_interval); - - for (const auto& remove_surface : remove_surfaces) { - if (remove_surface == region_owner) { - Surface expanded_surface = FindMatch( - surface_cache, *region_owner, ScaleMatch::Ignore); - ASSERT(expanded_surface); - - if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { - DuplicateSurface(region_owner, expanded_surface); - } else { - continue; - } - } - UnregisterSurface(remove_surface); - } - - remove_surfaces.clear(); -} - -Surface RasterizerCache::CreateSurface(const SurfaceParams& params) { - Surface surface = std::make_shared(params, *this, runtime); - surface->invalid_regions.insert(surface->GetInterval()); - - // Allocate surface texture - surface->texture = - AllocateSurfaceTexture(params.pixel_format, surface->GetScaledWidth(), surface->GetScaledHeight()); - - return surface; -} - -void RasterizerCache::RegisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (surface->registered) { - return; - } - surface->registered = true; - surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); - rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, 1); -} - -void RasterizerCache::UnregisterSurface(const Surface& surface) { - std::lock_guard lock{mutex}; - - if (!surface->registered) { - return; - } - surface->registered = false; - rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, -1); - 
surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); -} - -// Allocate an uninitialized texture of appropriate size and format for the surface -OGLTexture RasterizerCache::AllocateSurfaceTexture(PixelFormat format, u32 width, u32 height) { - const FormatTuple& tuple = GetFormatTuple(format); - auto recycled_tex = host_texture_recycler.find({format, width, height}); - if (recycled_tex != host_texture_recycler.end()) { - OGLTexture texture = std::move(recycled_tex->second); - host_texture_recycler.erase(recycled_tex); - return texture; - } - - const GLsizei levels = static_cast(std::log2(std::max(width, height))) + 1; - - OGLTexture texture; - texture.Create(); - texture.Allocate(GL_TEXTURE_2D, levels, tuple.internal_format, width, height); - - return texture; -} - -} // namespace OpenGL +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 273f4e1a9..35154e80e 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -4,52 +4,81 @@ #pragma once #include -#include "video_core/rasterizer_cache/cached_surface.h" +#include +#include +#include "common/alignment.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "video_core/pica_state.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/rasterizer_cache/surface_params.h" #include "video_core/texture/texture_decode.h" +#include "video_core/video_core.h" namespace VideoCore { -class RasterizerAccelerated; + +inline auto RangeFromInterval(auto& map, SurfaceInterval interval) { + return boost::make_iterator_range(map.equal_range(interval)); } -namespace OpenGL { - -// Declare rasterizer interval types -using SurfaceSet = std::set; -using SurfaceMap = - boost::icl::interval_map; -using SurfaceCache = - 
boost::icl::interval_map; - -static_assert(std::is_same() && - std::is_same(), - "Incorrect interval types"); - -using SurfaceRect_Tuple = std::tuple>; -using SurfaceSurfaceRect_Tuple = std::tuple>; - enum class ScaleMatch { - Exact, // Only accept same res scale - Upscale, // Only allow higher scale than params - Ignore // Accept every scaled res + Exact, ///< Only accept same res scale + Upscale, ///< Only allow higher scale than params + Ignore ///< Accept every scaled res }; -class Driver; -class TextureDownloaderES; -class TextureFilterer; -class FormatReinterpreterOpenGL; +enum class MatchFlags { + Invalid = 1, ///< Surface is allowed to be only partially valid + Exact = 1 << 1, ///< Surface perfectly matches params + SubRect = 1 << 2, ///< Surface encompasses params + Copy = 1 << 3, ///< Surface that can be used as a copy source + Expand = 1 << 4, ///< Surface that can expand params + TexCopy = 1 << 5 ///< Surface that will match a display transfer "texture copy" parameters +}; +DECLARE_ENUM_FLAG_OPERATORS(MatchFlags); + +class RasterizerAccelerated; + +template class RasterizerCache : NonCopyable { public: - RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, Driver& driver); - ~RasterizerCache(); + using TextureRuntime = typename T::Runtime; + using CachedSurface = typename T::Surface; + using Watcher = SurfaceWatcher; + + /// Declare rasterizer interval types + using Surface = std::shared_ptr; + using SurfaceSet = std::set; + using SurfaceMap = + boost::icl::interval_map; + using SurfaceCache = + boost::icl::interval_map; + + static_assert(std::is_same() && + std::is_same(), + "Incorrect interval types"); + + using SurfaceRect_Tuple = std::tuple>; + using SurfaceSurfaceRect_Tuple = std::tuple>; + +public: + RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, TextureRuntime& runtime); + ~RasterizerCache() = default; + + /// Get the best surface match (and its match type) for the given flags + template + Surface FindMatch(const 
SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + std::optional validate_interval = std::nullopt); /// Blit one surface's texture to another - bool BlitSurfaces(const Surface& src_surface, const Common::Rectangle& src_rect, - const Surface& dst_surface, const Common::Rectangle& dst_rect); + bool BlitSurfaces(const Surface& src_surface, Common::Rectangle src_rect, + const Surface& dst_surface, Common::Rectangle dst_rect); /// Copy one surface's region to another void CopySurface(const Surface& src_surface, const Surface& dst_surface, @@ -69,7 +98,7 @@ public: Surface GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level = 0); /// Get a texture cube based on the texture configuration - const CachedTextureCube& GetTextureCube(const TextureCubeConfig& config); + const Surface& GetTextureCube(const TextureCubeConfig& config); /// Get the color and depth surfaces based on the framebuffer configuration SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, @@ -90,12 +119,6 @@ public: /// Flush all cached resources tracked by this cache manager void FlushAll(); - // Textures from destroyed surfaces are stored here to be recyled to reduce allocation overhead - // in the driver - // this must be placed above the surface_cache to ensure all cached surfaces are destroyed - // before destroying the recycler - std::unordered_multimap host_texture_recycler; - private: void DuplicateSurface(const Surface& src_surface, const Surface& dest_surface); @@ -103,25 +126,24 @@ private: void ValidateSurface(const Surface& surface, PAddr addr, u32 size); /// Copies pixel data in interval from the guest VRAM to the host GPU surface - void UploadSurface(const Surface& surface, const SurfaceInterval& interval); + void UploadSurface(const Surface& surface, SurfaceInterval interval); /// Copies pixel data in interval from the host GPU surface to the guest VRAM - void DownloadSurface(const Surface& 
surface, const SurfaceInterval& interval); + void DownloadSurface(const Surface& surface, SurfaceInterval interval); /// Returns false if there is a surface in the cache at the interval with the same bit-width, - bool NoUnimplementedReinterpretations(const OpenGL::Surface& surface, - OpenGL::SurfaceParams& params, - const OpenGL::SurfaceInterval& interval); + bool NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, + SurfaceInterval interval); /// Return true if a surface with an invalid pixel format exists at the interval - bool IntervalHasInvalidPixelFormat(SurfaceParams& params, const SurfaceInterval& interval); + bool IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval); /// Attempt to find a reinterpretable surface in the cache and use it to copy for validation - bool ValidateByReinterpretation(const Surface& surface, SurfaceParams& params, - const SurfaceInterval& interval); + bool ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, + SurfaceInterval interval); /// Create a new surface - Surface CreateSurface(const SurfaceParams& params); + Surface CreateSurface(SurfaceParams& params); /// Register surface into the cache void RegisterSurface(const Surface& surface); @@ -129,24 +151,1115 @@ private: /// Remove surface from the cache void UnregisterSurface(const Surface& surface); +private: VideoCore::RasterizerAccelerated& rasterizer; - TextureRuntime runtime; + TextureRuntime& runtime; SurfaceCache surface_cache; SurfaceMap dirty_regions; SurfaceSet remove_surfaces; - u16 resolution_scale_factor; - std::unordered_map texture_cube_cache; - + std::unordered_map texture_cube_cache; std::recursive_mutex mutex; - -public: - OGLTexture AllocateSurfaceTexture(PixelFormat format, u32 width, u32 height); - - std::unique_ptr texture_filterer; - std::unique_ptr format_reinterpreter; - std::unique_ptr texture_downloader_es; }; -} // namespace OpenGL +template 
+RasterizerCache::RasterizerCache(VideoCore::RasterizerAccelerated& rasterizer, TextureRuntime& runtime) + : rasterizer(rasterizer), runtime{runtime} { + resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); +} + +template +template +auto RasterizerCache::FindMatch(const SurfaceCache& surface_cache, const SurfaceParams& params, + ScaleMatch match_scale_type, + std::optional validate_interval) -> Surface { + Surface match_surface = nullptr; + bool match_valid = false; + u32 match_scale = 0; + SurfaceInterval match_interval{}; + + for (const auto& pair : RangeFromInterval(surface_cache, params.GetInterval())) { + for (const auto& surface : pair.second) { + const bool res_scale_matched = match_scale_type == ScaleMatch::Exact + ? (params.res_scale == surface->res_scale) + : (params.res_scale <= surface->res_scale); + // validity will be checked in GetCopyableInterval + bool is_valid = + True(find_flags & MatchFlags::Copy) + ? true + : surface->IsRegionValid(validate_interval.value_or(params.GetInterval())); + + if (False(find_flags & MatchFlags::Invalid) && !is_valid) + continue; + + auto IsMatch_Helper = [&](auto check_type, auto match_fn) { + if (False(find_flags & check_type)) + return; + + bool matched; + SurfaceInterval surface_interval; + std::tie(matched, surface_interval) = match_fn(); + if (!matched) + return; + + if (!res_scale_matched && match_scale_type != ScaleMatch::Ignore && + surface->type != SurfaceType::Fill) + return; + + // Found a match, update only if this is better than the previous one + auto UpdateMatch = [&] { + match_surface = surface; + match_valid = is_valid; + match_scale = surface->res_scale; + match_interval = surface_interval; + }; + + if (surface->res_scale > match_scale) { + UpdateMatch(); + return; + } else if (surface->res_scale < match_scale) { + return; + } + + if (is_valid && !match_valid) { + UpdateMatch(); + return; + } else if (is_valid != match_valid) { + return; + } + + if (boost::icl::length(surface_interval) > 
boost::icl::length(match_interval)) { + UpdateMatch(); + } + }; + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->ExactMatch(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanSubRect(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + ASSERT(validate_interval); + auto copy_interval = + surface->GetCopyableInterval(params.FromInterval(*validate_interval)); + bool matched = boost::icl::length(copy_interval & *validate_interval) != 0 && + surface->CanCopy(params, copy_interval); + return std::make_pair(matched, copy_interval); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanExpand(params), surface->GetInterval()); + }); + IsMatch_Helper(std::integral_constant{}, [&] { + return std::make_pair(surface->CanTexCopy(params), surface->GetInterval()); + }); + } + } + return match_surface; +} + +MICROPROFILE_DECLARE(RasterizerCache_BlitSurface); +template +bool RasterizerCache::BlitSurfaces(const Surface& src_surface, Common::Rectangle src_rect, + const Surface& dst_surface, Common::Rectangle dst_rect) { + MICROPROFILE_SCOPE(RasterizerCache_BlitSurface); + + if (CheckFormatsBlittable(src_surface->pixel_format, dst_surface->pixel_format)) { + dst_surface->InvalidateAllWatcher(); + + const TextureBlit texture_blit = { + .surface_type = src_surface->type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + return runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); + } + + return false; +} + +MICROPROFILE_DECLARE(RasterizerCache_CopySurface); +template +void RasterizerCache::CopySurface(const Surface& 
src_surface, const Surface& dst_surface, + SurfaceInterval copy_interval) { + MICROPROFILE_SCOPE(RasterizerCache_CopySurface); + + SurfaceParams subrect_params = dst_surface->FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval && src_surface != dst_surface); + + const auto dst_rect = dst_surface->GetScaledSubRect(subrect_params); + if (src_surface->type == SurfaceType::Fill) { + // FillSurface needs a 4 bytes buffer + const u32 fill_offset = + (boost::icl::first(copy_interval) - src_surface->addr) % src_surface->fill_size; + std::array fill_buffer; + + u32 fill_buff_pos = fill_offset; + for (std::size_t i = 0; i < fill_buffer.size(); i++) { + fill_buffer[i] = src_surface->fill_data[fill_buff_pos++ % src_surface->fill_size]; + } + + const ClearValue clear_value = + MakeClearValue(dst_surface->type, dst_surface->pixel_format, fill_buffer.data()); + + const TextureClear clear_rect = { + .surface_type = dst_surface->type, + .texture_format = dst_surface->pixel_format, + .texture_level = 0, + .rect = Rect2D{ + .offset = {dst_rect.left, dst_rect.bottom}, + .extent = {dst_rect.GetWidth(), dst_rect.GetHeight()} + } + }; + + runtime.ClearTexture(dst_surface->texture, clear_rect, clear_value); + return; + } + + if (src_surface->CanSubRect(subrect_params)) { + const auto src_rect = src_surface->GetScaledSubRect(subrect_params); + const TextureBlit texture_blit = { + .surface_type = src_surface->type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + runtime.BlitTextures(src_surface->texture, dst_surface->texture, texture_blit); + return; + } + + UNREACHABLE(); +} + +template +auto RasterizerCache::GetSurface(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) 
-> Surface { + if (params.addr == 0 || params.height * params.width == 0) { + return nullptr; + } + + // Use GetSurfaceSubRect instead + ASSERT(params.width == params.stride); + ASSERT(!params.is_tiled || (params.width % 8 == 0 && params.height % 8 == 0)); + + // Check for an exact match in existing surfaces + Surface surface = + FindMatch(surface_cache, params, match_res_scale); + + if (surface == nullptr) { + u16 target_res_scale = params.res_scale; + if (match_res_scale != ScaleMatch::Exact) { + // This surface may have a subrect of another surface with a higher res_scale, find + // it to adjust our params + SurfaceParams find_params = params; + Surface expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + // Keep res_scale when reinterpreting d24s8 -> rgba8 + if (params.pixel_format == PixelFormat::RGBA8) { + find_params.pixel_format = PixelFormat::D24S8; + expandable = FindMatch( + surface_cache, find_params, match_res_scale); + if (expandable != nullptr && expandable->res_scale > target_res_scale) { + target_res_scale = expandable->res_scale; + } + } + } + + SurfaceParams new_params = params; + new_params.res_scale = target_res_scale; + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + + if (load_if_create) { + ValidateSurface(surface, params.addr, params.size); + } + + return surface; +} + +template +auto RasterizerCache::GetSurfaceSubRect(const SurfaceParams& params, ScaleMatch match_res_scale, + bool load_if_create) -> SurfaceRect_Tuple { + if (params.addr == 0 || params.height * params.width == 0) { + return std::make_tuple(nullptr, Common::Rectangle{}); + } + + // Attempt to find encompassing surface + Surface surface = FindMatch(surface_cache, params, + match_res_scale); + + // Check if FindMatch failed because of res scaling + // If that's the case create a new surface with + // the 
dimensions of the lower res_scale surface + // to suggest it should not be used again + if (surface == nullptr && match_res_scale != ScaleMatch::Ignore) { + surface = FindMatch(surface_cache, params, + ScaleMatch::Ignore); + if (surface != nullptr) { + SurfaceParams new_params = *surface; + new_params.res_scale = params.res_scale; + + surface = CreateSurface(new_params); + RegisterSurface(surface); + } + } + + SurfaceParams aligned_params = params; + if (params.is_tiled) { + aligned_params.height = Common::AlignUp(params.height, 8); + aligned_params.width = Common::AlignUp(params.width, 8); + aligned_params.stride = Common::AlignUp(params.stride, 8); + aligned_params.UpdateParams(); + } + + // Check for a surface we can expand before creating a new one + if (surface == nullptr) { + surface = FindMatch(surface_cache, aligned_params, + match_res_scale); + if (surface != nullptr) { + aligned_params.width = aligned_params.stride; + aligned_params.UpdateParams(); + + SurfaceParams new_params = *surface; + new_params.addr = std::min(aligned_params.addr, surface->addr); + new_params.end = std::max(aligned_params.end, surface->end); + new_params.size = new_params.end - new_params.addr; + new_params.height = + new_params.size / aligned_params.BytesInPixels(aligned_params.stride); + ASSERT(new_params.size % aligned_params.BytesInPixels(aligned_params.stride) == 0); + + Surface new_surface = CreateSurface(new_params); + DuplicateSurface(surface, new_surface); + + // Delete the expanded surface, this can't be done safely yet + // because it may still be in use + surface->UnlinkAllWatcher(); // unlink watchers as if this surface is already deleted + remove_surfaces.emplace(surface); + + surface = new_surface; + RegisterSurface(new_surface); + } + } + + // No subrect found - create and return a new surface + if (surface == nullptr) { + SurfaceParams new_params = aligned_params; + // Can't have gaps in a surface + new_params.width = aligned_params.stride; + 
new_params.UpdateParams(); + // GetSurface will create the new surface and possibly adjust res_scale if necessary + surface = GetSurface(new_params, match_res_scale, load_if_create); + } else if (load_if_create) { + ValidateSurface(surface, aligned_params.addr, aligned_params.size); + } + + return std::make_tuple(surface, surface->GetScaledSubRect(params)); +} + +template +auto RasterizerCache::GetTextureSurface(const Pica::TexturingRegs::FullTextureConfig& config) -> Surface { + const auto info = Pica::Texture::TextureInfo::FromPicaRegister(config.config, config.format); + return GetTextureSurface(info, config.config.lod.max_level); +} + +template +auto RasterizerCache::GetTextureSurface(const Pica::Texture::TextureInfo& info, u32 max_level) -> Surface { + if (info.physical_address == 0) { + return nullptr; + } + + SurfaceParams params; + params.addr = info.physical_address; + params.width = info.width; + params.height = info.height; + params.is_tiled = true; + params.pixel_format = PixelFormatFromTextureFormat(info.format); + params.res_scale = /*texture_filterer->IsNull() ?*/ 1 /*: resolution_scale_factor*/; + params.UpdateParams(); + + u32 min_width = info.width >> max_level; + u32 min_height = info.height >> max_level; + if (min_width % 8 != 0 || min_height % 8 != 0) { + LOG_CRITICAL(Render_OpenGL, "Texture size ({}x{}) is not multiple of 8", min_width, + min_height); + return nullptr; + } + if (info.width != (min_width << max_level) || info.height != (min_height << max_level)) { + LOG_CRITICAL(Render_OpenGL, + "Texture size ({}x{}) does not support required mipmap level ({})", + params.width, params.height, max_level); + return nullptr; + } + + auto surface = GetSurface(params, ScaleMatch::Ignore, true); + if (!surface) + return nullptr; + + // Update mipmap if necessary + if (max_level != 0) { + if (max_level >= 8) { + // since PICA only supports texture size between 8 and 1024, there are at most eight + // possible mipmap levels including the base. 
+ LOG_CRITICAL(Render_OpenGL, "Unsupported mipmap level {}", max_level); + return nullptr; + } + + // Allocate more mipmap level if necessary + if (surface->max_level < max_level) { + /*if (!texture_filterer->IsNull()) { + // TODO: proper mipmap support for custom textures + runtime.GenerateMipmaps(surface->texture, max_level); + }*/ + + surface->max_level = max_level; + } + + // Blit mipmaps that have been invalidated + SurfaceParams surface_params = *surface; + for (u32 level = 1; level <= max_level; ++level) { + // In PICA all mipmap levels are stored next to each other + surface_params.addr += + surface_params.width * surface_params.height * surface_params.GetFormatBpp() / 8; + surface_params.width /= 2; + surface_params.height /= 2; + surface_params.stride = 0; // reset stride and let UpdateParams re-initialize it + surface_params.UpdateParams(); + + auto& watcher = surface->level_watchers[level - 1]; + if (!watcher || !watcher->Get()) { + auto level_surface = GetSurface(surface_params, ScaleMatch::Ignore, true); + if (level_surface) { + watcher = level_surface->CreateWatcher(); + } else { + watcher = nullptr; + } + } + + if (watcher && !watcher->IsValid()) { + auto level_surface = watcher->Get(); + if (!level_surface->invalid_regions.empty()) { + ValidateSurface(level_surface, level_surface->addr, level_surface->size); + } + + if (/*texture_filterer->IsNull()*/true) { + const auto src_rect = level_surface->GetScaledRect(); + const auto dst_rect = surface_params.GetScaledRect(); + const TextureBlit texture_blit = { + .surface_type = surface->type, + .src_level = 0, + .dst_level = level, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {dst_rect.left, dst_rect.bottom}, + .end = {dst_rect.right, dst_rect.top} + } + }; + + runtime.BlitTextures(level_surface->texture, surface->texture, texture_blit); + } + + 
watcher->Validate(); + } + } + } + + return surface; +} + +template +auto RasterizerCache::GetTextureCube(const TextureCubeConfig& config) -> const Surface& { + auto& cube = texture_cube_cache[config]; + + struct Face { + Face(std::shared_ptr& watcher, PAddr address) + : watcher(watcher), address(address) {} + std::shared_ptr& watcher; + PAddr address; + }; + + const std::array faces{{ + {cube->level_watchers[0], config.px}, + {cube->level_watchers[1], config.nx}, + {cube->level_watchers[2], config.py}, + {cube->level_watchers[3], config.ny}, + {cube->level_watchers[4], config.pz}, + {cube->level_watchers[5], config.nz}, + }}; + + for (const Face& face : faces) { + if (!face.watcher || !face.watcher->Get()) { + Pica::Texture::TextureInfo info; + info.physical_address = face.address; + info.height = info.width = config.width; + info.format = config.format; + info.SetDefaultStride(); + auto surface = GetTextureSurface(info); + if (surface) { + face.watcher = surface->CreateWatcher(); + } else { + // Can occur when texture address is invalid. We mark the watcher with nullptr + // in this case and the content of the face wouldn't get updated. These are + // usually leftover setup in the texture unit and games are not supposed to draw + // using them. 
+ face.watcher = nullptr; + } + } + } + + if (cube->texture.handle == 0) { + for (const Face& face : faces) { + if (face.watcher) { + auto surface = face.watcher->Get(); + cube->res_scale = std::max(cube->res_scale, surface->res_scale); + } + } + + const u32 width = cube->res_scale * config.width; + cube->texture = runtime.AllocateCubeMap(width, PixelFormatFromTextureFormat(config.format)); + } + + u32 scaled_size = cube->res_scale * config.width; + + for (std::size_t i = 0; i < faces.size(); i++) { + const Face& face = faces[i]; + if (face.watcher && !face.watcher->IsValid()) { + auto surface = face.watcher->Get(); + if (!surface->invalid_regions.empty()) { + ValidateSurface(surface, surface->addr, surface->size); + } + + const auto src_rect = surface->GetScaledRect(); + const TextureBlit texture_blit = { + .surface_type = SurfaceType::Color, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = static_cast(i), + .src_region = Region2D{ + .start = {src_rect.left, src_rect.bottom}, + .end = {src_rect.right, src_rect.top} + }, + .dst_region = Region2D{ + .start = {0, 0}, + .end = {scaled_size, scaled_size} + } + }; + + runtime.BlitTextures(surface->texture, cube->texture, texture_blit); + face.watcher->Validate(); + } + } + + return cube; +} + +template +auto RasterizerCache::GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb, + const Common::Rectangle& viewport_rect) + -> SurfaceSurfaceRect_Tuple { + const auto& regs = Pica::g_state.regs; + const auto& config = regs.framebuffer.framebuffer; + + // Update resolution_scale_factor and reset cache if changed + const bool resolution_scale_changed = + resolution_scale_factor != VideoCore::GetResolutionScaleFactor(); + const bool texture_filter_changed = + /*VideoCore::g_texture_filter_update_requested.exchange(false) && + texture_filterer->Reset(Settings::values.texture_filter_name, + VideoCore::GetResolutionScaleFactor())*/false; + + if (resolution_scale_changed || texture_filter_changed) { + 
resolution_scale_factor = VideoCore::GetResolutionScaleFactor(); + FlushAll(); + while (!surface_cache.empty()) + UnregisterSurface(*surface_cache.begin()->second.begin()); + texture_cube_cache.clear(); + } + + Common::Rectangle viewport_clamped{ + static_cast(std::clamp(viewport_rect.left, 0, static_cast(config.GetWidth()))), + static_cast(std::clamp(viewport_rect.top, 0, static_cast(config.GetHeight()))), + static_cast(std::clamp(viewport_rect.right, 0, static_cast(config.GetWidth()))), + static_cast( + std::clamp(viewport_rect.bottom, 0, static_cast(config.GetHeight())))}; + + // get color and depth surfaces + SurfaceParams color_params; + color_params.is_tiled = true; + color_params.res_scale = resolution_scale_factor; + color_params.width = config.GetWidth(); + color_params.height = config.GetHeight(); + SurfaceParams depth_params = color_params; + + color_params.addr = config.GetColorBufferPhysicalAddress(); + color_params.pixel_format = PixelFormatFromColorFormat(config.color_format); + color_params.UpdateParams(); + + depth_params.addr = config.GetDepthBufferPhysicalAddress(); + depth_params.pixel_format = PixelFormatFromDepthFormat(config.depth_format); + depth_params.UpdateParams(); + + auto color_vp_interval = color_params.GetSubRectInterval(viewport_clamped); + auto depth_vp_interval = depth_params.GetSubRectInterval(viewport_clamped); + + // Make sure that framebuffers don't overlap if both color and depth are being used + if (using_color_fb && using_depth_fb && + boost::icl::length(color_vp_interval & depth_vp_interval)) { + LOG_CRITICAL(Render_OpenGL, "Color and depth framebuffer memory regions overlap; " + "overlapping framebuffers not supported!"); + using_depth_fb = false; + } + + Common::Rectangle color_rect{}; + Surface color_surface = nullptr; + if (using_color_fb) + std::tie(color_surface, color_rect) = + GetSurfaceSubRect(color_params, ScaleMatch::Exact, false); + + Common::Rectangle depth_rect{}; + Surface depth_surface = nullptr; + if 
(using_depth_fb) + std::tie(depth_surface, depth_rect) = + GetSurfaceSubRect(depth_params, ScaleMatch::Exact, false); + + Common::Rectangle fb_rect{}; + if (color_surface != nullptr && depth_surface != nullptr) { + fb_rect = color_rect; + // Color and Depth surfaces must have the same dimensions and offsets + if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top || + color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) { + color_surface = GetSurface(color_params, ScaleMatch::Exact, false); + depth_surface = GetSurface(depth_params, ScaleMatch::Exact, false); + fb_rect = color_surface->GetScaledRect(); + } + } else if (color_surface != nullptr) { + fb_rect = color_rect; + } else if (depth_surface != nullptr) { + fb_rect = depth_rect; + } + + if (color_surface != nullptr) { + ValidateSurface(color_surface, boost::icl::first(color_vp_interval), + boost::icl::length(color_vp_interval)); + color_surface->InvalidateAllWatcher(); + } + if (depth_surface != nullptr) { + ValidateSurface(depth_surface, boost::icl::first(depth_vp_interval), + boost::icl::length(depth_vp_interval)); + depth_surface->InvalidateAllWatcher(); + } + + return std::make_tuple(color_surface, depth_surface, fb_rect); +} + +template +auto RasterizerCache::GetFillSurface(const GPU::Regs::MemoryFillConfig& config) -> Surface { + SurfaceParams params; + params.addr = config.GetStartAddress(); + params.end = config.GetEndAddress(); + params.size = params.end - params.addr; + params.type = SurfaceType::Fill; + params.res_scale = std::numeric_limits::max(); + + Surface new_surface = std::make_shared(params, runtime); + + std::memcpy(&new_surface->fill_data[0], &config.value_32bit, 4); + if (config.fill_32bit) { + new_surface->fill_size = 4; + } else if (config.fill_24bit) { + new_surface->fill_size = 3; + } else { + new_surface->fill_size = 2; + } + + RegisterSurface(new_surface); + return new_surface; +} + +template +auto 
RasterizerCache::GetTexCopySurface(const SurfaceParams& params) -> SurfaceRect_Tuple { + Common::Rectangle rect{}; + + Surface match_surface = FindMatch( + surface_cache, params, ScaleMatch::Ignore); + + if (match_surface != nullptr) { + ValidateSurface(match_surface, params.addr, params.size); + + SurfaceParams match_subrect; + if (params.width != params.stride) { + const u32 tiled_size = match_surface->is_tiled ? 8 : 1; + match_subrect = params; + match_subrect.width = match_surface->PixelsInBytes(params.width) / tiled_size; + match_subrect.stride = match_surface->PixelsInBytes(params.stride) / tiled_size; + match_subrect.height *= tiled_size; + } else { + match_subrect = match_surface->FromInterval(params.GetInterval()); + ASSERT(match_subrect.GetInterval() == params.GetInterval()); + } + + rect = match_surface->GetScaledSubRect(match_subrect); + } + + return std::make_tuple(match_surface, rect); +} + +template +void RasterizerCache::DuplicateSurface(const Surface& src_surface, const Surface& dest_surface) { + ASSERT(dest_surface->addr <= src_surface->addr && dest_surface->end >= src_surface->end); + + BlitSurfaces(src_surface, src_surface->GetScaledRect(), dest_surface, + dest_surface->GetScaledSubRect(*src_surface)); + + dest_surface->invalid_regions -= src_surface->GetInterval(); + dest_surface->invalid_regions += src_surface->invalid_regions; + + SurfaceRegions regions; + for (const auto& pair : RangeFromInterval(dirty_regions, src_surface->GetInterval())) { + if (pair.second == src_surface) { + regions += pair.first; + } + } + for (const auto& interval : regions) { + dirty_regions.set({interval, dest_surface}); + } +} + +template +void RasterizerCache::ValidateSurface(const Surface& surface, PAddr addr, u32 size) { + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval validate_interval(addr, addr + size); + if (surface->type == SurfaceType::Fill) { + // Sanity check, fill surfaces will always be valid when used + 
ASSERT(surface->IsRegionValid(validate_interval)); + return; + } + + auto validate_regions = surface->invalid_regions & validate_interval; + auto NotifyValidated = [&](SurfaceInterval interval) { + surface->invalid_regions.erase(interval); + validate_regions.erase(interval); + }; + + while (true) { + const auto it = validate_regions.begin(); + if (it == validate_regions.end()) { + break; + } + + const auto interval = *it & validate_interval; + // Look for a valid surface to copy from + SurfaceParams params = surface->FromInterval(interval); + + Surface copy_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (copy_surface != nullptr) { + SurfaceInterval copy_interval = copy_surface->GetCopyableInterval(params); + CopySurface(copy_surface, surface, copy_interval); + NotifyValidated(copy_interval); + continue; + } + + // Try to find surface in cache with different format + // that can be reinterpreted to the requested format. + if (ValidateByReinterpretation(surface, params, interval)) { + NotifyValidated(interval); + continue; + } + // Could not find a matching reinterpreter, check if we need to implement a + // reinterpreter + if (NoUnimplementedReinterpretations(surface, params, interval) && + !IntervalHasInvalidPixelFormat(params, interval)) { + // No surfaces were found in the cache that had a matching bit-width. + // If the region was created entirely on the GPU, + // assume it was a developer mistake and skip flushing. + if (boost::icl::contains(dirty_regions, interval)) { + LOG_INFO(Render_OpenGL, "Region created fully on GPU and reinterpretation is " + "invalid. 
Skipping validation"); + validate_regions.erase(interval); + continue; + } + } + + // Load data from 3DS memory + FlushRegion(params.addr, params.size); + UploadSurface(surface, interval); + NotifyValidated(params.GetInterval()); + } +} + +MICROPROFILE_DECLARE(RasterizerCache_SurfaceLoad); +template +void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval interval) { + const SurfaceParams info = surface->FromInterval(interval); + const u32 load_start = info.addr; + const u32 load_end = info.end; + ASSERT(load_start >= surface->addr && load_end <= surface->end); + + const auto& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), true); + MemoryRef source_ptr = VideoCore::g_memory->GetPhysicalRef(info.addr); + if (!source_ptr) [[unlikely]] { + return; + } + + const u32 start_offset = load_start - surface->addr; + const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start); + const u32 upload_size = static_cast(upload_data.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad); + + if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto dest_buffer = staging.mapped.subspan(start_offset, upload_size); + /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(upload_data, dest_buffer); + } else { + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + }*/ + std::memcpy(dest_buffer.data(), upload_data.data(), upload_size); + } else { + UnswizzleTexture(*surface, start_offset, upload_data, staging.mapped); + } + + surface->UploadTexture(surface->GetSubRect(info), staging); +} + +MICROPROFILE_DECLARE(RasterizerCache_SurfaceFlush); +template +void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval interval) { + const u32 flush_start = 
boost::icl::first(interval); + const u32 flush_end = boost::icl::last_next(interval); + ASSERT(flush_start >= surface->addr && flush_end <= surface->end); + + const auto& staging = runtime.FindStaging( + surface->width * surface->height * GetBytesPerPixel(surface->pixel_format), false); + if (surface->type != SurfaceType::Fill) { + SurfaceParams params = surface->FromInterval(interval); + surface->DownloadTexture(surface->GetSubRect(params), staging); + } + + MemoryRef dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start); + if (!dest_ptr) [[unlikely]] { + return; + } + + const auto start_offset = flush_start - surface->addr; + const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start); + const auto download_size = static_cast(download_dest.size()); + + MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush); + + if (surface->type == SurfaceType::Fill) { + const u32 coarse_start_offset = start_offset - (start_offset % surface->fill_size); + const u32 backup_bytes = start_offset % surface->fill_size; + std::array backup_data; + if (backup_bytes) { + std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes); + } + + for (u32 offset = coarse_start_offset; offset < download_size; offset += surface->fill_size) { + std::memcpy(&dest_ptr[offset], &surface->fill_data[0], + std::min(surface->fill_size, download_size - offset)); + } + + if (backup_bytes) + std::memcpy(&dest_ptr[coarse_start_offset], &backup_data[0], backup_bytes); + } else if (!surface->is_tiled) { + ASSERT(surface->type == SurfaceType::Color); + + const auto download_data = staging.mapped.subspan(start_offset, download_size); + /*if (surface->pixel_format == PixelFormat::RGBA8 && GLES) { + Pica::Texture::ConvertABGRToRGBA(download_data, download_dest); + } else if (surface->pixel_format == PixelFormat::RGB8 && GLES) { + Pica::Texture::ConvertBGRToRGB(download_data, download_dest); + } else { + std::memcpy(download_dest.data(), download_data.data(), download_size); + }*/ + 
std::memcpy(download_dest.data(), download_data.data(), download_size); + } else { + SwizzleTexture(*surface, start_offset, staging.mapped, download_dest); + } +} + +template +bool RasterizerCache::NoUnimplementedReinterpretations(const Surface& surface, SurfaceParams& params, + SurfaceInterval interval) { + static constexpr std::array all_formats{ + PixelFormat::RGBA8, PixelFormat::RGB8, PixelFormat::RGB5A1, PixelFormat::RGB565, + PixelFormat::RGBA4, PixelFormat::IA8, PixelFormat::RG8, PixelFormat::I8, + PixelFormat::A8, PixelFormat::IA4, PixelFormat::I4, PixelFormat::A4, + PixelFormat::ETC1, PixelFormat::ETC1A4, PixelFormat::D16, PixelFormat::D24, + PixelFormat::D24S8, + }; + bool implemented = true; + for (PixelFormat format : all_formats) { + if (GetFormatBpp(format) == surface->GetFormatBpp()) { + params.pixel_format = format; + // This could potentially be expensive, + // although experimentally it hasn't been too bad + Surface test_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + if (test_surface != nullptr) { + LOG_WARNING(Render_OpenGL, "Missing pixel_format reinterpreter: {} -> {}", + PixelFormatAsString(format), + PixelFormatAsString(surface->pixel_format)); + implemented = false; + } + } + } + return implemented; +} + +template +bool RasterizerCache::IntervalHasInvalidPixelFormat(SurfaceParams& params, SurfaceInterval interval) { + params.pixel_format = PixelFormat::Invalid; + for (const auto& set : RangeFromInterval(surface_cache, interval)) + for (const auto& surface : set.second) + if (surface->pixel_format == PixelFormat::Invalid) { + LOG_DEBUG(Render_OpenGL, "Surface {:#x} found with invalid pixel format", + surface->addr); + return true; + } + return false; +} + +template +bool RasterizerCache::ValidateByReinterpretation(const Surface& surface, const SurfaceParams& params, + SurfaceInterval interval) { + /*const PixelFormat dst_format = surface->pixel_format; + const SurfaceType type = GetFormatType(dst_format); + + 
for (auto& reinterpreter : + format_reinterpreter->GetPossibleReinterpretations(surface->pixel_format)) { + + params.pixel_format = reinterpreter->GetSourceFormat(); + Surface reinterpret_surface = + FindMatch(surface_cache, params, ScaleMatch::Ignore, interval); + + if (reinterpret_surface != nullptr) { + auto reinterpret_interval = params.GetCopyableInterval(reinterpret_surface); + auto reinterpret_params = surface->FromInterval(reinterpret_interval); + auto src_rect = reinterpret_surface->GetScaledSubRect(reinterpret_params); + auto dest_rect = surface->GetScaledSubRect(reinterpret_params); + + if (!texture_filterer->IsNull() && reinterpret_surface->res_scale == 1 && + surface->res_scale == resolution_scale_factor) { + // The destination surface is either a framebuffer, or a filtered texture. + // Create an intermediate surface to convert to before blitting to the + // destination. + const u32 width = dest_rect.GetHeight() / resolution_scale_factor; + const u32 height = dest_rect.GetWidth() / resolution_scale_factor; + const Common::Rectangle tmp_rect{0, width, height, 0}; + + OGLTexture tmp_tex = AllocateSurfaceTexture(dst_format, height, width); + reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, tmp_tex, + tmp_rect); + + if (!texture_filterer->Filter(tmp_tex, tmp_rect, surface->texture, dest_rect, type)) { + const TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_layer = 0, + .dst_layer = 0, + .src_region = Region2D{ + .start = {0, 0}, + .end = {width, height} + }, + .dst_region = Region2D{ + .start = {dest_rect.left, dest_rect.bottom}, + .end = {dest_rect.right, dest_rect.top} + } + }; + + runtime.BlitTextures(tmp_tex, surface->texture, texture_blit); + } + + } else { + reinterpreter->Reinterpret(reinterpret_surface->texture, src_rect, surface->texture, + dest_rect); + } + + return true; + } + }*/ + + return false; +} + +template +void RasterizerCache::FlushRegion(PAddr addr, u32 size, Surface 
flush_surface) { + std::lock_guard lock{mutex}; + + if (size == 0) [[unlikely]] { + return; + } + + const SurfaceInterval flush_interval(addr, addr + size); + SurfaceRegions flushed_intervals; + + for (auto& pair : RangeFromInterval(dirty_regions, flush_interval)) { + // small sizes imply that this most likely comes from the cpu, flush the entire region + // the point is to avoid thousands of small writes every frame if the cpu decides to + // access that region, anything higher than 8 you're guaranteed it comes from a service + const auto interval = size <= 8 ? pair.first : pair.first & flush_interval; + auto& surface = pair.second; + + if (flush_surface != nullptr && surface != flush_surface) + continue; + + // Sanity check, this surface is the last one that marked this region dirty + ASSERT(surface->IsRegionValid(interval)); + + DownloadSurface(surface, interval); + flushed_intervals += interval; + } + + // Reset dirty regions + dirty_regions -= flushed_intervals; +} + +template +void RasterizerCache::FlushAll() { + FlushRegion(0, 0xFFFFFFFF); +} + +template +void RasterizerCache::InvalidateRegion(PAddr addr, u32 size, const Surface& region_owner) { + std::lock_guard lock{mutex}; + + if (size == 0) + return; + + const SurfaceInterval invalid_interval(addr, addr + size); + + if (region_owner != nullptr) { + ASSERT(region_owner->type != SurfaceType::Texture); + ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end); + // Surfaces can't have a gap + ASSERT(region_owner->width == region_owner->stride); + region_owner->invalid_regions.erase(invalid_interval); + } + + for (const auto& pair : RangeFromInterval(surface_cache, invalid_interval)) { + for (const auto& cached_surface : pair.second) { + if (cached_surface == region_owner) + continue; + + // If cpu is invalidating this region we want to remove it + // to (likely) mark the memory pages as uncached + if (region_owner == nullptr && size <= 8) { + FlushRegion(cached_surface->addr, 
cached_surface->size, cached_surface); + remove_surfaces.emplace(cached_surface); + continue; + } + + const auto interval = cached_surface->GetInterval() & invalid_interval; + cached_surface->invalid_regions.insert(interval); + cached_surface->InvalidateAllWatcher(); + + // If the surface has no salvageable data it should be removed from the cache to avoid + // clogging the data structure + if (cached_surface->IsSurfaceFullyInvalid()) { + remove_surfaces.emplace(cached_surface); + } + } + } + + if (region_owner != nullptr) + dirty_regions.set({invalid_interval, region_owner}); + else + dirty_regions.erase(invalid_interval); + + for (const auto& remove_surface : remove_surfaces) { + if (remove_surface == region_owner) { + Surface expanded_surface = FindMatch( + surface_cache, *region_owner, ScaleMatch::Ignore); + ASSERT(expanded_surface); + + if ((region_owner->invalid_regions - expanded_surface->invalid_regions).empty()) { + DuplicateSurface(region_owner, expanded_surface); + } else { + continue; + } + } + UnregisterSurface(remove_surface); + } + + remove_surfaces.clear(); +} + +template +auto RasterizerCache::CreateSurface(SurfaceParams& params) -> Surface { + Surface surface = std::make_shared(params, runtime); + surface->invalid_regions.insert(surface->GetInterval()); + + // Allocate surface texture + surface->texture = + runtime.Allocate2D(surface->GetScaledWidth(), surface->GetScaledHeight(), params.pixel_format); + + return surface; +} + +template +void RasterizerCache::RegisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (surface->registered) { + return; + } + surface->registered = true; + surface_cache.add({surface->GetInterval(), SurfaceSet{surface}}); + rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, 1); +} + +template +void RasterizerCache::UnregisterSurface(const Surface& surface) { + std::lock_guard lock{mutex}; + + if (!surface->registered) { + return; + } + surface->registered = false; + 
rasterizer.UpdatePagesCachedCount(surface->addr, surface->size, -1); + surface_cache.subtract({surface->GetInterval(), SurfaceSet{surface}}); +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/surface_base.h b/src/video_core/rasterizer_cache/surface_base.h new file mode 100644 index 000000000..afe629e98 --- /dev/null +++ b/src/video_core/rasterizer_cache/surface_base.h @@ -0,0 +1,211 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "common/alignment.h" +#include "common/assert.h" +#include "video_core/rasterizer_cache/surface_params.h" + +namespace VideoCore { + +using SurfaceRegions = boost::icl::interval_set; + +/** + * A watcher that notifies whether a cached surface has been changed. This is useful for caching + * surface collection objects, including texture cube and mipmap. + */ +template +class SurfaceWatcher { +public: + explicit SurfaceWatcher(std::weak_ptr&& surface) : surface(std::move(surface)) {} + + /// Checks whether the surface has been changed. + bool IsValid() const { + return !surface.expired() && valid; + } + + /// Marks that the content of the referencing surface has been updated to the watcher user. + void Validate() { + ASSERT(!surface.expired()); + valid = true; + } + + /// Gets the referencing surface. 
Returns null if the surface has been destroyed + std::shared_ptr Get() const { + return surface.lock(); + } + +public: + std::weak_ptr surface; + bool valid = false; +}; + +template +class SurfaceBase : public SurfaceParams, public std::enable_shared_from_this { + using Watcher = SurfaceWatcher; +public: + SurfaceBase(SurfaceParams& params) : SurfaceParams{params} {} + virtual ~SurfaceBase() = default; + + /// Returns true when this surface can be used to fill the fill_interval of dest_surface + bool CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const; + + /// Returns true when copy_interval of dest_surface can be validated by copying from this surface + bool CanCopy(const SurfaceParams& dest_surface, SurfaceInterval copy_interval) const; + + /// Returns the region of the biggest valid rectangle within interval + SurfaceInterval GetCopyableInterval(const SurfaceParams& params) const; + + /// Creates a surface watcher linked to this surface + std::shared_ptr CreateWatcher(); + + /// Invalidates all watchers linked to this surface + void InvalidateAllWatcher(); + + /// Removes any linked watchers from this surface + void UnlinkAllWatcher(); + + /// Returns true when the region denoted by interval is valid + bool IsRegionValid(SurfaceInterval interval) const { + return (invalid_regions.find(interval) == invalid_regions.end()); + } + + /// Returns true when the entire surface is invalid + bool IsSurfaceFullyInvalid() const { + auto interval = GetInterval(); + return *invalid_regions.equal_range(interval).first == interval; + } + +public: + bool registered = false; + bool is_texture_cube = false; + SurfaceRegions invalid_regions; + std::array, 7> level_watchers; + u32 max_level = 0; + std::array fill_data; + u32 fill_size = 0; + +public: + u32 watcher_count = 0; + std::array, 8> watchers; +}; + +template +bool SurfaceBase::CanFill(const SurfaceParams& dest_surface, SurfaceInterval fill_interval) const { + if (type == SurfaceType::Fill && 
IsRegionValid(fill_interval) && + boost::icl::first(fill_interval) >= addr && + boost::icl::last_next(fill_interval) <= end && // dest_surface is within our fill range + dest_surface.FromInterval(fill_interval).GetInterval() == + fill_interval) { // make sure interval is a rectangle in dest surface + + if (fill_size * 8 != dest_surface.GetFormatBpp()) { + // Check if bits repeat for our fill_size + const u32 dest_bytes_per_pixel = std::max(dest_surface.GetFormatBpp() / 8, 1u); + std::vector fill_test(fill_size * dest_bytes_per_pixel); + + for (u32 i = 0; i < dest_bytes_per_pixel; ++i) + std::memcpy(&fill_test[i * fill_size], &fill_data[0], fill_size); + + for (u32 i = 0; i < fill_size; ++i) + if (std::memcmp(&fill_test[dest_bytes_per_pixel * i], &fill_test[0], + dest_bytes_per_pixel) != 0) + return false; + + if (dest_surface.GetFormatBpp() == 4 && (fill_test[0] & 0xF) != (fill_test[0] >> 4)) + return false; + } + return true; + } + return false; +} + +template +bool SurfaceBase::CanCopy(const SurfaceParams& dest_surface, + SurfaceInterval copy_interval) const { + SurfaceParams subrect_params = dest_surface.FromInterval(copy_interval); + ASSERT(subrect_params.GetInterval() == copy_interval); + if (CanSubRect(subrect_params)) + return true; + + if (CanFill(dest_surface, copy_interval)) + return true; + + return false; +} + +template +SurfaceInterval SurfaceBase::GetCopyableInterval(const SurfaceParams& params) const { + SurfaceInterval result{}; + const u32 tile_align = params.BytesInPixels(params.is_tiled ? 
8 * 8 : 1); + const auto valid_regions = SurfaceRegions{params.GetInterval() & GetInterval()} - invalid_regions; + + for (auto& valid_interval : valid_regions) { + const SurfaceInterval aligned_interval{ + params.addr + Common::AlignUp(boost::icl::first(valid_interval) - params.addr, tile_align), + params.addr + Common::AlignDown(boost::icl::last_next(valid_interval) - params.addr, tile_align) + }; + + if (params.BytesInPixels(tile_align) > boost::icl::length(valid_interval) || + boost::icl::length(aligned_interval) == 0) { + continue; + } + + // Get the rectangle within aligned_interval + const u32 stride_bytes = params.BytesInPixels(params.stride) * (params.is_tiled ? 8 : 1); + SurfaceInterval rect_interval{ + params.addr + Common::AlignUp(boost::icl::first(aligned_interval) - params.addr, stride_bytes), + params.addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - params.addr, stride_bytes), + }; + + if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { + // 1 row + rect_interval = aligned_interval; + } else if (boost::icl::length(rect_interval) == 0) { + // 2 rows that do not make a rectangle, return the larger one + const SurfaceInterval row1{boost::icl::first(aligned_interval), + boost::icl::first(rect_interval)}; + const SurfaceInterval row2{boost::icl::first(rect_interval), + boost::icl::last_next(aligned_interval)}; + rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; + } + + if (boost::icl::length(rect_interval) > boost::icl::length(result)) { + result = rect_interval; + } + } + return result; +} + +template +auto SurfaceBase::CreateWatcher() -> std::shared_ptr { + S* derived = reinterpret_cast(this); + auto watcher = std::make_shared(std::move(derived->weak_from_this())); + watchers[watcher_count++] = watcher; + return watcher; +} + +template +void SurfaceBase::InvalidateAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + } + } +} + +template +void SurfaceBase::UnlinkAllWatcher() { + for (const auto& watcher : watchers) { + if (auto locked = watcher.lock()) { + locked->valid = false; + locked->surface.reset(); + } + } + + watchers = {}; + watcher_count = 0; +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/surface_params.cpp b/src/video_core/rasterizer_cache/surface_params.cpp index 90f2776ea..165b60146 100644 --- a/src/video_core/rasterizer_cache/surface_params.cpp +++ b/src/video_core/rasterizer_cache/surface_params.cpp @@ -6,7 +6,7 @@ #include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_cache/surface_params.h" -namespace OpenGL { +namespace VideoCore { SurfaceParams SurfaceParams::FromInterval(SurfaceInterval interval) const { SurfaceParams params = *this; @@ -64,47 +64,6 @@ SurfaceInterval SurfaceParams::GetSubRectInterval(Common::Rectangle unscale return {addr + BytesInPixels(pixel_offset), addr + BytesInPixels(pixel_offset + pixels)}; } -SurfaceInterval SurfaceParams::GetCopyableInterval(const Surface& src_surface) const { - SurfaceInterval result{}; - const auto valid_regions = - SurfaceRegions(GetInterval() & src_surface->GetInterval()) - src_surface->invalid_regions; - for (auto& valid_interval : valid_regions) { - const SurfaceInterval aligned_interval{ - addr + Common::AlignUp(boost::icl::first(valid_interval) - addr, - BytesInPixels(is_tiled ? 
8 * 8 : 1)), - addr + Common::AlignDown(boost::icl::last_next(valid_interval) - addr, - BytesInPixels(is_tiled ? 8 * 8 : 1))}; - - if (BytesInPixels(is_tiled ? 8 * 8 : 1) > boost::icl::length(valid_interval) || - boost::icl::length(aligned_interval) == 0) { - continue; - } - - // Get the rectangle within aligned_interval - const u32 stride_bytes = BytesInPixels(stride) * (is_tiled ? 8 : 1); - SurfaceInterval rect_interval{ - addr + Common::AlignUp(boost::icl::first(aligned_interval) - addr, stride_bytes), - addr + Common::AlignDown(boost::icl::last_next(aligned_interval) - addr, stride_bytes), - }; - if (boost::icl::first(rect_interval) > boost::icl::last_next(rect_interval)) { - // 1 row - rect_interval = aligned_interval; - } else if (boost::icl::length(rect_interval) == 0) { - // 2 rows that do not make a rectangle, return the larger one - const SurfaceInterval row1{boost::icl::first(aligned_interval), - boost::icl::first(rect_interval)}; - const SurfaceInterval row2{boost::icl::first(rect_interval), - boost::icl::last_next(aligned_interval)}; - rect_interval = (boost::icl::length(row1) > boost::icl::length(row2)) ? 
row1 : row2; - } - - if (boost::icl::length(rect_interval) > boost::icl::length(result)) { - result = rect_interval; - } - } - return result; -} - Common::Rectangle SurfaceParams::GetSubRect(const SurfaceParams& sub_surface) const { const u32 begin_pixel_index = PixelsInBytes(sub_surface.addr - addr); diff --git a/src/video_core/rasterizer_cache/surface_params.h b/src/video_core/rasterizer_cache/surface_params.h index 26c54e10c..8d5f75869 100644 --- a/src/video_core/rasterizer_cache/surface_params.h +++ b/src/video_core/rasterizer_cache/surface_params.h @@ -11,16 +11,13 @@ #include "common/math_util.h" #include "video_core/rasterizer_cache/pixel_format.h" -namespace OpenGL { - -class CachedSurface; -using Surface = std::shared_ptr; +namespace VideoCore { using SurfaceInterval = boost::icl::right_open_interval; class SurfaceParams { public: - // Surface match traits + /// Surface match traits bool ExactMatch(const SurfaceParams& other_surface) const; bool CanSubRect(const SurfaceParams& sub_surface) const; bool CanExpand(const SurfaceParams& expanded_surface) const; @@ -29,13 +26,10 @@ public: Common::Rectangle GetSubRect(const SurfaceParams& sub_surface) const; Common::Rectangle GetScaledSubRect(const SurfaceParams& sub_surface) const; - // Returns the outer rectangle containing "interval" + /// Returns the outer rectangle containing "interval" SurfaceParams FromInterval(SurfaceInterval interval) const; SurfaceInterval GetSubRectInterval(Common::Rectangle unscaled_rect) const; - // Returns the region of the biggest valid rectange within interval - SurfaceInterval GetCopyableInterval(const Surface& src_surface) const; - /// Updates remaining members from the already set addr, width, height and pixel_format void UpdateParams() { if (stride == 0) { @@ -57,7 +51,7 @@ public: } u32 GetFormatBpp() const { - return OpenGL::GetFormatBpp(pixel_format); + return VideoCore::GetFormatBpp(pixel_format); } u32 GetScaledWidth() const { diff --git 
a/src/video_core/rasterizer_cache/texture_runtime.cpp b/src/video_core/rasterizer_cache/texture_runtime.cpp deleted file mode 100644 index 588515a0e..000000000 --- a/src/video_core/rasterizer_cache/texture_runtime.cpp +++ /dev/null @@ -1,261 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "common/scope_exit.h" -#include "video_core/rasterizer_cache/utils.h" -#include "video_core/rasterizer_cache/texture_runtime.h" -#include "video_core/renderer_opengl/gl_driver.h" -#include "video_core/renderer_opengl/gl_state.h" - -namespace OpenGL { - -GLbitfield MakeBufferMask(SurfaceType type) { - switch (type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - return GL_COLOR_BUFFER_BIT; - case SurfaceType::Depth: - return GL_DEPTH_BUFFER_BIT; - case SurfaceType::DepthStencil: - return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - return GL_COLOR_BUFFER_BIT; -} - -TextureRuntime::TextureRuntime(Driver& driver) : driver(driver) { - read_fbo.Create(); - draw_fbo.Create(); -} - -void TextureRuntime::ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy, - PixelFormat format, std::span pixels) { - - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.ResetTexture(texture.handle); - state.draw.read_framebuffer = read_fbo.handle; - state.Apply(); - - switch (copy.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, - copy.texture_level); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - break; - case SurfaceType::Depth: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - 
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, - copy.texture_level); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - break; - case SurfaceType::DepthStencil: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, copy.texture_level); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - // TODO: Use PBO here - const FormatTuple& tuple = GetFormatTuple(format); - glReadPixels(copy.texture_offset.x, copy.texture_offset.y, - copy.texture_offset.x + copy.texture_extent.width, - copy.texture_offset.y + copy.texture_extent.height, - tuple.format, tuple.type, pixels.data() + copy.buffer_offset); -} - -bool TextureRuntime::ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - // Setup scissor rectangle according to the clear rectangle - OpenGLState state{}; - state.scissor.enabled = true; - state.scissor.x = clear.rect.offset.x; - state.scissor.y = clear.rect.offset.y; - state.scissor.width = clear.rect.extent.width; - state.scissor.height = clear.rect.extent.height; - state.draw.draw_framebuffer = draw_fbo.handle; - state.Apply(); - - switch (clear.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, - clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, - 0); - - state.color_mask.red_enabled = true; - state.color_mask.green_enabled = true; - state.color_mask.blue_enabled = true; - state.color_mask.alpha_enabled = true; - state.Apply(); - - glClearBufferfv(GL_COLOR, 0, value.color.AsArray()); - 
break; - case SurfaceType::Depth: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, - clear.texture_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - state.depth.write_mask = GL_TRUE; - state.Apply(); - - glClearBufferfv(GL_DEPTH, 0, &value.depth); - break; - case SurfaceType::DepthStencil: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - texture.handle, clear.texture_level); - - state.depth.write_mask = GL_TRUE; - state.stencil.write_mask = -1; - state.Apply(); - - glClearBufferfi(GL_DEPTH_STENCIL, 0, value.depth, value.stencil); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - return true; -} - -bool TextureRuntime::CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy) { - return true; -} - -bool TextureRuntime::BlitTextures(OGLTexture& source, OGLTexture& dest, const TextureBlit& blit) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.draw.read_framebuffer = read_fbo.handle; - state.draw.draw_framebuffer = draw_fbo.handle; - state.Apply(); - - auto BindAttachment = [&blit, &source, &dest](GLenum attachment, u32 src_tex, u32 dst_tex) -> void { - const GLenum src_target = source.target == GL_TEXTURE_CUBE_MAP ? - GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.src_layer : source.target; - const GLenum dst_target = dest.target == GL_TEXTURE_CUBE_MAP ? 
- GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.dst_layer : dest.target; - - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, attachment, src_target, src_tex, blit.src_level); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, dst_target, dst_tex, blit.dst_level); - }; - - switch (blit.surface_type) { - case SurfaceType::Color: - case SurfaceType::Texture: - case SurfaceType::Fill: - // Bind only color - BindAttachment(GL_COLOR_ATTACHMENT0, source.handle, dest.handle); - BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, 0, 0); - break; - case SurfaceType::Depth: - // Bind only depth - BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); - BindAttachment(GL_DEPTH_ATTACHMENT, source.handle, dest.handle); - BindAttachment(GL_STENCIL_ATTACHMENT, 0, 0); - break; - case SurfaceType::DepthStencil: - // Bind to combined depth + stencil - BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); - BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, source.handle, dest.handle); - break; - default: - UNREACHABLE_MSG("Invalid surface type!"); - } - - // TODO (wwylele): use GL_NEAREST for shadow map texture - // Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but - // doing linear intepolation componentwise would cause incorrect value. However, for a - // well-programmed game this code path should be rarely executed for shadow map with - // inconsistent scale. - const GLbitfield buffer_mask = MakeBufferMask(blit.surface_type); - const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? 
GL_LINEAR : GL_NEAREST; - glBlitFramebuffer(blit.src_region.start.x, blit.src_region.start.y, - blit.src_region.end.x, blit.src_region.end.y, - blit.dst_region.start.x, blit.dst_region.start.y, - blit.dst_region.end.x, blit.dst_region.end.y, - buffer_mask, filter); - - return true; -} - -void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) { - OpenGLState prev_state = OpenGLState::GetCurState(); - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state{}; - state.texture_units[0].texture_2d = texture.handle; - state.Apply(); - - glActiveTexture(GL_TEXTURE0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); - - glGenerateMipmap(GL_TEXTURE_2D); -} - -const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { - const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; - const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; - auto& search = upload ? upload_buffers : download_buffers; - - // Attempt to find a free buffer that fits the requested data - for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { - if (!upload || it->IsFree()) { - return *it; - } - } - - OGLBuffer buffer{}; - buffer.Create(); - - glBindBuffer(target, buffer.handle); - - // Allocate a new buffer and map the data to the host - std::byte* data = nullptr; - if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { - const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; - glBufferStorageEXT(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT); - data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | - GL_MAP_COHERENT_BIT_EXT)); - } else if (driver.HasArbBufferStorage()) { - const GLbitfield storage = upload ? 
GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; - glBufferStorage(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT); - data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT | - GL_MAP_COHERENT_BIT)); - } else { - UNIMPLEMENTED(); - } - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - - StagingBuffer staging = { - .buffer = std::move(buffer), - .mapped = std::span{data, size}, - .size = size - }; - - const auto& it = search.emplace(std::move(staging)); - return *it; -} - -} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/types.h b/src/video_core/rasterizer_cache/types.h index be85a8c01..60b8ca025 100644 --- a/src/video_core/rasterizer_cache/types.h +++ b/src/video_core/rasterizer_cache/types.h @@ -7,7 +7,7 @@ #include "common/vector_math.h" #include "video_core/rasterizer_cache/pixel_format.h" -namespace OpenGL { +namespace VideoCore { struct Offset { constexpr auto operator<=>(const Offset&) const noexcept = default; diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index ddfa369d2..c8e4ab1f2 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -13,51 +13,7 @@ #include "video_core/renderer_opengl/gl_vars.h" #include "video_core/video_core.h" -namespace OpenGL { - -constexpr FormatTuple tex_tuple = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; - -static constexpr std::array depth_format_tuples = {{ - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 - {}, - {GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 -}}; - -static constexpr std::array fb_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 - {GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, 
// RGB565 - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 -}}; - -// Same as above, with minor changes for OpenGL ES. Replaced -// GL_UNSIGNED_INT_8_8_8_8 with GL_UNSIGNED_BYTE and -// GL_BGR with GL_RGB -static constexpr std::array fb_format_tuples_oes = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 - {GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // RGB8 - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 - {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 - {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 -}}; - -const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { - const SurfaceType type = GetFormatType(pixel_format); - const std::size_t format_index = static_cast(pixel_format); - - if (type == SurfaceType::Color) { - ASSERT(format_index < fb_format_tuples.size()); - return (GLES ? fb_format_tuples_oes : fb_format_tuples)[format_index]; - } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) { - const std::size_t tuple_idx = format_index - 14; - ASSERT(tuple_idx < depth_format_tuples.size()); - return depth_format_tuples[tuple_idx]; - } - - return tex_tuple; -} +namespace VideoCore { void SwizzleTexture(const SurfaceParams& params, u32 start_offset, std::span source_linear, std::span dest_tiled) { diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index e66016e14..1f749ae24 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -9,15 +9,7 @@ #include "video_core/rasterizer_cache/pixel_format.h" #include "video_core/rasterizer_cache/types.h" -namespace OpenGL { - -struct FormatTuple { - int internal_format; - u32 format; - u32 type; -}; - -const FormatTuple& GetFormatTuple(PixelFormat pixel_format); +namespace VideoCore { struct HostTextureTag { PixelFormat format{}; @@ -70,15 +62,15 @@ void UnswizzleTexture(const SurfaceParams& params, u32 start_offset, namespace std { template <> -struct hash { - std::size_t 
operator()(const OpenGL::HostTextureTag& tag) const noexcept { +struct hash { + std::size_t operator()(const VideoCore::HostTextureTag& tag) const noexcept { return tag.Hash(); } }; template <> -struct hash { - std::size_t operator()(const OpenGL::TextureCubeConfig& config) const noexcept { +struct hash { + std::size_t operator()(const VideoCore::TextureCubeConfig& config) const noexcept { return config.Hash(); } }; diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp index 437675c46..d5202169f 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.cpp @@ -60,8 +60,8 @@ void main() { vao.Create(); } - PixelFormat GetSourceFormat() const override { - return PixelFormat::RGBA4; + VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::RGBA4; } void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, @@ -170,8 +170,8 @@ void main() { } } - PixelFormat GetSourceFormat() const override { - return PixelFormat::D24S8; + VideoCore::PixelFormat GetSourceFormat() const override { + return VideoCore::PixelFormat::D24S8; } void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, @@ -246,18 +246,18 @@ FormatReinterpreterOpenGL::FormatReinterpreterOpenGL() { const std::string_view vendor{reinterpret_cast(glGetString(GL_VENDOR))}; const std::string_view version{reinterpret_cast(glGetString(GL_VERSION))}; - auto Register = [this](PixelFormat dest, std::unique_ptr&& obj) { + auto Register = [this](VideoCore::PixelFormat dest, std::unique_ptr&& obj) { const u32 dst_index = static_cast(dest); return reinterpreters[dst_index].push_back(std::move(obj)); }; - Register(PixelFormat::RGBA8, std::make_unique()); + Register(VideoCore::PixelFormat::RGBA8, std::make_unique()); LOG_INFO(Render_OpenGL, "Using shader for D24S8 to RGBA8 reinterpretation"); - 
Register(PixelFormat::RGB5A1, std::make_unique()); + Register(VideoCore::PixelFormat::RGB5A1, std::make_unique()); } -auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(PixelFormat dst_format) +auto FormatReinterpreterOpenGL::GetPossibleReinterpretations(VideoCore::PixelFormat dst_format) -> const ReinterpreterList& { return reinterpreters[static_cast(dst_format)]; } diff --git a/src/video_core/renderer_opengl/gl_format_reinterpreter.h b/src/video_core/renderer_opengl/gl_format_reinterpreter.h index 9e1cd06db..0e9cc61e7 100644 --- a/src/video_core/renderer_opengl/gl_format_reinterpreter.h +++ b/src/video_core/renderer_opengl/gl_format_reinterpreter.h @@ -22,7 +22,7 @@ public: virtual ~FormatReinterpreterBase() = default; - virtual PixelFormat GetSourceFormat() const = 0; + virtual VideoCore::PixelFormat GetSourceFormat() const = 0; virtual void Reinterpret(const OGLTexture& src_tex, Common::Rectangle src_rect, const OGLTexture& dst_tex, Common::Rectangle dst_rect) = 0; @@ -38,10 +38,10 @@ public: FormatReinterpreterOpenGL(); ~FormatReinterpreterOpenGL() = default; - const ReinterpreterList& GetPossibleReinterpretations(PixelFormat dst_format); + const ReinterpreterList& GetPossibleReinterpretations(VideoCore::PixelFormat dst_format); private: - std::array reinterpreters; + std::array reinterpreters; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 42597138e..324b1dd32 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -39,7 +39,8 @@ static bool IsVendorIntel() { #endif RasterizerOpenGL::RasterizerOpenGL(Frontend::EmuWindow& emu_window, Driver& driver) - : driver(driver), is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, is_amd), + : driver{driver}, runtime{driver}, res_cache{*this, runtime}, + is_amd(IsVendorAmd()), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE, 
is_amd), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE, false), index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE, false), texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE, false), @@ -526,8 +527,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { regs.rasterizer.viewport_corner.y // bottom }; - Surface color_surface; - Surface depth_surface; + RasterizerCache::Surface color_surface, depth_surface; Common::Rectangle surfaces_rect; std::tie(color_surface, depth_surface, surfaces_rect) = res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, viewport_rect_unscaled); @@ -638,7 +638,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { const auto BindCubeFace = [&](GLuint& target, Pica::TexturingRegs::CubeFace face, Pica::Texture::TextureInfo& info) { info.physical_address = regs.texturing.GetCubePhysicalAddress(face); - Surface surface = res_cache.GetTextureSurface(info); + auto surface = res_cache.GetTextureSurface(info); if (surface != nullptr) { CheckBarrier(target = surface->texture.handle); @@ -657,7 +657,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; switch (texture.config.type.Value()) { case TextureType::Shadow2D: { - Surface surface = res_cache.GetTextureSurface(texture); + auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { CheckBarrier(state.image_shadow_texture_px = surface->texture.handle); } else { @@ -677,23 +677,26 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { BindCubeFace(state.image_shadow_texture_nz, CubeFace::NegativeZ, info); continue; } - case TextureType::TextureCube: + case TextureType::TextureCube: { using CubeFace = Pica::TexturingRegs::CubeFace; - TextureCubeConfig config; - config.px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX); - config.nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX); - config.py = 
regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY); - config.ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY); - config.pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ); - config.nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ); - config.width = texture.config.width; - config.format = texture.format; + const VideoCore::TextureCubeConfig config = { + .px = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveX), + .nx = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeX), + .py = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveY), + .ny = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeY), + .pz = regs.texturing.GetCubePhysicalAddress(CubeFace::PositiveZ), + .nz = regs.texturing.GetCubePhysicalAddress(CubeFace::NegativeZ), + .width = texture.config.width, + .format = texture.format + }; + state.texture_cube_unit.texture_cube = - res_cache.GetTextureCube(config).texture.handle; + res_cache.GetTextureCube(config)->texture.handle; texture_cube_sampler.SyncWithConfig(texture.config); state.texture_units[texture_index].texture_2d = 0; continue; // Texture unit 0 setup finished. Continue to next unit + } default: break; } @@ -702,7 +705,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } texture_samplers[texture_index].SyncWithConfig(texture.config); - Surface surface = res_cache.GetTextureSurface(texture); + auto surface = res_cache.GetTextureSurface(texture); if (surface != nullptr) { CheckBarrier(state.texture_units[texture_index].texture_2d = surface->texture.handle); @@ -721,19 +724,15 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) { } } + // The game is trying to use a surface as a texture and framebuffer at the same time + // which causes unpredictable behavior on the host. + // Making a copy to sample from eliminates this issue and seems to be fairly cheap. 
OGLTexture temp_tex; if (need_duplicate_texture) { - const auto& tuple = GetFormatTuple(color_surface->pixel_format); - const GLsizei levels = color_surface->max_level + 1; + temp_tex = runtime.Allocate2D(color_surface->GetScaledWidth(), color_surface->GetScaledHeight(), + color_surface->pixel_format); - // The game is trying to use a surface as a texture and framebuffer at the same time - // which causes unpredictable behavior on the host. - // Making a copy to sample from eliminates this issue and seems to be fairly cheap. - temp_tex.Create(); - temp_tex.Allocate(GL_TEXTURE_2D, levels, tuple.internal_format, - color_surface->GetScaledWidth(), color_surface->GetScaledHeight()); - - temp_tex.CopyFrom(color_surface->texture, GL_TEXTURE_2D, levels, + temp_tex.CopyFrom(color_surface->texture, GL_TEXTURE_2D, color_surface->max_level + 1, color_surface->GetScaledWidth(), color_surface->GetScaledHeight()); for (auto& unit : state.texture_units) { @@ -1364,40 +1363,37 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(PAddr addr, u32 size) { bool RasterizerOpenGL::AccelerateDisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { MICROPROFILE_SCOPE(OpenGL_Blits); - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); src_params.width = config.output_width; src_params.stride = config.input_width; src_params.height = config.output_height; src_params.is_tiled = !config.input_linear; - src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.input_format); + src_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.input_format); src_params.UpdateParams(); - SurfaceParams dst_params; + VideoCore::SurfaceParams dst_params; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = config.scaling != config.NoScale ? config.output_width.Value() / 2 : config.output_width.Value(); dst_params.height = config.scaling == config.ScaleXY ? 
config.output_height.Value() / 2 : config.output_height.Value(); dst_params.is_tiled = config.input_linear != config.dont_swizzle; - dst_params.pixel_format = PixelFormatFromGPUPixelFormat(config.output_format); + dst_params.pixel_format = VideoCore::PixelFormatFromGPUPixelFormat(config.output_format); dst_params.UpdateParams(); - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); if (src_surface == nullptr) return false; dst_params.res_scale = src_surface->res_scale; - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, false); - if (dst_surface == nullptr) + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, false); + if (dst_surface == nullptr) { return false; + } if (src_surface->is_tiled != dst_surface->is_tiled) std::swap(src_rect.top, src_rect.bottom); @@ -1444,7 +1440,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon return false; } - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = config.GetPhysicalInputAddress(); src_params.stride = input_width + input_gap; // stride in bytes src_params.width = input_width; // width in bytes @@ -1452,9 +1448,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon src_params.size = ((src_params.height - 1) * src_params.stride) + src_params.width; src_params.end = src_params.addr + src_params.size; - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = res_cache.GetTexCopySurface(src_params); + auto [src_surface, src_rect] = res_cache.GetTexCopySurface(src_params); if (src_surface == nullptr) { return false; } @@ -1466,7 +1460,7 
@@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon return false; } - SurfaceParams dst_params = *src_surface; + VideoCore::SurfaceParams dst_params = *src_surface; dst_params.addr = config.GetPhysicalOutputAddress(); dst_params.width = src_rect.GetWidth() / src_surface->res_scale; dst_params.stride = dst_params.width + src_surface->PixelsInBytes( @@ -1477,15 +1471,13 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon // Since we are going to invalidate the gap if there is one, we will have to load it first const bool load_gap = output_gap != 0; - Common::Rectangle dst_rect; - Surface dst_surface; - std::tie(dst_surface, dst_rect) = - res_cache.GetSurfaceSubRect(dst_params, ScaleMatch::Upscale, load_gap); + auto [dst_surface, dst_rect] = + res_cache.GetSurfaceSubRect(dst_params, VideoCore::ScaleMatch::Upscale, load_gap); if (dst_surface == nullptr) { return false; } - if (dst_surface->type == SurfaceType::Texture) { + if (dst_surface->type == VideoCore::SurfaceType::Texture) { return false; } @@ -1498,7 +1490,7 @@ bool RasterizerOpenGL::AccelerateTextureCopy(const GPU::Regs::DisplayTransferCon } bool RasterizerOpenGL::AccelerateFill(const GPU::Regs::MemoryFillConfig& config) { - Surface dst_surface = res_cache.GetFillSurface(config); + auto dst_surface = res_cache.GetFillSurface(config); if (dst_surface == nullptr) return false; @@ -1514,19 +1506,17 @@ bool RasterizerOpenGL::AccelerateDisplay(const GPU::Regs::FramebufferConfig& con } MICROPROFILE_SCOPE(OpenGL_CacheManagement); - SurfaceParams src_params; + VideoCore::SurfaceParams src_params; src_params.addr = framebuffer_addr; src_params.width = std::min(config.width.Value(), pixel_stride); src_params.height = config.height; src_params.stride = pixel_stride; src_params.is_tiled = false; - src_params.pixel_format = PixelFormatFromGPUPixelFormat(config.color_format); + src_params.pixel_format = 
VideoCore::PixelFormatFromGPUPixelFormat(config.color_format); src_params.UpdateParams(); - Common::Rectangle src_rect; - Surface src_surface; - std::tie(src_surface, src_rect) = - res_cache.GetSurfaceSubRect(src_params, ScaleMatch::Ignore, true); + const auto [src_surface, src_rect] = + res_cache.GetSurfaceSubRect(src_params, VideoCore::ScaleMatch::Ignore, true); if (src_surface == nullptr) { return false; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1cb2489fc..eaf1751a3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -6,10 +6,10 @@ #include "common/vector_math.h" #include "core/hw/gpu.h" #include "video_core/pica_types.h" -#include "video_core/rasterizer_cache/rasterizer_cache.h" #include "video_core/rasterizer_accelerated.h" #include "video_core/regs_lighting.h" #include "video_core/regs_texturing.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -252,6 +252,7 @@ private: OpenGLState state; GLuint default_texture; + TextureRuntime runtime; RasterizerCache res_cache; std::vector vertex_batch; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp new file mode 100644 index 000000000..2c731982f --- /dev/null +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -0,0 +1,490 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/scope_exit.h" +#include "video_core/rasterizer_cache/utils.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" +#include "video_core/renderer_opengl/gl_driver.h" +#include "video_core/renderer_opengl/gl_state.h" + +namespace OpenGL { + +constexpr FormatTuple DEFAULT_TUPLE = {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}; + +static constexpr std::array DEPTH_TUPLES = { + FormatTuple{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + FormatTuple{}, + FormatTuple{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + FormatTuple{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 +}; + +static constexpr std::array COLOR_TUPLES = { + FormatTuple{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, // RGBA8 + FormatTuple{GL_RGB8, GL_BGR, GL_UNSIGNED_BYTE}, // RGB8 + FormatTuple{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + FormatTuple{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 +}; + +static constexpr std::array COLOR_TUPLES_OES = { + FormatTuple{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // RGBA8 + FormatTuple{GL_RGB8, GL_RGB, GL_UNSIGNED_BYTE}, // RGB8 + FormatTuple{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1}, // RGB5A1 + FormatTuple{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // RGB565 + FormatTuple{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4}, // RGBA4 +}; + +GLbitfield MakeBufferMask(VideoCore::SurfaceType type) { + switch (type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + return GL_COLOR_BUFFER_BIT; + case VideoCore::SurfaceType::Depth: + return GL_DEPTH_BUFFER_BIT; + case VideoCore::SurfaceType::DepthStencil: + return GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + return GL_COLOR_BUFFER_BIT; +} + +TextureRuntime::TextureRuntime(Driver& driver) : driver(driver) { + 
read_fbo.Create(); + draw_fbo.Create(); +} + +const StagingBuffer& TextureRuntime::FindStaging(u32 size, bool upload) { + const GLenum target = upload ? GL_PIXEL_UNPACK_BUFFER : GL_PIXEL_PACK_BUFFER; + const GLbitfield access = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT; + auto& search = upload ? upload_buffers : download_buffers; + + // Attempt to find a free buffer that fits the requested data + for (auto it = search.lower_bound({.size = size}); it != search.end(); it++) { + if (!upload || it->IsFree()) { + return *it; + } + } + + OGLBuffer buffer{}; + buffer.Create(); + + glBindBuffer(target, buffer.handle); + + // Allocate a new buffer and map the data to the host + std::byte* data = nullptr; + if (driver.IsOpenGLES() && driver.HasExtBufferStorage()) { + const GLbitfield storage = upload ? GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT_EXT; + glBufferStorageEXT(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT_EXT | + GL_MAP_COHERENT_BIT_EXT)); + } else if (driver.HasArbBufferStorage()) { + const GLbitfield storage = upload ? 
GL_MAP_WRITE_BIT : GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT; + glBufferStorage(target, size, nullptr, storage | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT); + data = reinterpret_cast(glMapBufferRange(target, 0, size, access | GL_MAP_PERSISTENT_BIT | + GL_MAP_COHERENT_BIT)); + } else { + UNIMPLEMENTED(); + } + + glBindBuffer(target, 0); + + StagingBuffer staging = { + .buffer = std::move(buffer), + .mapped = std::span{data, size}, + .size = size + }; + + const auto& it = search.emplace(std::move(staging)); + return *it; +} + +const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_format) { + const auto type = GetFormatType(pixel_format); + const std::size_t format_index = static_cast(pixel_format); + + if (type == VideoCore::SurfaceType::Color) { + ASSERT(format_index < COLOR_TUPLES.size()); + return (driver.IsOpenGLES() ? COLOR_TUPLES_OES : COLOR_TUPLES)[format_index]; + } else if (type == VideoCore::SurfaceType::Depth || + type == VideoCore::SurfaceType::DepthStencil) { + const std::size_t tuple_idx = format_index - 14; + ASSERT(tuple_idx < DEPTH_TUPLES.size()); + return DEPTH_TUPLES[tuple_idx]; + } + + return DEFAULT_TUPLE; +} + +OGLTexture TextureRuntime::Allocate2D(u32 width, u32 height, VideoCore::PixelFormat format) { + const auto& tuple = GetFormatTuple(format); + auto recycled_tex = texture2d_recycler.find({format, width, height}); + if (recycled_tex != texture2d_recycler.end()) { + OGLTexture texture = std::move(recycled_tex->second); + texture2d_recycler.erase(recycled_tex); + return texture; + } + + // Allocate the 2D texture + OGLTexture texture{}; + texture.Create(); + texture.Allocate(GL_TEXTURE_2D, std::bit_width(std::max(width, height)), + tuple.internal_format, width, height); + + return texture; +} + +OGLTexture TextureRuntime::AllocateCubeMap(u32 width, VideoCore::PixelFormat format) { + const auto& tuple = GetFormatTuple(format); + + // Allocate the cube texture + OGLTexture texture{}; + texture.Create(); + 
texture.Allocate(GL_TEXTURE_CUBE_MAP, std::bit_width(width), + tuple.internal_format, width, width); + + return texture; +} + +void TextureRuntime::ReadTexture(OGLTexture& texture, const VideoCore::BufferTextureCopy& copy, + VideoCore::PixelFormat format) { + + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.ResetTexture(texture.handle); + state.draw.read_framebuffer = read_fbo.handle; + state.Apply(); + + switch (copy.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, + copy.texture_level); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + break; + case VideoCore::SurfaceType::Depth: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, + copy.texture_level); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + break; + case VideoCore::SurfaceType::DepthStencil: + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, copy.texture_level); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + const FormatTuple& tuple = GetFormatTuple(format); + glReadPixels(copy.texture_offset.x, copy.texture_offset.y, + copy.texture_extent.width, // glReadPixels takes the region size, + copy.texture_extent.height, // not the coordinates of its far corner + tuple.format, tuple.type, + reinterpret_cast(copy.buffer_offset)); +} + +bool TextureRuntime::ClearTexture(OGLTexture& texture, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value) { + OpenGLState prev_state = 
OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + // Setup scissor rectangle according to the clear rectangle + OpenGLState state{}; + state.scissor.enabled = true; + state.scissor.x = clear.rect.offset.x; + state.scissor.y = clear.rect.offset.y; + state.scissor.width = clear.rect.extent.width; + state.scissor.height = clear.rect.extent.height; + state.draw.draw_framebuffer = draw_fbo.handle; + state.Apply(); + + switch (clear.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture.handle, + clear.texture_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, + 0); + + state.color_mask.red_enabled = true; + state.color_mask.green_enabled = true; + state.color_mask.blue_enabled = true; + state.color_mask.alpha_enabled = true; + state.Apply(); + + glClearBufferfv(GL_COLOR, 0, value.color.AsArray()); + break; + case VideoCore::SurfaceType::Depth: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, texture.handle, + clear.texture_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); + + state.depth.write_mask = GL_TRUE; + state.Apply(); + + glClearBufferfv(GL_DEPTH, 0, &value.depth); + break; + case VideoCore::SurfaceType::DepthStencil: + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, + texture.handle, clear.texture_level); + + state.depth.write_mask = GL_TRUE; + state.stencil.write_mask = -1; + state.Apply(); + + glClearBufferfi(GL_DEPTH_STENCIL, 0, value.depth, value.stencil); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + 
return true; +} + +bool TextureRuntime::CopyTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureCopy& copy) { + return true; +} + +bool TextureRuntime::BlitTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureBlit& blit) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.draw.read_framebuffer = read_fbo.handle; + state.draw.draw_framebuffer = draw_fbo.handle; + state.Apply(); + + auto BindAttachment = [&blit, &source, &dest](GLenum attachment, u32 src_tex, u32 dst_tex) -> void { + const GLenum src_target = source.target == GL_TEXTURE_CUBE_MAP ? + GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.src_layer : source.target; + const GLenum dst_target = dest.target == GL_TEXTURE_CUBE_MAP ? + GL_TEXTURE_CUBE_MAP_POSITIVE_X + blit.dst_layer : dest.target; + + glFramebufferTexture2D(GL_READ_FRAMEBUFFER, attachment, src_target, src_tex, blit.src_level); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, attachment, dst_target, dst_tex, blit.dst_level); + }; + + switch (blit.surface_type) { + case VideoCore::SurfaceType::Color: + case VideoCore::SurfaceType::Texture: + case VideoCore::SurfaceType::Fill: + // Bind only color + BindAttachment(GL_COLOR_ATTACHMENT0, source.handle, dest.handle); + BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, 0, 0); + break; + case VideoCore::SurfaceType::Depth: + // Bind only depth + BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); + BindAttachment(GL_DEPTH_ATTACHMENT, source.handle, dest.handle); + BindAttachment(GL_STENCIL_ATTACHMENT, 0, 0); + break; + case VideoCore::SurfaceType::DepthStencil: + // Bind to combined depth + stencil + BindAttachment(GL_COLOR_ATTACHMENT0, 0, 0); + BindAttachment(GL_DEPTH_STENCIL_ATTACHMENT, source.handle, dest.handle); + break; + default: + UNREACHABLE_MSG("Invalid surface type!"); + } + + // TODO (wwylele): use GL_NEAREST for shadow map texture + // Note: shadow map is treated as RGBA8 format in PICA, as well as in the 
rasterizer cache, but + // doing linear intepolation componentwise would cause incorrect value. However, for a + // well-programmed game this code path should be rarely executed for shadow map with + // inconsistent scale. + const GLbitfield buffer_mask = MakeBufferMask(blit.surface_type); + const GLenum filter = buffer_mask == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST; + glBlitFramebuffer(blit.src_region.start.x, blit.src_region.start.y, + blit.src_region.end.x, blit.src_region.end.y, + blit.dst_region.start.x, blit.dst_region.start.y, + blit.dst_region.end.x, blit.dst_region.end.y, + buffer_mask, filter); + + return true; +} + +void TextureRuntime::GenerateMipmaps(OGLTexture& texture, u32 max_level) { + OpenGLState prev_state = OpenGLState::GetCurState(); + SCOPE_EXIT({ prev_state.Apply(); }); + + OpenGLState state{}; + state.texture_units[0].texture_2d = texture.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, max_level); + + glGenerateMipmap(GL_TEXTURE_2D); +} + +MICROPROFILE_DEFINE(RasterizerCache_TextureUL, "RasterizerCache", "Texture Upload", MP_RGB(128, 192, 64)); +void CachedSurface::UploadTexture(Common::Rectangle rect, const StagingBuffer& staging) { + MICROPROFILE_SCOPE(RasterizerCache_TextureUL); + + const FormatTuple& tuple = runtime.GetFormatTuple(pixel_format); + + // Load data from memory to the surface + GLint x0 = static_cast(rect.left); + GLint y0 = static_cast(rect.bottom); + std::size_t buffer_offset = (y0 * stride + x0) * GetBytesPerPixel(pixel_format); + + GLuint target_tex = texture.handle; + + // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in surface + OGLTexture unscaled_tex; + if (res_scale != 1) { + x0 = 0; + y0 = 0; + + unscaled_tex = runtime.Allocate2D(rect.GetWidth(), rect.GetHeight(), pixel_format); + target_tex = unscaled_tex.handle; + } + + OpenGLState cur_state = OpenGLState::GetCurState(); + + GLuint old_tex = 
cur_state.texture_units[0].texture_2d; + cur_state.texture_units[0].texture_2d = target_tex; + cur_state.Apply(); + + // Ensure no bad interactions with GL_UNPACK_ALIGNMENT + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast(stride)); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, staging.buffer.handle); + + glActiveTexture(GL_TEXTURE0); + glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast(rect.GetWidth()), + static_cast(rect.GetHeight()), tuple.format, tuple.type, + reinterpret_cast(buffer_offset)); + + staging.Lock(); + + cur_state.texture_units[0].texture_2d = old_tex; + cur_state.Apply(); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + const Common::Rectangle from_rect{0, rect.GetHeight(), rect.GetWidth(), 0}; + /*if (!owner.texture_filterer->Filter(unscaled_tex, from_rect, texture, scaled_rect, type)) { + const TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_region = Region2D{ + .start = {0, 0}, + .end = {width, height} + }, + .dst_region = Region2D{ + .start = {rect.left, rect.bottom}, + .end = {rect.right, rect.top} + } + }; + + runtime.BlitTextures(unscaled_tex, texture, texture_blit); + }*/ + } + + InvalidateAllWatcher(); +} + +MICROPROFILE_DEFINE(RasterizerCache_TextureDL, "RasterizerCache", "Texture Download", MP_RGB(128, 192, 64)); +void CachedSurface::DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging) { + MICROPROFILE_SCOPE(RasterizerCache_TextureDL); + + const FormatTuple& tuple = runtime.GetFormatTuple(pixel_format); + + OpenGLState state = OpenGLState::GetCurState(); + OpenGLState prev_state = state; + SCOPE_EXIT({ prev_state.Apply(); }); + + // Ensure no bad 
interactions with GL_PACK_ALIGNMENT + ASSERT(stride * GetBytesPerPixel(pixel_format) % 4 == 0); + glPixelStorei(GL_PACK_ROW_LENGTH, static_cast(stride)); + glBindBuffer(GL_PIXEL_PACK_BUFFER, staging.buffer.handle); + const u32 buffer_offset = (rect.bottom * stride + rect.left) * GetBytesPerPixel(pixel_format); + + // If not 1x scale, blit scaled texture to a new 1x texture and use that to flush + if (res_scale != 1) { + auto scaled_rect = rect; + scaled_rect.left *= res_scale; + scaled_rect.top *= res_scale; + scaled_rect.right *= res_scale; + scaled_rect.bottom *= res_scale; + + OGLTexture unscaled_tex = runtime.Allocate2D(rect.GetWidth(), rect.GetHeight(), pixel_format); + + const VideoCore::TextureBlit texture_blit = { + .surface_type = type, + .src_level = 0, + .dst_level = 0, + .src_region = VideoCore::Region2D{ + .start = {scaled_rect.left, scaled_rect.bottom}, + .end = {scaled_rect.right, scaled_rect.top} + }, + .dst_region = VideoCore::Region2D{ + .start = {0, 0}, + .end = {rect.GetWidth(), rect.GetHeight()} + } + }; + + // Blit scaled texture to the unscaled one + runtime.BlitTextures(texture, unscaled_tex, texture_blit); + + state.texture_units[0].texture_2d = unscaled_tex.handle; + state.Apply(); + + glActiveTexture(GL_TEXTURE0); + + /*if (GLES) { + owner.texture_downloader_es->GetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, + rect.GetHeight(), rect.GetWidth(), + reinterpret_cast(buffer_offset)); + } else { + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); + }*/ + glGetTexImage(GL_TEXTURE_2D, 0, tuple.format, tuple.type, reinterpret_cast(buffer_offset)); + } else { + const u32 download_size = width * height * GetBytesPerPixel(pixel_format); + const VideoCore::BufferTextureCopy texture_download = { + .buffer_offset = buffer_offset, + .buffer_size = download_size, + .buffer_row_length = stride, + .buffer_height = height, + .surface_type = type, + .texture_level = 0, + .texture_offset = {rect.left,
rect.bottom}, // ReadTexture consumes this as (x, y): left is x, bottom is y + .texture_extent = {rect.GetWidth(), rect.GetHeight()} + }; + + runtime.ReadTexture(texture, texture_download, pixel_format); + } + + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); // release the pack buffer bound at the top of this function + glPixelStorei(GL_PACK_ROW_LENGTH, 0); +} + +} // namespace OpenGL diff --git a/src/video_core/rasterizer_cache/texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h similarity index 52% rename from src/video_core/rasterizer_cache/texture_runtime.h rename to src/video_core/renderer_opengl/gl_texture_runtime.h index d7758cef1..7eaf5efa4 100644 --- a/src/video_core/rasterizer_cache/texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -5,12 +5,18 @@ #pragma once #include #include +#include "video_core/rasterizer_cache/rasterizer_cache.h" +#include "video_core/rasterizer_cache/surface_base.h" #include "video_core/rasterizer_cache/types.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -struct FormatTuple; +struct FormatTuple { + GLint internal_format; + GLenum format; + GLenum type; +}; struct StagingBuffer { OGLBuffer buffer{}; @@ -50,6 +56,7 @@ class Driver; * Separating this into a class makes it easier to abstract graphics API code */ class TextureRuntime { + friend class CachedSurface; public: TextureRuntime(Driver& driver); ~TextureRuntime() = default; @@ -57,18 +64,28 @@ public: /// Maps an internal staging buffer of the provided size of pixel uploads/downloads const StagingBuffer& FindStaging(u32 size, bool upload); + /// Returns the OpenGL format tuple associated with the provided pixel format + const FormatTuple& GetFormatTuple(VideoCore::PixelFormat pixel_format); + + /// Allocates a 2D OpenGL texture with the specified dimensions and format + OGLTexture Allocate2D(u32 width, u32 height, VideoCore::PixelFormat format); + + /// Allocates an OpenGL cube map texture with the specified dimensions and format + OGLTexture AllocateCubeMap(u32 width, VideoCore::PixelFormat format); + + /// Copies the GPU 
pixel data to the provided pixels buffer - void ReadTexture(OGLTexture& texture, const BufferTextureCopy& copy, - PixelFormat format, std::span pixels); + void ReadTexture(OGLTexture& texture, const VideoCore::BufferTextureCopy& copy, + VideoCore::PixelFormat format); /// Fills the rectangle of the texture with the clear value provided - bool ClearTexture(OGLTexture& texture, const TextureClear& clear, ClearValue value); + bool ClearTexture(OGLTexture& texture, const VideoCore::TextureClear& clear, + VideoCore::ClearValue value); /// Copies a rectangle of src_tex to another rectange of dst_rect - bool CopyTextures(OGLTexture& source, OGLTexture& dest, const TextureCopy& copy); + bool CopyTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureCopy& copy); /// Blits a rectangle of src_tex to another rectange of dst_rect - bool BlitTextures(OGLTexture& source, OGLTexture& dest, const TextureBlit& blit); + bool BlitTextures(OGLTexture& source, OGLTexture& dest, const VideoCore::TextureBlit& blit); /// Generates mipmaps for all the available levels of the texture void GenerateMipmaps(OGLTexture& texture, u32 max_level); @@ -76,8 +93,37 @@ public: private: Driver& driver; OGLFramebuffer read_fbo, draw_fbo; + std::unordered_multimap texture2d_recycler; + + // Staging buffers stored in increasing size std::multiset upload_buffers; std::multiset download_buffers; }; +class CachedSurface : public VideoCore::SurfaceBase { +public: + CachedSurface(VideoCore::SurfaceParams& params, TextureRuntime& runtime) + : VideoCore::SurfaceBase{params}, runtime{runtime} {} + ~CachedSurface() override = default; + + /// Uploads pixel data in staging to a rectangle region of the surface texture + void UploadTexture(Common::Rectangle rect, const StagingBuffer& staging); + + /// Downloads pixel data to staging from a rectangle region of the surface texture + void DownloadTexture(Common::Rectangle rect, const StagingBuffer& staging); + +private: + TextureRuntime& runtime; + 
+public: + OGLTexture texture{}; +}; + +struct Traits { + using Runtime = TextureRuntime; + using Surface = CachedSurface; +}; + +using RasterizerCache = VideoCore::RasterizerCache; + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/texture_downloader_es.cpp b/src/video_core/renderer_opengl/texture_downloader_es.cpp index 4d2339801..fe4c47bad 100644 --- a/src/video_core/renderer_opengl/texture_downloader_es.cpp +++ b/src/video_core/renderer_opengl/texture_downloader_es.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_texture_runtime.h" #include "video_core/renderer_opengl/texture_downloader_es.h" #include "shaders/depth_to_color.frag" @@ -16,6 +17,17 @@ namespace OpenGL { +static constexpr std::array DEPTH_TUPLES_HACK = { + FormatTuple{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16 + FormatTuple{}, + FormatTuple{GL_DEPTH_COMPONENT24, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT}, // D24 + FormatTuple{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24S8 +}; + +const FormatTuple& GetFormatTuple(VideoCore::PixelFormat format) { + return DEPTH_TUPLES_HACK[static_cast<std::size_t>(format) - 14]; // same depth-format offset as TextureRuntime::GetFormatTuple +} + /** * Self tests for the texture downloader */ @@ -75,13 +87,13 @@ void TextureDownloaderES::Test() { } }; LOG_INFO(Render_OpenGL, "GL_DEPTH24_STENCIL8 download test starting"); - test(GetFormatTuple(PixelFormat::D24S8), std::vector{}, 4096, + test(GetFormatTuple(VideoCore::PixelFormat::D24S8), std::vector{}, 4096, [](std::size_t idx) { return static_cast((idx << 8) | (idx & 0xFF)); }); LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT24 download test starting"); - test(GetFormatTuple(PixelFormat::D24), std::vector{}, 4096, + test(GetFormatTuple(VideoCore::PixelFormat::D24), std::vector{}, 4096, [](std::size_t idx) { return static_cast(idx << 8); }); LOG_INFO(Render_OpenGL, "GL_DEPTH_COMPONENT16 download test 
starting"); - test(GetFormatTuple(PixelFormat::D16), std::vector{}, 256, + test(GetFormatTuple(VideoCore::PixelFormat::D16), std::vector{}, 256, [](std::size_t idx) { return static_cast(idx); }); cur_state.Apply(); diff --git a/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp b/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp index 264a3a767..d5b0859ca 100644 --- a/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp +++ b/src/video_core/renderer_opengl/texture_filters/texture_filterer.cpp @@ -62,10 +62,10 @@ bool TextureFilterer::IsNull() const { bool TextureFilterer::Filter(const OGLTexture& src_tex, Common::Rectangle src_rect, const OGLTexture& dst_tex, Common::Rectangle dst_rect, - SurfaceType type) { + VideoCore::SurfaceType type) { // Depth/Stencil texture filtering is not supported for now - if (IsNull() || (type != SurfaceType::Color && type != SurfaceType::Texture)) { + if (IsNull() || (type != VideoCore::SurfaceType::Color && type != VideoCore::SurfaceType::Texture)) { return false; } diff --git a/src/video_core/renderer_opengl/texture_filters/texture_filterer.h b/src/video_core/renderer_opengl/texture_filters/texture_filterer.h index ca3fec4f7..b9f5e0634 100644 --- a/src/video_core/renderer_opengl/texture_filters/texture_filterer.h +++ b/src/video_core/renderer_opengl/texture_filters/texture_filterer.h @@ -27,7 +27,8 @@ public: // Returns true if the texture was able to be filtered bool Filter(const OGLTexture& src_tex, Common::Rectangle src_rect, - const OGLTexture& dst_tex, Common::Rectangle dst_rect, SurfaceType type); + const OGLTexture& dst_tex, Common::Rectangle dst_rect, + VideoCore::SurfaceType type); static std::vector GetFilterNames();