diff --git a/CMakeLists.txt b/CMakeLists.txt index 4a3cedcbd..de4338ab4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,7 +134,7 @@ message(STATUS "Target architecture: ${ARCHITECTURE}") # Configure C++ standard # =========================== -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) # set up output paths for executable binaries diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h index 446965e90..ecf257f80 100644 --- a/src/common/common_funcs.h +++ b/src/common/common_funcs.h @@ -59,6 +59,61 @@ __declspec(dllimport) void __stdcall DebugBreak(void); #endif // _MSC_VER ndef +#define DECLARE_ENUM_FLAG_OPERATORS(type) \ + [[nodiscard]] constexpr type operator|(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) | static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator&(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) & static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator^(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) ^ static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator<<(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) << static_cast(b)); \ + } \ + [[nodiscard]] constexpr type operator>>(type a, type b) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(static_cast(a) >> static_cast(b)); \ + } \ + constexpr type& operator|=(type& a, type b) noexcept { \ + a = a | b; \ + return a; \ + } \ + constexpr type& operator&=(type& a, type b) noexcept { \ + a = a & b; \ + return a; \ + } \ + constexpr type& operator^=(type& a, type b) noexcept { \ + a = a ^ b; \ + return a; \ + } \ + constexpr type& operator<<=(type& a, type b) noexcept { \ + a = a << b; \ + return a; \ + } \ + constexpr type& operator>>=(type& a, type b) noexcept { \ + a = 
a >> b; \ + return a; \ + } \ + [[nodiscard]] constexpr type operator~(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(~static_cast(key)); \ + } \ + [[nodiscard]] constexpr bool True(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(key) != 0; \ + } \ + [[nodiscard]] constexpr bool False(type key) noexcept { \ + using T = std::underlying_type_t; \ + return static_cast(key) == 0; \ + } + + // Generic function to get last error message. // Call directly after the command or use the error num. // This function might change the error code. diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 79976fcdb..6f145fe22 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -191,8 +191,7 @@ struct Regs { enum ScalingMode : u32 { NoScale = 0, // Doesn't scale the image ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter - ScaleXY = - 2, // Downscales the image in half in both the X and Y axes and applies a box filter + ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter }; union { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f117e3182..cf75f9bc4 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -12,6 +12,8 @@ add_library(video_core STATIC pica_types.h primitive_assembly.cpp primitive_assembly.h + rasterizer_accelerated.cpp + rasterizer_accelerated.h rasterizer_interface.h regs.cpp regs.h @@ -102,6 +104,14 @@ add_library(video_core STATIC texture/etc1.h texture/texture_decode.cpp texture/texture_decode.h + transform_cache/morton_swizzle.h + transform_cache/pixel_format.h + transform_cache/rasterizer_cache.h + transform_cache/slot_vector.h + transform_cache/surface.cpp + transform_cache/surface.h + transform_cache/types.h + transform_cache/utils.h utils.h vertex_loader.cpp vertex_loader.h diff --git a/src/video_core/rasterizer_accelerated.cpp 
b/src/video_core/rasterizer_accelerated.cpp new file mode 100644 index 000000000..a57e4eeb0 --- /dev/null +++ b/src/video_core/rasterizer_accelerated.cpp @@ -0,0 +1,74 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include "core/memory.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/video_core.h" + +namespace VideoCore { + +void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) { + const u32 page_start = addr >> Memory::PAGE_BITS; + const u32 page_end = ((addr + size - 1) >> Memory::PAGE_BITS); + + u32 uncache_start_addr = 0; + u32 cache_start_addr = 0; + u32 uncache_bytes = 0; + u32 cache_bytes = 0; + + for (u32 page = page_start; page <= page_end; page++) { // page_end is the last page touched, so the range is inclusive + auto& count = cached_pages.at(page); + + // Ensure no overflow happens + if (delta > 0) { + ASSERT_MSG(count < std::numeric_limits::max(), "Count will overflow!"); + } else if (delta < 0) { + ASSERT_MSG(count > 0, "Count will underflow!"); + } else { + ASSERT_MSG(false, "Delta must be non-zero!"); + } + + // Adds or subtracts 1, as count is an unsigned 8-bit value + count += delta; + + // Assume delta is either -1 or 1 + if (count == 0) { + if (uncache_bytes == 0) { + uncache_start_addr = page << Memory::PAGE_BITS; + } + + uncache_bytes += Memory::PAGE_SIZE; + } else if (uncache_bytes > 0) { + VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes, + false); + uncache_bytes = 0; + } + + if (count == 1 && delta > 0) { + if (cache_bytes == 0) { + cache_start_addr = page << Memory::PAGE_BITS; + } + + cache_bytes += Memory::PAGE_SIZE; + } else if (cache_bytes > 0) { + VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes, + true); + + cache_bytes = 0; + } + } + + if (uncache_bytes > 0) { + VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes, + false); + } + + if (cache_bytes > 0)
{ + VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes, + true); + } +} + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h new file mode 100644 index 000000000..cfb8a5967 --- /dev/null +++ b/src/video_core/rasterizer_accelerated.h @@ -0,0 +1,21 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include "video_core/rasterizer_interface.h" + +namespace VideoCore { + +class RasterizerAccelerated : public RasterizerInterface { +public: + RasterizerAccelerated() = default; + virtual ~RasterizerAccelerated() override = default; + + /// Increase/decrease the number of surfaces in pages touching the specified region + void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override; + +private: + std::array cached_pages{}; +}; +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h index e8cfc87cd..313ce8414 100644 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ b/src/video_core/rasterizer_cache/morton_swizzle.h @@ -85,7 +85,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr auto glbuf_next_tile = [&] { x = (x + 8) % stride; gl_buffer += 8 * aligned_bytes_per_pixel; - if (!x) { + if (x == 0) { y += 8; gl_buffer -= stride * 9 * aligned_bytes_per_pixel; } @@ -113,6 +113,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr LOG_ERROR(Render_OpenGL, "Out of bound texture"); break; } + MortonCopyTile(stride, tile_buffer, gl_buffer); tile_buffer += tile_size; current_paddr += tile_size; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 873e4273e..ca32fccc9 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -1,9 +1,8 @@ -//
Copyright 2015 Citra Emulator Project +// Copyright 2022 Citra Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. #pragma once - #include #include #include "common/common_types.h" @@ -15,7 +14,7 @@ struct ScreenInfo; namespace Pica::Shader { struct OutputVertex; -} // namespace Pica::Shader +} namespace VideoCore { @@ -25,6 +24,7 @@ enum class LoadCallbackStage { Build, Complete, }; + using DiskResourceLoadCallback = std::function; class RasterizerInterface { @@ -42,6 +42,9 @@ public: /// Notify rasterizer that the specified PICA register has been changed virtual void NotifyPicaRegisterChanged(u32 id) = 0; + /// Increase/decrease the number of surface in pages touching the specified region + virtual void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) = 0; + /// Notify rasterizer that all caches should be flushed to 3DS memory virtual void FlushAll() = 0; diff --git a/src/video_core/regs_rasterizer.h b/src/video_core/regs_rasterizer.h index 94b9f7502..38e6b4dca 100644 --- a/src/video_core/regs_rasterizer.h +++ b/src/video_core/regs_rasterizer.h @@ -8,6 +8,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" +#include "common/vector_math.h" #include "video_core/pica_types.h" namespace Pica { diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 3954e13b4..fbd535ac6 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -184,6 +184,7 @@ struct TexturingRegs { const TextureConfig config; const TextureFormat format; }; + const std::array GetTextures() const { return {{ {static_cast(main_config.texture0_enable), texture0, texture0_format}, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 43159286c..5d37e6dac 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ 
-1781,9 +1781,8 @@ void RasterizerOpenGL::SyncColorWriteMask() { } } - auto IsColorWriteEnabled = [&](u32 value) { - return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE - : GL_FALSE; + auto IsColorWriteEnabled = [&](u32 value) -> bool { + return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0; }; state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable); diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h index 9e6c216f2..079766637 100644 --- a/src/video_core/texture/texture_decode.h +++ b/src/video_core/texture/texture_decode.h @@ -15,6 +15,7 @@ size_t CalculateTileSize(TexturingRegs::TextureFormat format); struct TextureInfo { PAddr physical_address; + std::array cube_addresses; unsigned int width; unsigned int height; ptrdiff_t stride; diff --git a/src/video_core/transform_cache/morton_swizzle.h b/src/video_core/transform_cache/morton_swizzle.h new file mode 100644 index 000000000..0406d423c --- /dev/null +++ b/src/video_core/transform_cache/morton_swizzle.h @@ -0,0 +1,148 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once +#include "common/alignment.h" +#include "core/memory.h" +#include "video_core/transform_cache/pixel_format.h" +#include "video_core/utils.h" +#include "video_core/video_core.h" + +namespace VideoCore { + +template +constexpr void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); + for (u32 y = 0; y < 8; ++y) { + for (u32 x = 0; x < 8; ++x) { + u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel; + u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * aligned_bytes_per_pixel; + if constexpr (morton_to_gl) { + if constexpr (format == PixelFormat::D24S8) { + gl_ptr[0] = tile_ptr[3]; + std::memcpy(gl_ptr + 1, tile_ptr, 3); + } else { + std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel); + } + } else { + if constexpr (format == PixelFormat::D24S8) { + std::memcpy(tile_ptr, gl_ptr + 1, 3); + tile_ptr[3] = gl_ptr[0]; + } else { + std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel); + } + } + } + } +} + +template +constexpr void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + constexpr u32 tile_size = bytes_per_pixel * 64; + + constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format); + static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); + + const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size); + const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size); + const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size); + + ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end)); + + const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel; + u32 x = (begin_pixel_index % (stride * 8)) / 8; + u32 y = (begin_pixel_index / (stride * 8)) * 8; + + gl_buffer += ((height - 8 - y) * stride + x) * 
aligned_bytes_per_pixel; + + auto glbuf_next_tile = [&] { + x = (x + 8) % stride; + gl_buffer += 8 * aligned_bytes_per_pixel; + if (!x) { + y += 8; + gl_buffer -= stride * 9 * aligned_bytes_per_pixel; + } + }; + + u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start); + + if (start < aligned_start && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start], + std::min(aligned_start, end) - start); + + tile_buffer += aligned_start - start; + glbuf_next_tile(); + } + + const u8* const buffer_end = tile_buffer + aligned_end - aligned_start; + PAddr current_paddr = aligned_start; + while (tile_buffer < buffer_end) { + // Pokemon Super Mystery Dungeon will try to use textures that go beyond + // the end address of VRAM. Stop reading if reaches invalid address + if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) || + !VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) { + LOG_ERROR(Render_OpenGL, "Out of bound texture"); + break; + } + + MortonCopyTile(stride, tile_buffer, gl_buffer); + tile_buffer += tile_size; + current_paddr += tile_size; + glbuf_next_tile(); + } + + if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) { + std::array tmp_buf; + MortonCopyTile(stride, &tmp_buf[0], gl_buffer); + std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end); + } +} + +static constexpr std::array morton_to_gl_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +static constexpr std::array gl_to_morton_fns = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, 
+ nullptr, // 5 - 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy // 17 +}; + +} // namespace VideoCore diff --git a/src/video_core/transform_cache/pixel_format.h b/src/video_core/transform_cache/pixel_format.h new file mode 100644 index 000000000..216546abe --- /dev/null +++ b/src/video_core/transform_cache/pixel_format.h @@ -0,0 +1,194 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "core/hw/gpu.h" +#include "video_core/regs_framebuffer.h" +#include "video_core/regs_texturing.h" + +namespace VideoCore { + +enum class PixelFormat : u8 { + // First 5 formats are shared between textures and color buffers + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, + // Texture-only formats + IA8 = 5, + RG8 = 6, + I8 = 7, + A8 = 8, + IA4 = 9, + I4 = 10, + A4 = 11, + ETC1 = 12, + ETC1A4 = 13, + // Depth buffer-only formats + D16 = 14, + D24 = 16, // value 15 has no enumerator + D24S8 = 17, + Invalid = 255, +}; + +enum class SurfaceType { + Color = 0, + Texture = 1, + Depth = 2, + DepthStencil = 3, + Fill = 4, + Invalid = 5 +}; + +constexpr std::string_view PixelFormatAsString(PixelFormat format) { + switch (format) { + case PixelFormat::RGBA8: + return "RGBA8"; + case PixelFormat::RGB8: + return "RGB8"; + case PixelFormat::RGB5A1: + return "RGB5A1"; + case PixelFormat::RGB565: + return "RGB565"; + case PixelFormat::RGBA4: + return "RGBA4"; + case PixelFormat::IA8: + return "IA8"; + case PixelFormat::RG8: + return "RG8"; + case PixelFormat::I8: + return "I8"; + case PixelFormat::A8: + return "A8"; + case PixelFormat::IA4: + return "IA4"; + case PixelFormat::I4: + return "I4"; + case PixelFormat::A4: + return "A4"; + case PixelFormat::ETC1: + return "ETC1"; + case PixelFormat::ETC1A4: + return "ETC1A4"; + case PixelFormat::D16: + return "D16"; + case PixelFormat::D24: + return "D24"; + case PixelFormat::D24S8: + return "D24S8"; + default: + return
"NotReal"; + } +} + +constexpr PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) { + const u32 format_index = static_cast(format); + return (format_index < 14) ? static_cast(format) : PixelFormat::Invalid; +} + +constexpr PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) { + const u32 format_index = static_cast(format); + return (format_index < 5) ? static_cast(format) : PixelFormat::Invalid; +} + +constexpr PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) { + const u32 format_index = static_cast(format); + return (format_index < 4) ? static_cast(format_index + 14) : PixelFormat::Invalid; +} + +constexpr PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) { + const u32 format_index = static_cast(format); + switch (format) { + // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat + case GPU::Regs::PixelFormat::RGB565: + return PixelFormat::RGB565; + case GPU::Regs::PixelFormat::RGB5A1: + return PixelFormat::RGB5A1; + default: + return (format_index < 5) ? 
static_cast(format) : PixelFormat::Invalid; + } +} + +constexpr SurfaceType GetFormatType(PixelFormat pixel_format) { + const u32 format_index = static_cast(pixel_format); + if (format_index < 5) { + return SurfaceType::Color; + } + + if (format_index < 14) { + return SurfaceType::Texture; + } + + if (pixel_format == PixelFormat::D16 || pixel_format == PixelFormat::D24) { + return SurfaceType::Depth; + } + + if (pixel_format == PixelFormat::D24S8) { + return SurfaceType::DepthStencil; + } + + return SurfaceType::Invalid; +} + +constexpr bool CheckFormatsBlittable(PixelFormat source_format, PixelFormat dest_format) { + SurfaceType source_type = GetFormatType(source_format); + SurfaceType dest_type = GetFormatType(dest_format); + + if ((source_type == SurfaceType::Color || source_type == SurfaceType::Texture) && + (dest_type == SurfaceType::Color || dest_type == SurfaceType::Texture)) { + return true; + } + + if (source_type == SurfaceType::Depth && dest_type == SurfaceType::Depth) { + return true; + } + + if (source_type == SurfaceType::DepthStencil && dest_type == SurfaceType::DepthStencil) { + return true; + } + + return false; +} + +constexpr u32 GetFormatBpp(PixelFormat format) { + switch (format) { + case PixelFormat::RGBA8: + case PixelFormat::D24S8: + return 32; + case PixelFormat::RGB8: + case PixelFormat::D24: + return 24; + case PixelFormat::RGB5A1: + case PixelFormat::RGB565: + case PixelFormat::RGBA4: + case PixelFormat::IA8: + case PixelFormat::RG8: + case PixelFormat::D16: + return 16; + case PixelFormat::I8: + case PixelFormat::A8: + case PixelFormat::IA4: + case PixelFormat::ETC1A4: + return 8; + case PixelFormat::I4: + case PixelFormat::A4: + case PixelFormat::ETC1: + return 4; + default: + return 0; + } +} + +constexpr u32 GetBytesPerPixel(PixelFormat format) { + // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type + if (format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture) { + return 4; + } + + return 
GetFormatBpp(format) / 8; +} + +} // namespace VideoCore diff --git a/src/video_core/transform_cache/rasterizer_cache.h b/src/video_core/transform_cache/rasterizer_cache.h new file mode 100644 index 000000000..9ef3522e7 --- /dev/null +++ b/src/video_core/transform_cache/rasterizer_cache.h @@ -0,0 +1,1314 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "common/alignment.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "core/memory.h" +#include "video_core/rasterizer_accelerated.h" +#include "video_core/transform_cache/slot_vector.h" +#include "video_core/transform_cache/types.h" +#include "video_core/transform_cache/surface.h" +#include "video_core/texture/texture_decode.h" +#include "video_core/transform_cache/utils.h" +#include "video_core/pica_state.h" + +namespace VideoCore { + +enum class DirtyFlags : u8 { + RenderTargets = 1 << 0, // Distinct bits: a zero value could never be tested with operator& + ColorBuffer = 1 << 1, // Color buffer view needs to be looked up again + DepthBuffer = 1 << 2 // Depth buffer view needs to be looked up again +}; + +DECLARE_ENUM_FLAG_OPERATORS(DirtyFlags); + +/// Container to push objects to be destroyed a few ticks in the future +template +class DelayedDestructionRing { +public: + void Tick() { + index = (index + 1) % TICKS_TO_DESTROY; + elements[index].clear(); + } + + void Push(T&& object) { + elements[index].push_back(std::move(object)); + } + +private: + size_t index = 0; + std::array, TICKS_TO_DESTROY> elements; +}; + +template +class RasterizerCache { + /// Enables debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + + /// Image view ID for null descriptors + static constexpr SurfaceViewId NULL_IMAGE_VIEW_ID{0}; + + using Runtime = typename
P::Runtime; + //using Surface = typename P::Surface; + using SurfaceAlloc = typename P::SurfaceAlloc; + using SurfaceView = typename P::SurfaceView; + using Framebuffer = typename P::Framebuffer; + + template + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast(value); + } + }; + +public: + explicit RasterizerCache(Runtime& runtime, RasterizerAccelerated& rasterizer); + + /// Notify the cache that a new frame has been queued + void TickFrame(); + + /// Return a constant reference to the given image view id + [[nodiscard]] const SurfaceView& GetSurfaceView(SurfaceViewId id) const noexcept; + + /// Return a reference to the given image view id + [[nodiscard]] SurfaceView& GetSurfaceView(SurfaceViewId id) noexcept; + + /// Update bound render targets and upload memory if necessary + void UpdateRenderTargets(bool is_clear); + + /// Find a framebuffer with the currently bound render targets + /// UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); + + /// Mark images in a range as modified from the CPU + void WriteMemory(PAddr cpu_addr, size_t size); + + /// Download contents of host images to guest memory in a region + void DownloadMemory(PAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapMemory(PAddr cpu_addr, size_t size); + + /// Attempts to perform a PICA texture copy using the config provided + bool TextureCopy(const GPU::Regs::DisplayTransferConfig& config); + + /// Attempts to perform a PICA display transfer using the config provided + bool DisplayTransfer(const GPU::Regs::DisplayTransferConfig& config); + + /// Invalidate the contents of the current color buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. + void InvalidateColorBuffer(); + + /// Invalidate the contents of the depth buffer + /// These contents become unspecified, the cache can assume aggressive optimizations. 
+ void InvalidateDepthBuffer(); + + /// Try to find a cached surface view in the given CPU address + [[nodiscard]] SurfaceView* TryFindFramebufferImageView(PAddr cpu_addr); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGPUInvalidated(PAddr addr, size_t size); + +private: + /// Iterate over all page indices in a range + template + static void ForEachPage(PAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; + const u64 page_start = addr >> Memory::PAGE_BITS; + const u64 page_end = (addr + size - 1) >> Memory::PAGE_BITS; + + for (u64 page = page_start; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Iterates over all the images in a region calling func + template + void ForEachSurfaceInRegion(PAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result_t; + static constexpr bool BOOL_BREAK = std::is_same_v; + std::vector picked_surfaces; + + ForEachPage(cpu_addr, size, [this, &picked_surfaces, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + + for (const SurfaceId image_id : it->second) { + Surface& surface = slot_images[image_id]; + + if (True(surface.flags & SurfaceFlagBits::Picked)) { + continue; + } + if (!surface.Overlaps(cpu_addr, size)) { + continue; + } + + surface.flags |= SurfaceFlagBits::Picked; + picked_surfaces.push_back(image_id); + + if constexpr (BOOL_BREAK) { + if (func(image_id, surface)) { + return true; + } + + } else { + func(image_id, surface); + } + } + + if constexpr (BOOL_BREAK) { + return false; + } + }); + + for (const SurfaceId surface_id : picked_surfaces) { + slot_images[surface_id].flags &= ~SurfaceFlagBits::Picked; + } + } + + /// Find a surface in the address range that satisfies the 
condition function + template + [[nodiscard]] SurfaceId FindSurface(const SurfaceInfo& info, CondFunc&& func) { + SurfaceId image_id; + ForEachSurfaceInRegion(info.addr, info.byte_size, [&](SurfaceId existing_image_id, + Surface& existing_surface) { + if (func(info, existing_surface)) { + image_id = existing_image_id; + return true; + } + + return false; + }); + + return image_id; + } + + /// Find or create a surface from the given parameters + template + [[nodiscard]] SurfaceId FindOrCreateSurface(const SurfaceInfo& info, CondFunc&& func) { + if (const SurfaceId surface_id = FindSurface(info, func); surface_id) { + return surface_id; + } + + return CreateSurface(info); + } + + /// Returns a pair consisting of a source and destination surfaces matching the config + [[nodiscard]] std::pair GetTransferSurfacePair( + const GPU::Regs::DisplayTransferConfig& config); + + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); + + /// Refresh the contents (pixel data) of an image + void RefreshContents(Surface& image); + + /// Upload data from guest to an image + void UploadImageContents(Surface& image, auto& map, u32 buffer_offset); + + /// Find or create an image view from a guest descriptor + [[nodiscard]] SurfaceViewId FindImageView(const Pica::TexturingRegs::FullTextureConfig& config); + + /// Create a new image view from a guest descriptor + [[nodiscard]] SurfaceViewId CreateImageView(const Pica::TexturingRegs::FullTextureConfig& config); + + /// Creates a surface from the given parameters + [[nodiscard]] SurfaceId CreateSurface(const SurfaceInfo& info); + + /// Attempts to create a new surface by "stitching" existing surfaces + [[nodiscard]] SurfaceId StitchSurface(const SurfaceInfo& info); + + /// Find or create an image view for the given color buffer index + [[nodiscard]] SurfaceViewId FindColorBuffer(bool is_clear); + + /// Find or create an image view for the depth buffer + 
[[nodiscard]] SurfaceViewId FindDepthBuffer(bool is_clear); + + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] SurfaceViewId FindRenderTargetView(const SurfaceInfo& info, bool is_clear); + + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] SurfaceViewId FindOrEmplaceImageView(SurfaceId image_id, const SurfaceViewInfo& info); + + /// Register image in the page table + void RegisterImage(SurfaceId image); + + /// Unregister image from the page table + void UnregisterImage(SurfaceId image); + + /// Track CPU reads and writes for image + void TrackImage(Surface& image); + + /// Stop tracking CPU reads and writes for image + void UntrackSurface(Surface& image); + + /// Delete image from the cache + void DeleteImage(SurfaceId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(Surface& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void SynchronizeAliases(SurfaceId image_id); + + /// Prepare an image to be used + void PrepareImage(SurfaceId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(SurfaceViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(SurfaceId dst_id, SurfaceId src_id, std::span copies); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair RenderTargetFromImage( + SurfaceId, const SurfaceViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(SurfaceViewId id); + 
+private: + Runtime& runtime; + RasterizerAccelerated& rasterizer; + + RenderTargets render_targets; + DirtyFlags dirty_flags = DirtyFlags::RenderTargets | DirtyFlags::ColorBuffer | DirtyFlags::DepthBuffer; // Initialized explicitly: the constructor does not set it and UpdateRenderTargets() reads it + + std::unordered_map image_views; + std::unordered_map framebuffers; + std::unordered_map, IdentityHash> page_table; + + bool has_deleted_images = false; + + SlotVector slot_images; + SlotVector slot_image_views; + SlotVector slot_image_allocs; + SlotVector slot_framebuffers; + + std::vector uncommitted_downloads; + std::queue> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing sentenced_images; + DelayedDestructionRing sentenced_image_view; + DelayedDestructionRing sentenced_framebuffers; + + std::unordered_map image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; +}; + +template +RasterizerCache

::RasterizerCache(Runtime& runtime, RasterizerAccelerated& rasterizer) + : runtime(runtime), rasterizer(rasterizer) { + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + slot_image_views.insert(runtime, NullSurfaceParams{}); +} + +template +void RasterizerCache

::TickFrame() { + // Tick sentenced resources in this order to ensure they are destroyed in the right order + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + frame_tick++; +} + +template +const typename P::SurfaceView& RasterizerCache

::GetSurfaceView(SurfaceViewId id) const noexcept { + return slot_image_views[id]; +} + +template +typename P::SurfaceView& RasterizerCache

::GetSurfaceView(SurfaceViewId id) noexcept { + return slot_image_views[id]; +} + +template +void RasterizerCache

::UpdateRenderTargets(bool is_clear) { + if (False(dirty_flags & DirtyFlags::RenderTargets)) { + return; + } + + dirty_flags &= ~DirtyFlags::RenderTargets; + + const auto BindView = [this, &is_clear](SurfaceViewId& id, DirtyFlags flag) { + if (True(dirty_flags & flag)) { + dirty_flags &= ~flag; + id = FindColorBuffer(is_clear); + } + + const SurfaceView& image_view = slot_image_views[id]; + PrepareImage(image_view.image_id, true, is_clear && IsFullClear(id)); + }; // NOTE(review): BindView is never invoked here, and it calls FindColorBuffer for any flag + + // Update color buffer + if (True(dirty_flags & DirtyFlags::ColorBuffer)) { + dirty_flags &= ~DirtyFlags::ColorBuffer; + render_targets.color_buffer_id = FindColorBuffer(is_clear); + } + + const SurfaceViewId color_id = render_targets.color_buffer_id; // a view id: it indexes slot_image_views + const SurfaceView& color_surface_view = slot_image_views[color_id]; + PrepareImage(color_surface_view.image_id, true, is_clear && IsFullClear(color_id)); + + // Update depth buffer + if (True(dirty_flags & DirtyFlags::DepthBuffer)) { + dirty_flags &= ~DirtyFlags::DepthBuffer; + render_targets.depth_buffer_id = FindDepthBuffer(is_clear); + } + + const SurfaceViewId depth_id = render_targets.depth_buffer_id; // a view id: it indexes slot_image_views + const SurfaceView& depth_surface_view = slot_image_views[depth_id]; + PrepareImage(depth_surface_view.image_id, true, is_clear && IsFullClear(depth_id)); +} + +template +typename P::Framebuffer* RasterizerCache

::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} + +template +FramebufferId RasterizerCache

::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + + if (!is_new) { + return framebuffer_id; + } + + const SurfaceView* color_buffer = + key.color_buffer_id ? &slot_image_views[key.color_buffer_id] : nullptr; + const SurfaceView* depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + + framebuffer_id = slot_framebuffers.insert(runtime, color_buffer, depth_buffer, key); + return framebuffer_id; +} + +template +void RasterizerCache

::WriteMemory(PAddr cpu_addr, size_t size) { + ForEachSurfaceInRegion(cpu_addr, size, [this](SurfaceId surface_id, Surface& surface) { + if (True(surface.flags & SurfaceFlagBits::CPUInvalidated)) { + return; + } + + surface.flags |= SurfaceFlagBits::CPUInvalidated; + UntrackSurface(surface); + }); +} + +template +void RasterizerCache

::DownloadMemory(PAddr cpu_addr, size_t size) { + std::vector download_surfaces; + ForEachSurfaceInRegion(cpu_addr, size, [this, &download_surfaces](SurfaceId surface_id, Surface& surface) { + // Skip surfaces that were not modified from the GPU + if (False(surface.flags & SurfaceFlagBits::GPUInvalidated)) { + return; + } + + // Don't download surfaces that the CPU has modified on the guest. + // We don't want to override anything the CPU has written there + if (True(surface.flags & SurfaceFlagBits::CPUInvalidated)) { + return; + } + + surface.flags &= ~SurfaceFlagBits::GPUInvalidated; + download_surfaces.push_back(surface_id); + }); + + if (download_surfaces.empty()) { + return; + } + + // Sort images from oldest to newest + std::ranges::sort(download_surfaces, [this](SurfaceId lhs, SurfaceId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + + + // TODO: Batch download + for (const SurfaceId surface_id : download_surfaces) { + const Surface& surface = slot_images[surface_id]; + auto staging_buffer = runtime.MapDownloadBuffer(surface.info.byte_size); + + // TODO: Download only what is needed + const BufferSurfaceCopy download_copy = { + .buffer_offset = 0, + .buffer_size = surface.info.byte_size, + .buffer_row_length = surface.info.size.width, + .buffer_image_height = surface.info.size.height, + .texture_level = 0, + .texture_offset = Offset{0, 0}, + .texture_extent = Extent{surface.info.size.width, surface.info.size.width} + }; + + runtime.DownloadMemory(surface, staging_buffer, download_copy); + //SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.Span()); + } +} + +template +void RasterizerCache

::UnmapMemory(PAddr cpu_addr, size_t size) { + std::vector deleted_images; + ForEachSurfaceInRegion(cpu_addr, size, [&](SurfaceId id, Surface&) { + deleted_images.push_back(id); + }); + + for (const SurfaceId id : deleted_images) { + Surface& image = slot_images[id]; + if (True(image.flags & SurfaceFlagBits::Tracked)) { + UntrackSurface(image); + } + + UnregisterImage(id); + DeleteImage(id); + } +} + +template +bool RasterizerCache

::TextureCopy(const GPU::Regs::DisplayTransferConfig& config) { + // Transfers must be 16-byte aligned + u32 copy_size = Common::AlignDown(config.texture_copy.size, 16); + if (copy_size == 0) { + return false; + } + + // Helper function used to perform some sanity checks on the copy operation + const auto SanityCheck = [©_size](u32& gap, u32& width) { + if (width == 0 && gap != 0) { + return false; + } + + if (gap == 0 || width >= copy_size) { + width = copy_size; + gap = 0; + } + + if (copy_size % width != 0) { + return false; + } + + return true; + }; + + u32 input_gap = config.texture_copy.input_gap * 16; + u32 input_width = config.texture_copy.input_width * 16; + if (!SanityCheck(input_gap, input_width)) { + return false; + } + + u32 output_gap = config.texture_copy.output_gap * 16; + u32 output_width = config.texture_copy.output_width * 16; + if (!SanityCheck(output_gap, output_width)) { + return false; + } + + // Find the source surface + const u32 src_width = input_width + input_gap; + const u32 src_height = copy_size / input_width; + const SurfaceInfo src_info = { + .addr = config.GetPhysicalInputAddress(), + .byte_size = (src_height - 1) * src_width + input_width, + .real_size = Extent{input_width, src_height}, + .size = Extent{src_width, src_height}, + }; + + SurfaceId src_id = FindOrCreateSurface(src_info, CanTexCopy); + const Surface& src_surface = slot_images[src_id]; + + // Find the destination surface + const u32 dst_width = PixelsInBytes(src_surface.info.is_tiled ? 
output_gap / 8 : output_gap); + const u32 dst_height = copy_size / output_width; + const SurfaceInfo dst_info = { + .addr = config.GetPhysicalOutputAddress(), + .byte_size = src_info.byte_size, + .real_size = Extent{output_width, dst_height}, + .size = Extent{dst_width, dst_height} + }; + + SurfaceId dst_id = FindOrCreateSurface(dst_info, CanTexCopy); + const Surface& dst_surface = slot_images[dst_id]; + + // Mark the destination surface as GPU invalidated + PrepareImage(dst_id, true, false); + + return runtime.BlitSurfaces(src_surface, src_rect, dst_surface, dst_rect); +} + +template +bool RasterizerCache

::DisplayTransfer(const GPU::Regs::DisplayTransferConfig& config) { + using ScalingMode = GPU::Regs::DisplayTransferConfig::ScalingMode; + + const SurfaceInfo src_info = { + .format = PixelFormatFromGPUPixelFormat(config.input_format), + .size = Extent{config.input_width, config.input_height}, + .is_tiled = !config.input_linear, + .real_size = Extent{config.output_width, config.output_height} + }; + + // Downscale if needed + const u32 dst_width = + config.scaling != ScalingMode::NoScale ? config.output_width.Value() / 2 + : config.output_width.Value(); + const u32 dst_height = + config.scaling == ScalingMode::ScaleXY ? config.output_height.Value() / 2 + : config.output_height.Value(); + + const SurfaceInfo dst_info = { + .format = PixelFormatFromGPUPixelFormat(config.output_format), + .size = Extent{dst_width, dst_height}, + .is_tiled = config.input_linear != config.dont_swizzle, + .real_size = Extent{dst_width, dst_height} + }; + + // Search the cache for any surfaces that can be used for the transfer + SurfaceId src_id, dst_id; + const PAddr src_addr = config.GetPhysicalInputAddress(); + const PAddr dst_addr = config.GetPhysicalOutputAddress(); + + do { + has_deleted_images = false; + + src_id = FindSurface(src_info, src_addr); + dst_id = FindSurface(dst_info, dst_addr); + + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + + if (!dst_id) { + dst_id = CreateSurface(dst_info, dst_addr); + } + + if (!src_id) { + src_id = CreateSurface(src_info, src_addr); + } + } while (has_deleted_images); + + // Mark the surfaces as GPU invalidated + const Surface& src_surface = slot_images[src_id]; + PrepareImage(src_id, false, false); + + const Surface& dst_surface = slot_images[dst_id]; + PrepareImage(dst_id, true, false); + + const s32 real_width = src_info.real_size.width; + const s32 real_height = src_info.real_size.height; + const std::array offsets = { + Offset{.x = 0, .y = 0}, + Offset{.x = real_width, .y = real_height}, + }; + + 
if (config.flip_vertically) { + std::swap(offsets[1].y, offsets[0].y); + } + + return runtime.BlitSurfaces(src_surface, dst_surface, offsets); +} + +template +void RasterizerCache

::InvalidateColorBuffer() { + SurfaceViewId& color_buffer_id = render_targets.color_buffer_id; + color_buffer_id = FindColorBuffer(false); + if (!color_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid color buffer!"); + return; + } + + // When invalidating a color buffer, the old contents are no longer relevant + SurfaceView& color_buffer = slot_image_views[color_buffer_id]; + Surface& image = slot_images[color_buffer.image_id]; + image.flags &= ~SurfaceFlagBits::CPUInvalidated; + image.flags &= ~SurfaceFlagBits::GPUInvalidated; + + runtime.InvalidateColorBuffer(color_buffer); +} + +template +void RasterizerCache

::InvalidateDepthBuffer() { + SurfaceViewId& depth_buffer_id = render_targets.depth_buffer_id; + depth_buffer_id = FindDepthBuffer(false); + if (!depth_buffer_id) { + LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer"); + return; + } + + // When invalidating the depth buffer, the old contents are no longer relevant + Surface& image = slot_images[slot_image_views[depth_buffer_id].image_id]; + image.flags &= ~SurfaceFlagBits::CPUInvalidated; + image.flags &= ~SurfaceFlagBits::GPUInvalidated; + + SurfaceView& depth_buffer = slot_image_views[depth_buffer_id]; + runtime.InvalidateDepthBuffer(depth_buffer); +} + +template +typename P::SurfaceView* RasterizerCache

::TryFindFramebufferImageView(PAddr addr) { + const auto it = page_table.find(addr >> Memory::PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + + for (const SurfaceId& surface_id : it->second) { + const Surface& surface = slot_images[surface_id]; + if (surface.info.addr != addr) { + continue; + } + + if (surface.surface_view_ids.empty()) { + continue; + } + + return &slot_image_views[surface.surface_view_ids.at(0)]; + } + + return nullptr; +} + +template +bool RasterizerCache

::IsRegionGPUInvalidated(PAddr addr, size_t size) { + bool is_modified = false; + ForEachSurfaceInRegion(addr, size, [&is_modified](SurfaceId, Surface& image) { + if (False(image.flags & SurfaceFlagBits::GPUInvalidated)) { + return false; + } + + is_modified = true; + return true; + }); + + return is_modified; +} + +template +void RasterizerCache

::RefreshContents(Surface& image) { + if (False(image.flags & SurfaceFlagBits::CPUInvalidated)) { + // Only upload modified images + return; + } + + image.flags &= ~SurfaceFlagBits::CPUInvalidated; + TrackImage(image); + + auto map = runtime.MapUploadBuffer(image.info.byte_size); + UploadImageContents(image, map, 0); + runtime.InsertUploadMemoryBarrier(); +} + +template +template +void RasterizerCache

::UploadImageContents(Surface& image, MapBuffer& map, size_t buffer_offset) { + const std::span mapped_span = map.Span().subspan(buffer_offset); + const PAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & SurfaceFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, map, buffer_offset, uploads); + } else if (True(image.flags & SurfaceFlagBits::Converted)) { + std::vector unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(map, buffer_offset, copies); + } else if (image.info.type == ImageType::Buffer) { + const std::array copies{UploadBufferCopy(gpu_memory, gpu_addr, image, mapped_span)}; + image.UploadMemory(map, buffer_offset, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(map, buffer_offset, copies); + } +} + +template +SurfaceViewId RasterizerCache

::FindImageView(const Pica::TexturingRegs::FullTextureConfig& config) { + if (!IsValidAddress(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + + const auto [pair, is_new] = image_views.try_emplace(config); + SurfaceViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + + return image_view_id; +} + +template +SurfaceViewId RasterizerCache

::CreateImageView(const Pica::TexturingRegs::FullTextureConfig& config) { + const SurfaceInfo info(config); + const PAddr image_gpu_addr = config.Address() - config.BaseLayer() * info.layer_stride; + const SurfaceId image_id = FindOrCreateSurface(info, image_gpu_addr); + + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + + Surface& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const SurfaceViewInfo view_info(config, base.layer); + const SurfaceViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + + SurfaceView& image_view = slot_image_views[image_view_id]; + image_view.flags |= SurfaceViewFlagBits::Strong; + image.flags |= SurfaceFlagBits::Strong; + return image_view_id; +} + +template +SurfaceId RasterizerCache

::CreateSurface(const SurfaceInfo& info, PAddr base_addr) { + const SurfaceId image_id = StitchSurface(info, base_addr); + const Surface& image = slot_images[image_id]; + + const auto [it, is_new] = image_allocs_table.try_emplace(image.addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} + +template +SurfaceId RasterizerCache

::StitchSurface(const SurfaceInfo& info, PAddr base_addr) { + SurfaceInfo new_info = info; + const u32 surface_size = CalculateSurfaceSize(new_info); + std::vector overlap_ids; + + ForEachSurfaceInRegion(base_addr, surface_size, [&](SurfaceId overlap_id, Surface& overlap) { + const auto solution = ResolveOverlap(new_info, base_addr, overlap, true); + if (solution) { + base_addr = solution-> + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const Surface new_image{new_info, base_addr}; + + if (IsSubresource(new_info, overlap, base_addr, options)) { + left_aliased_ids.push_back(overlap_id); + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options)) { + right_aliased_ids.push_back(overlap_id); + } + }); + + const SurfaceId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Surface& new_image = slot_images[new_image_id]; + + RefreshContents(new_image); + + for (const SurfaceId overlap_id : overlap_ids) { + Surface& overlap = slot_images[overlap_id]; + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + + if (True(overlap.flags & SurfaceFlagBits::Tracked)) { + UntrackSurface(overlap); + } + + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + + Surface& new_image_base = new_image; + for (const SurfaceId aliased_id : right_aliased_ids) { + Surface& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + } + + for (const SurfaceId aliased_id : left_aliased_ids) { + Surface& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + } + + RegisterImage(new_image_id); + return new_image_id; +} + +template +std::pair RasterizerCache

::GetTransferSurfacePair( + const GPU::Regs::DisplayTransferConfig& config) { + + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const PAddr dst_addr = config.GetPhysicalInputAddress(); + const PAddr src_addr = config.GetPhysicalOutputAddress(); + const auto [src_info, dst_info] = MakeSurfaceInfosFromTransferConfig(config); + + SurfaceId dst_id; + SurfaceId src_id; + + do { + has_deleted_images = false; + dst_id = FindSurface(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindSurface(src_info, src_addr, FIND_OPTIONS); + const Surface* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const Surface* const src_image = src_id ? &slot_images[src_id] : nullptr; + + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + + return CopyImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} + +template +SurfaceViewId RasterizerCache

::FindColorBuffer(bool is_clear) { + const auto& regs = Pica::g_state.regs; + + const bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow; + + auto IsColorWriteEnabled = [&](u32 value) -> bool { + return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0; + }; + + const bool color_writes = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable) || + IsColorWriteEnabled(regs.framebuffer.output_merger.green_enable) || + IsColorWriteEnabled(regs.framebuffer.output_merger.blue_enable) || + IsColorWriteEnabled(regs.framebuffer.output_merger.alpha_enable); + + const bool using_color_fb = shadow_rendering || color_writes; + + if (!using_color_fb) { + return SurfaceViewId{}; + } + + const PAddr color_addr = regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress(); + if (color_addr == 0) { + return SurfaceViewId{}; + } + + const SurfaceInfo info{regs}; + return FindRenderTargetView(info, color_addr, is_clear); +} + +template +SurfaceViewId RasterizerCache

::FindDepthBuffer(bool is_clear) { + const auto& regs = Pica::g_state.regs; + + const bool has_stencil = + regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; + + const bool stencil_test_enabled = regs.framebuffer.output_merger.stencil_test.enable; + + const u32 stencil_write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0) + ? static_cast(regs.framebuffer.output_merger.stencil_test.write_mask) + : 0; + + const bool depth_test_enabled = regs.framebuffer.output_merger.depth_test_enable == 1 || + regs.framebuffer.output_merger.depth_write_enable == 1; + + const bool depth_write_mask = (regs.framebuffer.framebuffer.allow_depth_stencil_write != 0 && + regs.framebuffer.output_merger.depth_write_enable); + + const bool write_depth_fb = + (depth_test_enabled && depth_write_mask) || + (stencil_test_enabled && stencil_write_mask != 0); + + const bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode == + Pica::FramebufferRegs::FragmentOperationMode::Shadow; + + const bool using_depth_fb = + !shadow_rendering && (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || + (has_stencil && stencil_test_enabled)); + + if (!using_depth_fb) { + return SurfaceViewId{}; + } + + const PAddr depth_addr = regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress(); + if (depth_addr == 0) { + return SurfaceViewId{}; + } + + const SurfaceInfo info{regs}; + return FindRenderTargetView(info, depth_addr, is_clear); +} + +template +SurfaceViewId RasterizerCache

::FindRenderTargetView(const SurfaceInfo& info, PAddr target_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const SurfaceId image_id = FindOrCreateSurface(info, target_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + + Surface& image = slot_images[image_id]; + const ImageViewType view_type = RenderTarGetSurfaceViewType(info); + SubresourceBase base = SubresourceBase{.level = 0, .layer = 0}; + + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = 1}, + }; + + return FindOrEmplaceImageView(image_id, SurfaceViewInfo{view_type, info.format, range}); +} + +template +SurfaceViewId RasterizerCache

::FindOrEmplaceImageView(SurfaceId image_id, const SurfaceViewInfo& info) { + Surface& image = slot_images[image_id]; + if (const SurfaceViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + + const SurfaceViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} + +template +void RasterizerCache

::RegisterImage(SurfaceId image_id) { + Surface& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & SurfaceFlagBits::Registered), + "Trying to register an already registered image"); + + image.flags |= SurfaceFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { page_table[page].push_back(image_id); }); +} + +template +void RasterizerCache

::UnregisterImage(SurfaceId image_id) { + Surface& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & SurfaceFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~SurfaceFlagBits::Registered; + + ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << Memory::PAGE_BITS); + return; + } + + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << Memory::PAGE_BITS); + return; + } + + image_ids.erase(vector_it); + }); +} + +template +void RasterizerCache

::TrackImage(Surface& image) { + ASSERT(False(image.flags & SurfaceFlagBits::Tracked)); + image.flags |= SurfaceFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); +} + +template +void RasterizerCache

::UntrackSurface(Surface& image) { + ASSERT(True(image.flags & SurfaceFlagBits::Tracked)); + image.flags &= ~SurfaceFlagBits::Tracked; + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); +} + +template +void RasterizerCache

::DeleteImage(SurfaceId image_id) { + Surface& image = slot_images[image_id]; + const PAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & SurfaceFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & SurfaceFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + dirty_flags |= DirtyFlags::RenderTargets; + dirty_flags |= DirtyFlags::ColorBuffer; + dirty_flags |= DirtyFlags::DepthBuffer; + + // Check if any view has been bound as a render target and unbind it + const std::span image_view_ids = image.image_view_ids; + for (const SurfaceViewId image_view_id : image_view_ids) { + if (render_targets.color_buffer_id == image_view_id) { + render_targets.color_buffer_id = SurfaceViewId{}; + } + + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = SurfaceViewId{}; + } + } + + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + // Iterate over all aliased images and remove any references to the to-be-deleted image + for (const AliasedSurface& alias : image.aliased_images) { + Surface& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedSurface& other_alias) { + return other_alias.id == image_id; + }); + + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + 
num_removed_aliases); + } + + for (const SurfaceViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); + + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + + has_deleted_images = true; +} + +template +void RasterizerCache

::RemoveImageViewReferences(std::span removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} + +template +void RasterizerCache

::RemoveFramebuffers(std::span removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} + +template +void RasterizerCache

::MarkModification(Surface& image) noexcept { + image.flags |= SurfaceFlagBits::GPUInvalidated; + image.modification_tick = ++modification_tick; +} + +template +void RasterizerCache

::SynchronizeAliases(SurfaceId image_id) { + std::vector aliased_images; + Surface& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + + for (const AliasedSurface& aliased : image.aliased_images) { + Surface& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + + if (aliased_images.empty()) { + return; + } + + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const Surface& lhs_image = slot_images[lhs->id]; + const Surface& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} + +template +void RasterizerCache

::PrepareImage(SurfaceId image_id, bool is_modification, bool invalidate) { + Surface& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(SurfaceFlagBits::CPUInvalidated | SurfaceFlagBits::GPUInvalidated); + + if (False(image.flags & SurfaceFlagBits::Tracked)) { + TrackImage(image); + } + + } else { + RefreshContents(image); + } + + if (is_modification) { + MarkModification(image); + } + + image.frame_tick = frame_tick; +} + +template +void RasterizerCache

::CopyImage(SurfaceId dst_id, SurfaceId src_id, std::span copies) { + Surface& dst = slot_images[dst_id]; + Surface& src = slot_images[src_id]; + + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + return runtime.CopyImage(dst, src, copies); + } + + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + + for (const SurfaceCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const SurfaceViewInfo dst_view_info{ImageViewType::e2D, dst.info.format, dst_range}; + const SurfaceViewInfo src_view_info{ImageViewType::e2D, src.info.format, src_range}; + + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const SurfaceViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + SurfaceView& dst_view = slot_image_views[dst_view_id]; + SurfaceView& src_view = slot_image_views[src_view_id]; + + [[maybe_unused]] const Extent expected_size{ + .width = std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + 
.depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + + UNIMPLEMENTED_IF(copy.extent != expected_size); + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} + +template +std::pair RasterizerCache

::RenderTargetFromImage( + SurfaceId image_id, const SurfaceViewInfo& view_info) { + const SurfaceViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const Surface& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::Color; + const SurfaceViewId color_view_id = is_color ? view_id : SurfaceViewId{}; + const SurfaceViewId depth_view_id = is_color ? SurfaceViewId{} : view_id; + const Extent extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + + return {framebuffer_id, view_id}; +} + +template +bool RasterizerCache

::IsFullClear(SurfaceViewId id) { + if (!id) { + return true; + } + + const SurfaceView& image_view = slot_image_views[id]; + const Surface& image = slot_images[image_view.image_id]; + const Extent size = image_view.size; + const auto& regs = Pica::g_state.regs; + + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + + // Make sure the clear covers all texels in the subresource + auto& scissor = regs.rasterizer.scissor_test; + return scissor.x1 == 0 && scissor.y1 == 0 && scissor.x2 >= size.width && + scissor.y2 >= size.height; +} + +} // namespace VideoCommon diff --git a/src/video_core/transform_cache/slot_vector.h b/src/video_core/transform_cache/slot_vector.h new file mode 100644 index 000000000..909c45234 --- /dev/null +++ b/src/video_core/transform_cache/slot_vector.h @@ -0,0 +1,226 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" + +namespace VideoCore { + +struct SlotId { + static constexpr u32 INVALID_INDEX = std::numeric_limits::max(); + + constexpr auto operator<=>(const SlotId&) const noexcept = default; + + constexpr explicit operator bool() const noexcept { + return index != INVALID_INDEX; + } + + u32 index = INVALID_INDEX; +}; + +template +requires std::is_nothrow_move_assignable_v && std::is_nothrow_move_constructible_v +class SlotVector { +public: + class Iterator { + friend SlotVector; + + public: + constexpr Iterator() = default; + + Iterator& operator++() noexcept { + const u64* const bitset = slot_vector->stored_bitset.data(); + const u32 size = static_cast(slot_vector->stored_bitset.size()) * 64; + if (id.index < size) { + do { + ++id.index; + } while (id.index < size && !IsValid(bitset)); + if (id.index == size) { + id.index = SlotId::INVALID_INDEX; + } + } + return *this; + } + + Iterator operator++(int) noexcept { + const Iterator copy{*this}; + ++*this; + return copy; + } + + bool operator==(const Iterator& other) const noexcept { + return id.index == other.id.index; + } + + bool operator!=(const Iterator& other) const noexcept { + return id.index != other.id.index; + } + + std::pair operator*() const noexcept { + return {id, std::addressof((*slot_vector)[id])}; + } + + T* operator->() const noexcept { + return std::addressof((*slot_vector)[id]); + } + + private: + Iterator(SlotVector* slot_vector_, SlotId id_) noexcept + : slot_vector{slot_vector_}, id{id_} {} + + bool IsValid(const u64* bitset) const noexcept { + return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; + } + + SlotVector* slot_vector; + SlotId id; + }; + + ~SlotVector() noexcept { + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + if ((bits & 1) != 0) { + values[index + bit].object.~T(); + } + } + index 
+= 64; + } + delete[] values; + } + + [[nodiscard]] T& operator[](SlotId id) noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + [[nodiscard]] const T& operator[](SlotId id) const noexcept { + ValidateIndex(id); + return values[id.index].object; + } + + template + [[nodiscard]] SlotId insert(Args&&... args) noexcept { + const u32 index = FreeValueIndex(); + new (&values[index].object) T(std::forward(args)...); + SetStorageBit(index); + + return SlotId{index}; + } + + void erase(SlotId id) noexcept { + values[id.index].object.~T(); + free_list.push_back(id.index); + ResetStorageBit(id.index); + } + + [[nodiscard]] Iterator begin() noexcept { + const auto it = std::find_if(stored_bitset.begin(), stored_bitset.end(), + [](u64 value) { return value != 0; }); + + if (it == stored_bitset.end()) { + return end(); + } + + const u32 word_index = static_cast(std::distance(it, stored_bitset.begin())); + const SlotId first_id{word_index * 64 + static_cast(std::countr_zero(*it))}; + return Iterator(this, first_id); + } + + [[nodiscard]] Iterator end() noexcept { + return Iterator(this, SlotId{SlotId::INVALID_INDEX}); + } + +private: + struct NonTrivialDummy { + NonTrivialDummy() noexcept {} + }; + + union Entry { + Entry() noexcept : dummy{} {} + ~Entry() noexcept {} + + NonTrivialDummy dummy; + T object; + }; + + void SetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] |= u64(1) << (index % 64); + } + + void ResetStorageBit(u32 index) noexcept { + stored_bitset[index / 64] &= ~(u64(1) << (index % 64)); + } + + bool ReadStorageBit(u32 index) noexcept { + return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0; + } + + void ValidateIndex(SlotId id) const noexcept { + DEBUG_ASSERT(id); + DEBUG_ASSERT(id.index / 64 < stored_bitset.size()); + DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0); + } + + [[nodiscard]] u32 FreeValueIndex() noexcept { + if (free_list.empty()) { + Reserve(values_capacity ? 
(values_capacity << 1) : 1); + } + const u32 free_index = free_list.back(); + free_list.pop_back(); + return free_index; + } + + void Reserve(size_t new_capacity) noexcept { + Entry* const new_values = new Entry[new_capacity]; + size_t index = 0; + for (u64 bits : stored_bitset) { + for (size_t bit = 0; bits; ++bit, bits >>= 1) { + const size_t i = index + bit; + if ((bits & 1) == 0) { + continue; + } + T& old_value = values[i].object; + new (&new_values[i].object) T(std::move(old_value)); + old_value.~T(); + } + index += 64; + } + + stored_bitset.resize((new_capacity + 63) / 64); + + const size_t old_free_size = free_list.size(); + free_list.resize(old_free_size + (new_capacity - values_capacity)); + std::iota(free_list.begin() + old_free_size, free_list.end(), + static_cast(values_capacity)); + + delete[] values; + values = new_values; + values_capacity = new_capacity; + } + + Entry* values = nullptr; + size_t values_capacity = 0; + + std::vector stored_bitset; + std::vector free_list; +}; + +} // namespace VideoCore + +template <> +struct std::hash { + size_t operator()(const VideoCore::SlotId& id) const noexcept { + return std::hash{}(id.index); + } +}; diff --git a/src/video_core/transform_cache/surface.cpp b/src/video_core/transform_cache/surface.cpp new file mode 100644 index 000000000..57a347413 --- /dev/null +++ b/src/video_core/transform_cache/surface.cpp @@ -0,0 +1,32 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/transform_cache/surface.h" +#include "video_core/transform_cache/utils.h" + +namespace VideoCore { + +Surface::Surface(const SurfaceInfo& info) + : info(info), mip_level_offsets(CalculateMipLevelOffsets(info)) { + +} + +[[nodiscard]] std::optional Surface::IsMipLevel(PAddr other_addr) { + const u32 offset = other_addr - info.addr; + if (other_addr < info.addr || offset > info.byte_size) { + return std::nullopt; + } + + // Check if the address is referencing a mip level + const auto end = mip_level_offsets.begin() + info.levels; + const auto it = std::find(mip_level_offsets.begin(), end, offset); + + if (it == end) { + return std::nullopt; + } + + return *it; +} + +} // namespace VideoCore diff --git a/src/video_core/transform_cache/surface.h b/src/video_core/transform_cache/surface.h new file mode 100644 index 000000000..6a4ed0b18 --- /dev/null +++ b/src/video_core/transform_cache/surface.h @@ -0,0 +1,140 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once +#include +#include +#include +#include +#include "common/hash.h" +#include "video_core/transform_cache/pixel_format.h" +#include "video_core/transform_cache/types.h" +#include "video_core/transform_cache/slot_vector.h" + +namespace VideoCore { + +constexpr u32 MAX_PICA_LEVELS = 8; + +using SurfaceId = SlotId; +using SurfaceViewId = SlotId; +using SurfaceAllocId = SlotId; +using FramebufferId = SlotId; + +enum class SurfaceFlagBits : u32 { + AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU + RequiresConvertion = 1 << 1, ///< Guest format is not supported natively and it has to be converted + GPUInvalidated = 1 << 2, ///< Contents have been modified from the host GPU + CPUInvalidated = 1 << 3, ///< Contents have been modified from the guest CPU + Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT + Registered = 1 << 5, ///< True when the image is registered + Picked = 1 << 6, ///< Temporary flag to mark the image as picked +}; + +DECLARE_ENUM_FLAG_OPERATORS(SurfaceFlagBits); + +struct SurfaceInfo { + auto operator<=>(const SurfaceInfo& other) const noexcept = default; + + VAddr addr = 0; + u32 byte_size = 0; + VAddr addr_end = 0; + PixelFormat format = PixelFormat::Invalid; + u32 levels = 1; + bool is_tiled = false; + + /** + * The size member dictates what dimentions the allocated texture will have. + * That sometimes might include padding, especially when the surface is being used + * as a framebuffer, where games commonly allocate a 256x512 buffer and only render to the + * lower 240x400 (LCD resolution) portion. This is done due to hardware limitations + * regarding texture sizes by the PICA and seems to be cheaper than rendering to the + * entire 256x512 region and downsampling it. The real_size dictates the actual size + * of the surface and is used in display transfer operations to crop the additional padding. 
+ **/ + Extent real_size{0, 0}; + Extent size{0, 0}; +}; + +struct NullSurfaceParams {}; + +/// Properties used to create and locate a SurfaceView +struct SurfaceViewInfo { + auto operator<=>(const SurfaceViewInfo& other) const noexcept = default; + + [[nodiscard]] bool IsRenderTarget() const noexcept; + + SurfaceViewType type{}; + PixelFormat format{}; + u32 layers = 1; +}; + +struct Surface { + explicit Surface(const SurfaceInfo& info); + + [[nodiscard]] std::optional IsMipLevel(PAddr other_addr); + + [[nodiscard]] SurfaceViewId FindView(const SurfaceViewInfo& view_info) const noexcept; + + void TrackView(const SurfaceViewInfo& view_info, SurfaceViewId image_view_id); + + [[nodiscard]] bool Overlaps(PAddr overlap_addr, u32 overlap_size) const noexcept { + const PAddr overlap_end = overlap_addr + overlap_size; + return info.addr < overlap_end && overlap_addr < info.addr_end; + } + + SurfaceInfo info; + SurfaceFlagBits flags = SurfaceFlagBits::CPUInvalidated; + + u64 modification_tick = 0; + u64 frame_tick = 0; + + std::array mip_level_offsets{}; + std::vector surface_view_infos; + std::vector surface_view_ids; +}; + +struct SurfaceView { + explicit SurfaceView(const SurfaceViewInfo& info, + const SurfaceInfo& surface_info, SurfaceId surface_id); + + SurfaceId image_id{}; + PixelFormat format{}; + SurfaceViewType type{}; + u32 layers = 1; + Extent size{0, 0}; + + u64 invalidation_tick = 0; + u64 modification_tick = 0; +}; + +/// Framebuffer properties used to lookup a framebuffer +struct RenderTargets { + constexpr auto operator<=>(const RenderTargets&) const noexcept = default; + + constexpr bool Contains(std::span elements) const noexcept { + const auto contains = [elements](SurfaceViewId item) { + return std::ranges::find(elements, item) != elements.end(); + }; + + return contains(color_buffer_id) || contains(depth_buffer_id); + } + + SurfaceViewId color_buffer_id; + SurfaceViewId depth_buffer_id; + Extent size; +}; + +} // namespace VideoCore + + +namespace std 
{ + +template <> +struct hash { + size_t operator()(const VideoCore::RenderTargets& rt) const noexcept { + return Common::ComputeHash64(&rt, sizeof(VideoCore::RenderTargets)); + } +}; + +} // namespace std diff --git a/src/video_core/transform_cache/types.h b/src/video_core/transform_cache/types.h new file mode 100644 index 000000000..868e6c73b --- /dev/null +++ b/src/video_core/transform_cache/types.h @@ -0,0 +1,57 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include "common/common_types.h" + +namespace VideoCore { + +enum class SurfaceViewType : u32 { + e2D, + eCube, + eShadow2D, + eProjection, + eShadowCube +}; + +struct Offset { + constexpr auto operator<=>(const Offset&) const noexcept = default; + + s32 x = 0; + s32 y = 0; +}; + +struct Extent { + constexpr auto operator<=>(const Extent&) const noexcept = default; + + u32 width = 1; + u32 height = 1; +}; + +struct SurfaceCopy { + u32 src_level; + u32 dst_level; + Offset src_offset; + Offset dst_offset; + Extent extent; +}; + +struct BufferSurfaceCopy { + u32 buffer_offset; + u32 buffer_size; + u32 buffer_row_length; + u32 buffer_image_height; + u32 texture_level; + Offset texture_offset; + Extent texture_extent; +}; + +struct BufferCopy { + u32 src_offset; + u32 dst_offset; + u32 size; +}; + +} // namespace VideoCore diff --git a/src/video_core/transform_cache/utils.h b/src/video_core/transform_cache/utils.h new file mode 100644 index 000000000..75bc4c32a --- /dev/null +++ b/src/video_core/transform_cache/utils.h @@ -0,0 +1,120 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+
+#pragma once
+#include <array>
+#include <utility>
+#include "common/assert.h"
+#include "common/common_funcs.h"
+#include "video_core/transform_cache/surface.h"
+#include "video_core/transform_cache/pixel_format.h"
+
+namespace VideoCore {
+
+enum class MatchFlags {
+    Invalid = 1 << 0, ///< Candidate is allowed to be invalid
+    Exact = 1 << 1,   ///< Candidate must match image exactly
+    SubRect = 1 << 2, ///< Candidate is fully encompassed by image
+    Copy = 1 << 3,    ///< Candidate can be used as a copy source
+    Expand = 1 << 4,  ///< Candidate fully encompasses image
+    TexCopy = 1 << 5  ///< Candidate can be used for texture/display transfer
+};
+
+DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
+
+/// Checks whether size (in bytes) is a multiple of the byte size of one block of the surface
+[[nodiscard]] constexpr bool IsBlockAligned(u32 size, const Surface& surface) {
+    // Morton tiled images are block instead of pixel aligned
+    const u32 pixels = surface.info.is_tiled ? 64 : 1;
+    // Convert the block from bits to bytes before taking the remainder
+    const u32 block_bytes = pixels * GetFormatBpp(surface.info.format) / 8;
+    // A block smaller than one byte truncates to zero; every byte size is aligned to it
+    return block_bytes == 0 || size % block_bytes == 0;
+}
+
+/// Number of pixels of the given format that fit in size bytes
+[[nodiscard]] constexpr u32 PixelsInBytes(u32 size, PixelFormat format) {
+    return size * 8 / GetFormatBpp(format);
+}
+
+/// Number of bytes occupied by the given amount of pixels of the given format
+[[nodiscard]] constexpr u32 BytesInPixels(u32 pixels, PixelFormat format) {
+    return pixels * GetFormatBpp(format) / 8;
+}
+
+/**
+ * Builds the source and destination surface descriptions of a display transfer.
+ * @param config Display transfer registers
+ * @returns A pair holding the source info first and the destination info second
+ */
+[[nodiscard]] constexpr auto MakeSurfaceCopyInfosFromTransferConfig(
+    const GPU::Regs::DisplayTransferConfig& config) -> std::pair<SurfaceInfo, SurfaceInfo> {
+    using ScalingMode = GPU::Regs::DisplayTransferConfig::ScalingMode;
+
+    // Designated initializers must follow the declaration order of SurfaceInfo
+    const SurfaceInfo source_info = {
+        .format = PixelFormatFromGPUPixelFormat(config.input_format),
+        .is_tiled = !config.input_linear,
+        .size = Extent{config.output_width, config.output_height}
+    };
+
+    // Any scaling mode halves the width, only ScaleXY halves the height as well
+    const u32 dest_width = config.scaling != ScalingMode::NoScale ? config.output_width.Value() / 2
+                                                                  : config.output_width.Value();
+    const u32 dest_height = config.scaling == ScalingMode::ScaleXY ? config.output_height.Value() / 2
+                                                                   : config.output_height.Value();
+    const SurfaceInfo dest_info = {
+        .format = PixelFormatFromGPUPixelFormat(config.output_format),
+        .is_tiled = config.input_linear != config.dont_swizzle,
+        .size = Extent{dest_width, dest_height}
+    };
+
+    return std::make_pair(source_info, dest_info);
+}
+
+/// Computes the byte offset of every mip level; entries past info.levels are left at zero
+[[nodiscard]] constexpr auto CalculateMipLevelOffsets(const SurfaceInfo& info) noexcept
+    -> std::array<u32, MAX_PICA_LEVELS> {
+    ASSERT(info.levels <= MAX_PICA_LEVELS);
+
+    const u32 bytes_per_pixel = GetBytesPerPixel(info.format);
+    u32 width = info.size.width;
+    u32 height = info.size.height;
+
+    std::array<u32, MAX_PICA_LEVELS> offsets{};
+    u32 offset = 0;
+    for (u32 level = 0; level < info.levels; level++) {
+        offsets[level] = offset;
+        offset += width * height * bytes_per_pixel;
+
+        // Each level has half the dimensions of the previous one
+        width >>= 1;
+        height >>= 1;
+    }
+
+    return offsets;
+}
+
+/// Computes the total byte size of all info.levels mip levels of the surface
+[[nodiscard]] constexpr u32 CalculateSurfaceSize(const SurfaceInfo& info) noexcept {
+    const u32 bytes_per_pixel = GetBytesPerPixel(info.format);
+    u32 width = info.size.width;
+    u32 height = info.size.height;
+
+    u32 size = 0;
+    for (u32 level = 0; level < info.levels; level++) {
+        size += width * height * bytes_per_pixel;
+
+        width >>= 1;
+        height >>= 1;
+    }
+
+    return size;
+}
+
+// Helper function used to detect a compatible copy surface
+[[nodiscard]] constexpr bool CanTexCopy(const SurfaceInfo& info, const Surface& surface) {
+    const auto& candidate_info = surface.info;
+    if (candidate_info.format == PixelFormat::Invalid) {
+        return false;
+    }
+
+    const u32 copy_width = info.real_size.width;
+    if (info.size.width != info.real_size.width) {
+        const u32 stride = candidate_info.size.width;
+        const u32 tile_dim = candidate_info.is_tiled ? 8 : 1;
+        const u32 tile_stride = BytesInPixels(stride * tile_dim, candidate_info.format);
+        if (tile_stride == 0) {
+            // Degenerate candidate; bail out instead of taking a modulo by zero below
+            return false;
+        }
+
+        // NOTE(review): stride and copy_width are taken from pixel extents while tile_stride
+        // and offset are byte quantities; confirm the intended units of these comparisons.
+        const u32 offset = info.addr - candidate_info.addr;
+        return IsBlockAligned(offset, surface) &&
+               IsBlockAligned(copy_width, surface) &&
+               (info.size.height == 1 || stride == tile_stride) &&
+               (offset % tile_stride) + copy_width <= tile_stride;
+    }
+
+    return true;
+}
+
+} // namespace VideoCore