Compare commits
1 Commits
shoo-soc
...
tex-cache-
Author | SHA1 | Date | |
---|---|---|---|
a6deaaaa80 |
@ -134,7 +134,7 @@ message(STATUS "Target architecture: ${ARCHITECTURE}")
|
||||
# Configure C++ standard
|
||||
# ===========================
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# set up output paths for executable binaries
|
||||
|
@ -59,6 +59,61 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
|
||||
|
||||
#endif // _MSC_VER ndef
|
||||
|
||||
/// Defines the bitwise operators (|, &, ^, <<, >>, ~ and the matching compound assignments)
/// together with the True()/False() zero tests for the enum `type`. Every operator converts
/// its operands to the enum's underlying integer type and casts the result back to `type`.
#define DECLARE_ENUM_FLAG_OPERATORS(type) \
    [[nodiscard]] constexpr type operator|(type lhs, type rhs) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(static_cast<Bits>(lhs) | static_cast<Bits>(rhs)); \
    } \
    [[nodiscard]] constexpr type operator&(type lhs, type rhs) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(static_cast<Bits>(lhs) & static_cast<Bits>(rhs)); \
    } \
    [[nodiscard]] constexpr type operator^(type lhs, type rhs) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(static_cast<Bits>(lhs) ^ static_cast<Bits>(rhs)); \
    } \
    [[nodiscard]] constexpr type operator<<(type lhs, type rhs) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(static_cast<Bits>(lhs) << static_cast<Bits>(rhs)); \
    } \
    [[nodiscard]] constexpr type operator>>(type lhs, type rhs) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(static_cast<Bits>(lhs) >> static_cast<Bits>(rhs)); \
    } \
    constexpr type& operator|=(type& lhs, type rhs) noexcept { \
        return lhs = lhs | rhs; \
    } \
    constexpr type& operator&=(type& lhs, type rhs) noexcept { \
        return lhs = lhs & rhs; \
    } \
    constexpr type& operator^=(type& lhs, type rhs) noexcept { \
        return lhs = lhs ^ rhs; \
    } \
    constexpr type& operator<<=(type& lhs, type rhs) noexcept { \
        return lhs = lhs << rhs; \
    } \
    constexpr type& operator>>=(type& lhs, type rhs) noexcept { \
        return lhs = lhs >> rhs; \
    } \
    [[nodiscard]] constexpr type operator~(type value) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<type>(~static_cast<Bits>(value)); \
    } \
    [[nodiscard]] constexpr bool True(type value) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<Bits>(value) != 0; \
    } \
    [[nodiscard]] constexpr bool False(type value) noexcept { \
        using Bits = std::underlying_type_t<type>; \
        return static_cast<Bits>(value) == 0; \
    }
|
||||
|
||||
|
||||
// Generic function to get last error message.
|
||||
// Call directly after the command or use the error num.
|
||||
// This function might change the error code.
|
||||
|
@ -191,8 +191,7 @@ struct Regs {
|
||||
enum ScalingMode : u32 {
|
||||
NoScale = 0, // Doesn't scale the image
|
||||
ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter
|
||||
ScaleXY =
|
||||
2, // Downscales the image in half in both the X and Y axes and applies a box filter
|
||||
ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter
|
||||
};
|
||||
|
||||
union {
|
||||
|
@ -12,6 +12,8 @@ add_library(video_core STATIC
|
||||
pica_types.h
|
||||
primitive_assembly.cpp
|
||||
primitive_assembly.h
|
||||
rasterizer_accelerated.cpp
|
||||
rasterizer_accelerated.h
|
||||
rasterizer_interface.h
|
||||
regs.cpp
|
||||
regs.h
|
||||
@ -102,6 +104,14 @@ add_library(video_core STATIC
|
||||
texture/etc1.h
|
||||
texture/texture_decode.cpp
|
||||
texture/texture_decode.h
|
||||
transform_cache/morton_swizzle.h
|
||||
transform_cache/pixel_format.h
|
||||
transform_cache/rasterizer_cache.h
|
||||
transform_cache/slot_vector.h
|
||||
transform_cache/surface.cpp
|
||||
transform_cache/surface.h
|
||||
transform_cache/types.h
|
||||
transform_cache/utils.h
|
||||
utils.h
|
||||
vertex_loader.cpp
|
||||
vertex_loader.h
|
||||
|
74
src/video_core/rasterizer_accelerated.cpp
Normal file
74
src/video_core/rasterizer_accelerated.cpp
Normal file
@ -0,0 +1,74 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <limits>
|
||||
#include "core/memory.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/**
 * Adds `delta` to the cached-surface counter of every page touched by the region
 * [addr, addr + size).
 *
 * Pages whose counter drops to zero are reported to the memory system with
 * RasterizerMarkRegionCached(..., false); pages whose counter goes from zero to one are
 * reported with RasterizerMarkRegionCached(..., true). Adjacent pages are merged so that
 * each contiguous run produces a single call.
 *
 * @param addr  Start address of the region
 * @param size  Size of the region in bytes; an empty region is a no-op
 * @param delta Amount added to each counter; must be non-zero (the code assumes -1 or 1)
 */
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
    // An empty region touches no pages. Bail out before `addr + size - 1` wraps around.
    if (size == 0) {
        return;
    }

    const u32 page_start = addr >> Memory::PAGE_BITS;
    // One past the last page touched by the region, so the loop below visits that page too
    const u32 page_end = ((addr + size - 1) >> Memory::PAGE_BITS) + 1;

    u32 uncache_start_addr = 0;
    u32 cache_start_addr = 0;
    u32 uncache_bytes = 0;
    u32 cache_bytes = 0;

    for (u32 page = page_start; page != page_end; page++) {
        auto& count = cached_pages.at(page);

        // Ensure no overflow happens
        if (delta > 0) {
            ASSERT_MSG(count < std::numeric_limits<u16>::max(), "Count will overflow!");
        } else if (delta < 0) {
            ASSERT_MSG(count > 0, "Count will underflow!");
        } else {
            ASSERT_MSG(false, "Delta must be non-zero!");
        }

        // Adds or subtracts 1, as count is an unsigned 16-bit value
        count += delta;

        // Assume delta is either -1 or 1
        if (count == 0) {
            // Start a new run of pages to uncache, or extend the current one
            if (uncache_bytes == 0) {
                uncache_start_addr = page << Memory::PAGE_BITS;
            }

            uncache_bytes += Memory::PAGE_SIZE;
        } else if (uncache_bytes > 0) {
            // The run ended on the previous page, flush it
            VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes,
                                                            false);
            uncache_bytes = 0;
        }

        if (count == 1 && delta > 0) {
            // Start a new run of pages to cache, or extend the current one
            if (cache_bytes == 0) {
                cache_start_addr = page << Memory::PAGE_BITS;
            }

            cache_bytes += Memory::PAGE_SIZE;
        } else if (cache_bytes > 0) {
            // The run ended on the previous page, flush it
            VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes,
                                                            true);

            cache_bytes = 0;
        }
    }

    // Flush the runs that were still open when the last page was processed
    if (uncache_bytes > 0) {
        VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes,
                                                        false);
    }

    if (cache_bytes > 0) {
        VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes,
                                                        true);
    }
}
|
||||
|
||||
} // namespace VideoCore
|
21
src/video_core/rasterizer_accelerated.h
Normal file
21
src/video_core/rasterizer_accelerated.h
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
class RasterizerAccelerated : public RasterizerInterface {
|
||||
public:
|
||||
RasterizerAccelerated() = default;
|
||||
virtual ~RasterizerAccelerated() override = default;
|
||||
|
||||
/// Increase/decrease the number of surface in pages touching the specified region
|
||||
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
|
||||
|
||||
private:
|
||||
std::array<u16, 0x18000> cached_pages{};
|
||||
};
|
||||
} // namespace VideoCore
|
@ -85,7 +85,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr
|
||||
auto glbuf_next_tile = [&] {
|
||||
x = (x + 8) % stride;
|
||||
gl_buffer += 8 * aligned_bytes_per_pixel;
|
||||
if (!x) {
|
||||
if (x == 0) {
|
||||
y += 8;
|
||||
gl_buffer -= stride * 9 * aligned_bytes_per_pixel;
|
||||
}
|
||||
@ -113,6 +113,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr
|
||||
LOG_ERROR(Render_OpenGL, "Out of bound texture");
|
||||
break;
|
||||
}
|
||||
|
||||
MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
|
||||
tile_buffer += tile_size;
|
||||
current_paddr += tile_size;
|
||||
|
@ -1,9 +1,8 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <functional>
|
||||
#include "common/common_types.h"
|
||||
@ -15,7 +14,7 @@ struct ScreenInfo;
|
||||
|
||||
namespace Pica::Shader {
|
||||
struct OutputVertex;
|
||||
} // namespace Pica::Shader
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
@ -25,6 +24,7 @@ enum class LoadCallbackStage {
|
||||
Build,
|
||||
Complete,
|
||||
};
|
||||
|
||||
using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
|
||||
|
||||
class RasterizerInterface {
|
||||
@ -42,6 +42,9 @@ public:
|
||||
/// Notify rasterizer that the specified PICA register has been changed
|
||||
virtual void NotifyPicaRegisterChanged(u32 id) = 0;
|
||||
|
||||
/// Increase/decrease the number of surface in pages touching the specified region
|
||||
virtual void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) = 0;
|
||||
|
||||
/// Notify rasterizer that all caches should be flushed to 3DS memory
|
||||
virtual void FlushAll() = 0;
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/vector_math.h"
|
||||
#include "video_core/pica_types.h"
|
||||
|
||||
namespace Pica {
|
||||
|
@ -184,6 +184,7 @@ struct TexturingRegs {
|
||||
const TextureConfig config;
|
||||
const TextureFormat format;
|
||||
};
|
||||
|
||||
const std::array<FullTextureConfig, 3> GetTextures() const {
|
||||
return {{
|
||||
{static_cast<bool>(main_config.texture0_enable), texture0, texture0_format},
|
||||
|
@ -1781,9 +1781,8 @@ void RasterizerOpenGL::SyncColorWriteMask() {
|
||||
}
|
||||
}
|
||||
|
||||
auto IsColorWriteEnabled = [&](u32 value) {
|
||||
return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
|
||||
: GL_FALSE;
|
||||
auto IsColorWriteEnabled = [&](u32 value) -> bool {
|
||||
return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0;
|
||||
};
|
||||
|
||||
state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable);
|
||||
|
@ -15,6 +15,7 @@ size_t CalculateTileSize(TexturingRegs::TextureFormat format);
|
||||
|
||||
struct TextureInfo {
|
||||
PAddr physical_address;
|
||||
std::array<PAddr, 5> cube_addresses;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
ptrdiff_t stride;
|
||||
|
148
src/video_core/transform_cache/morton_swizzle.h
Normal file
148
src/video_core/transform_cache/morton_swizzle.h
Normal file
@ -0,0 +1,148 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include "common/alignment.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/transform_cache/pixel_format.h"
|
||||
#include "video_core/utils.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
template <bool morton_to_gl, PixelFormat format>
|
||||
constexpr void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
||||
for (u32 y = 0; y < 8; ++y) {
|
||||
for (u32 x = 0; x < 8; ++x) {
|
||||
u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
|
||||
u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * aligned_bytes_per_pixel;
|
||||
if constexpr (morton_to_gl) {
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
gl_ptr[0] = tile_ptr[3];
|
||||
std::memcpy(gl_ptr + 1, tile_ptr, 3);
|
||||
} else {
|
||||
std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
|
||||
}
|
||||
} else {
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
std::memcpy(tile_ptr, gl_ptr + 1, 3);
|
||||
tile_ptr[3] = gl_ptr[0];
|
||||
} else {
|
||||
std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool morton_to_gl, PixelFormat format>
|
||||
constexpr void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 tile_size = bytes_per_pixel * 64;
|
||||
|
||||
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
||||
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
||||
|
||||
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
||||
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
||||
const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
|
||||
|
||||
ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));
|
||||
|
||||
const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
|
||||
u32 x = (begin_pixel_index % (stride * 8)) / 8;
|
||||
u32 y = (begin_pixel_index / (stride * 8)) * 8;
|
||||
|
||||
gl_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
||||
|
||||
auto glbuf_next_tile = [&] {
|
||||
x = (x + 8) % stride;
|
||||
gl_buffer += 8 * aligned_bytes_per_pixel;
|
||||
if (!x) {
|
||||
y += 8;
|
||||
gl_buffer -= stride * 9 * aligned_bytes_per_pixel;
|
||||
}
|
||||
};
|
||||
|
||||
u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
|
||||
|
||||
if (start < aligned_start && !morton_to_gl) {
|
||||
std::array<u8, tile_size> tmp_buf;
|
||||
MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
|
||||
std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
|
||||
std::min(aligned_start, end) - start);
|
||||
|
||||
tile_buffer += aligned_start - start;
|
||||
glbuf_next_tile();
|
||||
}
|
||||
|
||||
const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
|
||||
PAddr current_paddr = aligned_start;
|
||||
while (tile_buffer < buffer_end) {
|
||||
// Pokemon Super Mystery Dungeon will try to use textures that go beyond
|
||||
// the end address of VRAM. Stop reading if reaches invalid address
|
||||
if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
|
||||
!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
|
||||
LOG_ERROR(Render_OpenGL, "Out of bound texture");
|
||||
break;
|
||||
}
|
||||
|
||||
MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
|
||||
tile_buffer += tile_size;
|
||||
current_paddr += tile_size;
|
||||
glbuf_next_tile();
|
||||
}
|
||||
|
||||
if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
|
||||
std::array<u8, tile_size> tmp_buf;
|
||||
MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
|
||||
std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = {
|
||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<true, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<true, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4>, // 4
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr, // 5 - 13
|
||||
MortonCopy<true, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24>, // 16
|
||||
MortonCopy<true, PixelFormat::D24S8> // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = {
|
||||
MortonCopy<false, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<false, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<false, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<false, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<false, PixelFormat::RGBA4>, // 4
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr, // 5 - 13
|
||||
MortonCopy<false, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<false, PixelFormat::D24>, // 16
|
||||
MortonCopy<false, PixelFormat::D24S8> // 17
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
194
src/video_core/transform_cache/pixel_format.h
Normal file
194
src/video_core/transform_cache/pixel_format.h
Normal file
@ -0,0 +1,194 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <string_view>
|
||||
#include "core/hw/gpu.h"
|
||||
#include "video_core/regs_framebuffer.h"
|
||||
#include "video_core/regs_texturing.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Pixel formats handled by the cache. The numeric values are significant: the conversion
/// helpers in this header cast register values straight to PixelFormat. Value 15 is unused.
enum class PixelFormat : u8 {
    // Formats shared between textures and color buffers (0 - 4)
    RGBA8 = 0,
    RGB8 = 1,
    RGB5A1 = 2,
    RGB565 = 3,
    RGBA4 = 4,

    // Formats only available to textures (5 - 13)
    IA8 = 5,
    RG8 = 6,
    I8 = 7,
    A8 = 8,
    IA4 = 9,
    I4 = 10,
    A4 = 11,
    ETC1 = 12,
    ETC1A4 = 13,

    // Formats only available to depth buffers
    D16 = 14,
    D24 = 16,
    D24S8 = 17,

    Invalid = 255,
};
|
||||
|
||||
/// Category a surface belongs to; see GetFormatType for the PixelFormat mapping.
/// Enumerators are numbered consecutively from 0.
enum class SurfaceType {
    Color,        // 0
    Texture,      // 1
    Depth,        // 2
    DepthStencil, // 3
    Fill,         // 4
    Invalid,      // 5
};
|
||||
|
||||
/// Returns the enumerator name of `format`, or "NotReal" for any value that is not one of
/// the seventeen real formats (this includes PixelFormat::Invalid).
constexpr std::string_view PixelFormatAsString(PixelFormat format) {
    // Indexed by the numeric PixelFormat value
    constexpr std::string_view names[] = {
        "RGBA8",   "RGB8", "RGB5A1", "RGB565", "RGBA4", "IA8",
        "RG8",     "I8",   "A8",     "IA4",    "I4",    "A4",
        "ETC1",    "ETC1A4", "D16",
        "NotReal", // 15 has no enumerator
        "D24",     "D24S8",
    };

    const u32 index = static_cast<u32>(format);
    if (index >= std::size(names)) {
        return "NotReal";
    }
    return names[index];
}
|
||||
|
||||
/// Converts a texture format to the PixelFormat with the same numeric value.
/// Values of 14 and above yield PixelFormat::Invalid.
constexpr PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
    if (static_cast<u32>(format) >= 14) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(format);
}
|
||||
|
||||
/// Converts a color buffer format to the PixelFormat with the same numeric value.
/// Values of 5 and above yield PixelFormat::Invalid.
constexpr PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
    if (static_cast<u32>(format) >= 5) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(format);
}
|
||||
|
||||
/// Converts a depth buffer format to a PixelFormat by adding 14 to its numeric value.
/// Values of 4 and above yield PixelFormat::Invalid.
/// NOTE(review): a raw value of 1 maps to 15, which has no PixelFormat enumerator - confirm
/// that DepthFormat never takes that value.
constexpr PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
    const u32 raw = static_cast<u32>(format);
    if (raw >= 4) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(raw + 14);
}
|
||||
|
||||
/// Converts a GPU register pixel format to a PixelFormat. RGB565 and RGB5A1 are matched by
/// name, every other value below 5 is converted by numeric value, the rest are Invalid.
constexpr PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
    // RGB565 and RGB5A1 are switched in PixelFormat compared to ColorFormat
    if (format == GPU::Regs::PixelFormat::RGB565) {
        return PixelFormat::RGB565;
    }
    if (format == GPU::Regs::PixelFormat::RGB5A1) {
        return PixelFormat::RGB5A1;
    }

    if (static_cast<u32>(format) < 5) {
        return static_cast<PixelFormat>(format);
    }
    return PixelFormat::Invalid;
}
|
||||
|
||||
/// Classifies a pixel format: values 0-4 are Color, 5-13 are Texture, D16/D24 are Depth,
/// D24S8 is DepthStencil and everything else is Invalid.
constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
    switch (pixel_format) {
    case PixelFormat::D16:
    case PixelFormat::D24:
        return SurfaceType::Depth;
    case PixelFormat::D24S8:
        return SurfaceType::DepthStencil;
    default:
        break;
    }

    const u32 raw = static_cast<u32>(pixel_format);
    if (raw < 5) {
        return SurfaceType::Color;
    }
    return raw < 14 ? SurfaceType::Texture : SurfaceType::Invalid;
}
|
||||
|
||||
/// Returns true when both formats are Color/Texture (in any combination), both are Depth,
/// or both are DepthStencil. Every other pairing returns false.
constexpr bool CheckFormatsBlittable(PixelFormat source_format, PixelFormat dest_format) {
    const SurfaceType src = GetFormatType(source_format);
    const SurfaceType dst = GetFormatType(dest_format);

    switch (src) {
    case SurfaceType::Color:
    case SurfaceType::Texture:
        return dst == SurfaceType::Color || dst == SurfaceType::Texture;
    case SurfaceType::Depth:
    case SurfaceType::DepthStencil:
        // Depth formats only pair with a destination of the very same type
        return dst == src;
    default:
        return false;
    }
}
|
||||
|
||||
/// Returns the number of bits per pixel of `format`, or 0 for values that are not a real
/// format (including PixelFormat::Invalid).
constexpr u32 GetFormatBpp(PixelFormat format) {
    // Indexed by the numeric PixelFormat value
    constexpr u32 bits[] = {
        32, // RGBA8
        24, // RGB8
        16, // RGB5A1
        16, // RGB565
        16, // RGBA4
        16, // IA8
        16, // RG8
        8,  // I8
        8,  // A8
        8,  // IA4
        4,  // I4
        4,  // A4
        4,  // ETC1
        8,  // ETC1A4
        16, // D16
        0,  // 15 has no enumerator
        24, // D24
        32, // D24S8
    };

    const u32 index = static_cast<u32>(format);
    return index < sizeof(bits) / sizeof(bits[0]) ? bits[index] : 0;
}
|
||||
|
||||
/// Returns the per-pixel byte count used for the linear buffer: 4 for D24 and for all
/// texture-only formats, otherwise the format's bit count divided by 8.
constexpr u32 GetBytesPerPixel(PixelFormat format) {
    // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
    const bool four_bytes =
        format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture;
    return four_bytes ? 4 : GetFormatBpp(format) / 8;
}
|
||||
|
||||
} // namespace VideoCore
|
1314
src/video_core/transform_cache/rasterizer_cache.h
Normal file
1314
src/video_core/transform_cache/rasterizer_cache.h
Normal file
File diff suppressed because it is too large
Load Diff
226
src/video_core/transform_cache/slot_vector.h
Normal file
226
src/video_core/transform_cache/slot_vector.h
Normal file
@ -0,0 +1,226 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <numeric>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
struct SlotId {
|
||||
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
|
||||
|
||||
constexpr auto operator<=>(const SlotId&) const noexcept = default;
|
||||
|
||||
constexpr explicit operator bool() const noexcept {
|
||||
return index != INVALID_INDEX;
|
||||
}
|
||||
|
||||
u32 index = INVALID_INDEX;
|
||||
};
|
||||
|
||||
template <class T>
|
||||
requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
|
||||
class SlotVector {
|
||||
public:
|
||||
class Iterator {
|
||||
friend SlotVector<T>;
|
||||
|
||||
public:
|
||||
constexpr Iterator() = default;
|
||||
|
||||
Iterator& operator++() noexcept {
|
||||
const u64* const bitset = slot_vector->stored_bitset.data();
|
||||
const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
|
||||
if (id.index < size) {
|
||||
do {
|
||||
++id.index;
|
||||
} while (id.index < size && !IsValid(bitset));
|
||||
if (id.index == size) {
|
||||
id.index = SlotId::INVALID_INDEX;
|
||||
}
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
Iterator operator++(int) noexcept {
|
||||
const Iterator copy{*this};
|
||||
++*this;
|
||||
return copy;
|
||||
}
|
||||
|
||||
bool operator==(const Iterator& other) const noexcept {
|
||||
return id.index == other.id.index;
|
||||
}
|
||||
|
||||
bool operator!=(const Iterator& other) const noexcept {
|
||||
return id.index != other.id.index;
|
||||
}
|
||||
|
||||
std::pair<SlotId, T*> operator*() const noexcept {
|
||||
return {id, std::addressof((*slot_vector)[id])};
|
||||
}
|
||||
|
||||
T* operator->() const noexcept {
|
||||
return std::addressof((*slot_vector)[id]);
|
||||
}
|
||||
|
||||
private:
|
||||
Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
|
||||
: slot_vector{slot_vector_}, id{id_} {}
|
||||
|
||||
bool IsValid(const u64* bitset) const noexcept {
|
||||
return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
SlotVector<T>* slot_vector;
|
||||
SlotId id;
|
||||
};
|
||||
|
||||
~SlotVector() noexcept {
|
||||
size_t index = 0;
|
||||
for (u64 bits : stored_bitset) {
|
||||
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
|
||||
if ((bits & 1) != 0) {
|
||||
values[index + bit].object.~T();
|
||||
}
|
||||
}
|
||||
index += 64;
|
||||
}
|
||||
delete[] values;
|
||||
}
|
||||
|
||||
[[nodiscard]] T& operator[](SlotId id) noexcept {
|
||||
ValidateIndex(id);
|
||||
return values[id.index].object;
|
||||
}
|
||||
|
||||
[[nodiscard]] const T& operator[](SlotId id) const noexcept {
|
||||
ValidateIndex(id);
|
||||
return values[id.index].object;
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
[[nodiscard]] SlotId insert(Args&&... args) noexcept {
|
||||
const u32 index = FreeValueIndex();
|
||||
new (&values[index].object) T(std::forward<Args>(args)...);
|
||||
SetStorageBit(index);
|
||||
|
||||
return SlotId{index};
|
||||
}
|
||||
|
||||
void erase(SlotId id) noexcept {
|
||||
values[id.index].object.~T();
|
||||
free_list.push_back(id.index);
|
||||
ResetStorageBit(id.index);
|
||||
}
|
||||
|
||||
[[nodiscard]] Iterator begin() noexcept {
|
||||
const auto it = std::find_if(stored_bitset.begin(), stored_bitset.end(),
|
||||
[](u64 value) { return value != 0; });
|
||||
|
||||
if (it == stored_bitset.end()) {
|
||||
return end();
|
||||
}
|
||||
|
||||
const u32 word_index = static_cast<u32>(std::distance(it, stored_bitset.begin()));
|
||||
const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
|
||||
return Iterator(this, first_id);
|
||||
}
|
||||
|
||||
[[nodiscard]] Iterator end() noexcept {
|
||||
return Iterator(this, SlotId{SlotId::INVALID_INDEX});
|
||||
}
|
||||
|
||||
private:
|
||||
struct NonTrivialDummy {
|
||||
NonTrivialDummy() noexcept {}
|
||||
};
|
||||
|
||||
union Entry {
|
||||
Entry() noexcept : dummy{} {}
|
||||
~Entry() noexcept {}
|
||||
|
||||
NonTrivialDummy dummy;
|
||||
T object;
|
||||
};
|
||||
|
||||
void SetStorageBit(u32 index) noexcept {
|
||||
stored_bitset[index / 64] |= u64(1) << (index % 64);
|
||||
}
|
||||
|
||||
void ResetStorageBit(u32 index) noexcept {
|
||||
stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
|
||||
}
|
||||
|
||||
bool ReadStorageBit(u32 index) noexcept {
|
||||
return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
void ValidateIndex(SlotId id) const noexcept {
|
||||
DEBUG_ASSERT(id);
|
||||
DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
|
||||
DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
|
||||
}
|
||||
|
||||
[[nodiscard]] u32 FreeValueIndex() noexcept {
|
||||
if (free_list.empty()) {
|
||||
Reserve(values_capacity ? (values_capacity << 1) : 1);
|
||||
}
|
||||
const u32 free_index = free_list.back();
|
||||
free_list.pop_back();
|
||||
return free_index;
|
||||
}
|
||||
|
||||
void Reserve(size_t new_capacity) noexcept {
|
||||
Entry* const new_values = new Entry[new_capacity];
|
||||
size_t index = 0;
|
||||
for (u64 bits : stored_bitset) {
|
||||
for (size_t bit = 0; bits; ++bit, bits >>= 1) {
|
||||
const size_t i = index + bit;
|
||||
if ((bits & 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
T& old_value = values[i].object;
|
||||
new (&new_values[i].object) T(std::move(old_value));
|
||||
old_value.~T();
|
||||
}
|
||||
index += 64;
|
||||
}
|
||||
|
||||
stored_bitset.resize((new_capacity + 63) / 64);
|
||||
|
||||
const size_t old_free_size = free_list.size();
|
||||
free_list.resize(old_free_size + (new_capacity - values_capacity));
|
||||
std::iota(free_list.begin() + old_free_size, free_list.end(),
|
||||
static_cast<u32>(values_capacity));
|
||||
|
||||
delete[] values;
|
||||
values = new_values;
|
||||
values_capacity = new_capacity;
|
||||
}
|
||||
|
||||
Entry* values = nullptr;
|
||||
size_t values_capacity = 0;
|
||||
|
||||
std::vector<u64> stored_bitset;
|
||||
std::vector<u32> free_list;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
template <>
|
||||
struct std::hash<VideoCore::SlotId> {
|
||||
size_t operator()(const VideoCore::SlotId& id) const noexcept {
|
||||
return std::hash<u32>{}(id.index);
|
||||
}
|
||||
};
|
32
src/video_core/transform_cache/surface.cpp
Normal file
32
src/video_core/transform_cache/surface.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/transform_cache/surface.h"
|
||||
#include "video_core/transform_cache/utils.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Stores a copy of `info` and computes the mip level offsets from it
Surface::Surface(const SurfaceInfo& info)
    : info(info), mip_level_offsets(CalculateMipLevelOffsets(info)) {}
|
||||
|
||||
/**
 * Checks whether `other_addr` is the start address of one of this surface's mip levels.
 *
 * @param other_addr Address to test
 * @return The index of the mip level whose offset from info.addr equals
 *         `other_addr - info.addr`, or std::nullopt when the address is outside the
 *         surface or does not match any of the first info.levels offsets.
 */
[[nodiscard]] std::optional<u32> Surface::IsMipLevel(PAddr other_addr) {
    // Reject addresses below the surface first so the subtraction below cannot wrap
    if (other_addr < info.addr) {
        return std::nullopt;
    }

    const u32 offset = other_addr - info.addr;
    if (offset > info.byte_size) {
        return std::nullopt;
    }

    // Check if the address is referencing a mip level
    const auto begin = mip_level_offsets.begin();
    const auto end = begin + info.levels;
    const auto it = std::find(begin, end, offset);

    if (it == end) {
        return std::nullopt;
    }

    // Report which level matched; the offset itself is already known to the caller
    return static_cast<u32>(it - begin);
}
|
||||
|
||||
} // namespace VideoCore
|
140
src/video_core/transform_cache/surface.h
Normal file
140
src/video_core/transform_cache/surface.h
Normal file
@ -0,0 +1,140 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <span>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include "common/hash.h"
|
||||
#include "video_core/transform_cache/pixel_format.h"
|
||||
#include "video_core/transform_cache/types.h"
|
||||
#include "video_core/transform_cache/slot_vector.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
constexpr u32 MAX_PICA_LEVELS = 8;
|
||||
|
||||
using SurfaceId = SlotId;
|
||||
using SurfaceViewId = SlotId;
|
||||
using SurfaceAllocId = SlotId;
|
||||
using FramebufferId = SlotId;
|
||||
|
||||
enum class SurfaceFlagBits : u32 {
|
||||
AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
|
||||
RequiresConvertion = 1 << 1, ///< Guest format is not supported natively and it has to be converted
|
||||
GPUInvalidated = 1 << 2, ///< Contents have been modified from the host GPU
|
||||
CPUInvalidated = 1 << 3, ///< Contents have been modified from the guest CPU
|
||||
Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
|
||||
Registered = 1 << 5, ///< True when the image is registered
|
||||
Picked = 1 << 6, ///< Temporary flag to mark the image as picked
|
||||
};
|
||||
|
||||
DECLARE_ENUM_FLAG_OPERATORS(SurfaceFlagBits);
|
||||
|
||||
struct SurfaceInfo {
|
||||
auto operator<=>(const SurfaceInfo& other) const noexcept = default;
|
||||
|
||||
VAddr addr = 0;
|
||||
u32 byte_size = 0;
|
||||
VAddr addr_end = 0;
|
||||
PixelFormat format = PixelFormat::Invalid;
|
||||
u32 levels = 1;
|
||||
bool is_tiled = false;
|
||||
|
||||
/**
|
||||
* The size member dictates what dimentions the allocated texture will have.
|
||||
* That sometimes might include padding, especially when the surface is being used
|
||||
* as a framebuffer, where games commonly allocate a 256x512 buffer and only render to the
|
||||
* lower 240x400 (LCD resolution) portion. This is done due to hardware limitations
|
||||
* regarding texture sizes by the PICA and seems to be cheaper than rendering to the
|
||||
* entire 256x512 region and downsampling it. The real_size dictates the actual size
|
||||
* of the surface and is used in display transfer operations to crop the additional padding.
|
||||
**/
|
||||
Extent real_size{0, 0};
|
||||
Extent size{0, 0};
|
||||
};
|
||||
|
||||
/// Empty tag type with no members.
/// NOTE(review): nothing in this header uses it - confirm the intended purpose.
struct NullSurfaceParams {};
|
||||
|
||||
/// Properties used to create and locate a SurfaceView
|
||||
struct SurfaceViewInfo {
|
||||
auto operator<=>(const SurfaceViewInfo& other) const noexcept = default;
|
||||
|
||||
[[nodiscard]] bool IsRenderTarget() const noexcept;
|
||||
|
||||
SurfaceViewType type{};
|
||||
PixelFormat format{};
|
||||
u32 layers = 1;
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
explicit Surface(const SurfaceInfo& info);
|
||||
|
||||
[[nodiscard]] std::optional<u32> IsMipLevel(PAddr other_addr);
|
||||
|
||||
[[nodiscard]] SurfaceViewId FindView(const SurfaceViewInfo& view_info) const noexcept;
|
||||
|
||||
void TrackView(const SurfaceViewInfo& view_info, SurfaceViewId image_view_id);
|
||||
|
||||
[[nodiscard]] bool Overlaps(PAddr overlap_addr, u32 overlap_size) const noexcept {
|
||||
const PAddr overlap_end = overlap_addr + overlap_size;
|
||||
return info.addr < overlap_end && overlap_addr < info.addr_end;
|
||||
}
|
||||
|
||||
SurfaceInfo info;
|
||||
SurfaceFlagBits flags = SurfaceFlagBits::CPUInvalidated;
|
||||
|
||||
u64 modification_tick = 0;
|
||||
u64 frame_tick = 0;
|
||||
|
||||
std::array<u32, MAX_PICA_LEVELS> mip_level_offsets{};
|
||||
std::vector<SurfaceViewInfo> surface_view_infos;
|
||||
std::vector<SurfaceViewId> surface_view_ids;
|
||||
};
|
||||
|
||||
struct SurfaceView {
|
||||
explicit SurfaceView(const SurfaceViewInfo& info,
|
||||
const SurfaceInfo& surface_info, SurfaceId surface_id);
|
||||
|
||||
SurfaceId image_id{};
|
||||
PixelFormat format{};
|
||||
SurfaceViewType type{};
|
||||
u32 layers = 1;
|
||||
Extent size{0, 0};
|
||||
|
||||
u64 invalidation_tick = 0;
|
||||
u64 modification_tick = 0;
|
||||
};
|
||||
|
||||
/// Framebuffer properties used to lookup a framebuffer
|
||||
struct RenderTargets {
|
||||
constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
|
||||
|
||||
constexpr bool Contains(std::span<const SurfaceViewId> elements) const noexcept {
|
||||
const auto contains = [elements](SurfaceViewId item) {
|
||||
return std::ranges::find(elements, item) != elements.end();
|
||||
};
|
||||
|
||||
return contains(color_buffer_id) || contains(depth_buffer_id);
|
||||
}
|
||||
|
||||
SurfaceViewId color_buffer_id;
|
||||
SurfaceViewId depth_buffer_id;
|
||||
Extent size;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<VideoCore::RenderTargets> {
|
||||
size_t operator()(const VideoCore::RenderTargets& rt) const noexcept {
|
||||
return Common::ComputeHash64(&rt, sizeof(VideoCore::RenderTargets));
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
57
src/video_core/transform_cache/types.h
Normal file
57
src/video_core/transform_cache/types.h
Normal file
@ -0,0 +1,57 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <compare>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
/// Kind of view created on top of a surface
enum class SurfaceViewType : u32 {
    e2D = 0,
    eCube = 1,
    eShadow2D = 2,
    eProjection = 3,
    eShadowCube = 4,
};
|
||||
|
||||
struct Offset {
|
||||
constexpr auto operator<=>(const Offset&) const noexcept = default;
|
||||
|
||||
s32 x = 0;
|
||||
s32 y = 0;
|
||||
};
|
||||
|
||||
struct Extent {
|
||||
constexpr auto operator<=>(const Extent&) const noexcept = default;
|
||||
|
||||
u32 width = 1;
|
||||
u32 height = 1;
|
||||
};
|
||||
|
||||
struct SurfaceCopy {
|
||||
u32 src_level;
|
||||
u32 dst_level;
|
||||
Offset src_offset;
|
||||
Offset dst_offset;
|
||||
Extent extent;
|
||||
};
|
||||
|
||||
struct BufferSurfaceCopy {
|
||||
u32 buffer_offset;
|
||||
u32 buffer_size;
|
||||
u32 buffer_row_length;
|
||||
u32 buffer_image_height;
|
||||
u32 texture_level;
|
||||
Offset texture_offset;
|
||||
Extent texture_extent;
|
||||
};
|
||||
|
||||
struct BufferCopy {
|
||||
u32 src_offset;
|
||||
u32 dst_offset;
|
||||
u32 size;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
120
src/video_core/transform_cache/utils.h
Normal file
120
src/video_core/transform_cache/utils.h
Normal file
@ -0,0 +1,120 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <utility>
|
||||
#include "video_core/transform_cache/surface.h"
|
||||
#include "video_core/transform_cache/pixel_format.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
enum class MatchFlags {
|
||||
Invalid = 1 << 0, ///< Candidate is allowed to be invalid
|
||||
Exact = 1 << 1, ///< Candidate must match image exactly
|
||||
SubRect = 1 << 2, ///< Candidate is fully encompassed by image
|
||||
Copy = 1 << 3, ///< Candidate can be used as a copy source
|
||||
Expand = 1 << 4, ///< Candidate fully encompasses image
|
||||
TexCopy = 1 << 5 ///< Candidate can be used for texture/display transfer
|
||||
};
|
||||
|
||||
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
|
||||
|
||||
/**
 * Checks whether a byte count is a whole multiple of the surface's alignment unit.
 *
 * The unit is one pixel for linear surfaces and a block of 64 pixels for Morton tiled
 * surfaces.
 *
 * @param size Byte count (an offset or a length) to test.
 * @param surface Surface whose format and tiling define the alignment unit.
 * @returns true when size is aligned; false when it is not or when the format reports
 *          zero bits per pixel.
 */
[[nodiscard]] constexpr bool IsBlockAligned(u32 size, const Surface& surface) {
    // Morton tiled images are block aligned instead of pixel aligned
    const u32 pixels = surface.info.is_tiled ? 64 : 1;
    const u32 block_bits = pixels * GetFormatBpp(surface.info.format);
    if (block_bits == 0) {
        // No meaningful unit to align to; also avoids a modulo by zero
        return false;
    }

    // Compare in bits: this equals size % (block_bits / 8) == 0 whenever the unit is a
    // whole number of bytes, and stays well defined for units smaller than one byte.
    // The 64-bit widening keeps size * 8 from overflowing.
    return (static_cast<u64>(size) * 8) % block_bits == 0;
}
|
||||
|
||||
/// Converts a byte count into the number of pixels of the given format it holds
/// (rounded down).
[[nodiscard]] constexpr u32 PixelsInBytes(u32 size, PixelFormat format) {
    const u32 total_bits = size * 8;
    return total_bits / GetFormatBpp(format);
}
|
||||
|
||||
/// Converts a pixel count of the given format into the number of bytes it occupies
/// (rounded down).
[[nodiscard]] constexpr u32 BytesInPixels(u32 pixels, PixelFormat format) {
    const auto total_bits = pixels * GetFormatBpp(format);
    return total_bits / 8;
}
|
||||
|
||||
[[nodiscard]] constexpr auto MakeSurfaceCopyInfosFromTransferConfig(
|
||||
const GPU::Regs::DisplayTransferConfig& config) -> std::pair<SurfaceInfo, SurfaceInfo> {
|
||||
using ScalingMode = GPU::Regs::DisplayTransferConfig::ScalingMode;
|
||||
|
||||
const SurfaceInfo source_info = {
|
||||
.format = PixelFormatFromGPUPixelFormat(config.input_format),
|
||||
.size = Extent{config.output_width, config.output_height},
|
||||
.is_tiled = !config.input_linear
|
||||
};
|
||||
|
||||
const u32 dest_width = config.scaling != ScalingMode::NoScale ? config.output_width.Value() / 2
|
||||
: config.output_width.Value();
|
||||
const u32 dest_height = config.scaling == ScalingMode::ScaleXY ? config.output_height.Value() / 2
|
||||
: config.output_height.Value();
|
||||
const SurfaceInfo dest_info = {
|
||||
.format = PixelFormatFromGPUPixelFormat(config.output_format),
|
||||
.size = Extent{dest_width, dest_height},
|
||||
.is_tiled = config.input_linear != config.dont_swizzle
|
||||
};
|
||||
|
||||
return std::make_pair(source_info, dest_info);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr auto CalculateMipLevelOffsets(const SurfaceInfo& info) noexcept
|
||||
-> std::array<u32, MAX_PICA_LEVELS> {
|
||||
ASSERT(info.levels <= MAX_PICA_LEVELS);
|
||||
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(info.format);
|
||||
u32 width = info.size.width;
|
||||
u32 height = info.size.height;
|
||||
|
||||
std::array<u32, MAX_PICA_LEVELS> offsets{};
|
||||
u32 offset = 0;
|
||||
for (s32 level = 0; level < info.levels; level++) {
|
||||
offsets[level] = offset;
|
||||
offset += width * height * bytes_per_pixel;
|
||||
|
||||
width >>= 1;
|
||||
height >>= 1;
|
||||
}
|
||||
|
||||
return offsets;
|
||||
}
|
||||
|
||||
/**
 * Computes the total size in bytes of a surface including all of its mip levels.
 *
 * Each level is half the width and height of the previous one.
 *
 * @param info Surface description providing format, base size and level count.
 * @returns Sum of the byte sizes of the first info.levels levels.
 */
[[nodiscard]] constexpr u32 CalculateSurfaceSize(const SurfaceInfo& info) noexcept {
    const u32 bytes_per_pixel = GetBytesPerPixel(info.format);
    u32 width = info.size.width;
    u32 height = info.size.height;

    u32 size = 0;
    // Unsigned counter to match the type of info.levels
    for (u32 level = 0; level < info.levels; level++) {
        size += width * height * bytes_per_pixel;

        width >>= 1;
        height >>= 1;
    }

    return size;
}
|
||||
|
||||
// Helper function used to detect a compatible copy surface
|
||||
[[nodiscard]] constexpr bool CanTexCopy(const SurfaceInfo& info, const Surface& surface) {
|
||||
const auto& candidate_info = surface.info;
|
||||
if (candidate_info.format == PixelFormat::Invalid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u32 copy_width = info.real_size.width;
|
||||
if (info.size.width != info.real_size.width) {
|
||||
const u32 stride = candidate_info.size.width;
|
||||
const u32 tile_dim = candidate_info.is_tiled ? 8 : 1;
|
||||
const u32 tile_stride = BytesInPixels(stride * tile_dim, candidate_info.format);
|
||||
|
||||
const u32 offset = info.addr - candidate_info.addr;
|
||||
return IsBlockAligned(offset, surface) &&
|
||||
IsBlockAligned(copy_width, surface) &&
|
||||
(info.size.height == 1 || stride == tile_stride) &&
|
||||
(offset % tile_stride) + copy_width <= tile_stride;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
Reference in New Issue
Block a user