Compare commits

...

1 Commits

Author SHA1 Message Date
a6deaaaa80 Start new texcache 2022-08-26 11:22:34 +03:00
20 changed files with 2406 additions and 10 deletions

View File

@ -134,7 +134,7 @@ message(STATUS "Target architecture: ${ARCHITECTURE}")
# Configure C++ standard
# ===========================
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# set up output paths for executable binaries

View File

@ -59,6 +59,61 @@ __declspec(dllimport) void __stdcall DebugBreak(void);
#endif // _MSC_VER ndef
// Generates the flag-style operator set for the enumeration `type`:
//   - binary |, &, ^, << and >> (both operands are of the enumeration type),
//   - the matching compound assignments |=, &=, ^=, <<= and >>=,
//   - unary ~,
//   - True(value) / False(value), which test the underlying value against zero.
// Every operation is evaluated on std::underlying_type_t<type> and the result is
// cast back to `type`. Expand the macro at namespace scope, after the enumeration.
#define DECLARE_ENUM_FLAG_OPERATORS(type)                                                          \
    [[nodiscard]] constexpr type operator|(type lhs, type rhs) noexcept {                          \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(static_cast<Raw>(lhs) | static_cast<Raw>(rhs));                   \
    }                                                                                              \
    [[nodiscard]] constexpr type operator&(type lhs, type rhs) noexcept {                          \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(static_cast<Raw>(lhs) & static_cast<Raw>(rhs));                   \
    }                                                                                              \
    [[nodiscard]] constexpr type operator^(type lhs, type rhs) noexcept {                          \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(static_cast<Raw>(lhs) ^ static_cast<Raw>(rhs));                   \
    }                                                                                              \
    [[nodiscard]] constexpr type operator<<(type lhs, type rhs) noexcept {                         \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(static_cast<Raw>(lhs) << static_cast<Raw>(rhs));                  \
    }                                                                                              \
    [[nodiscard]] constexpr type operator>>(type lhs, type rhs) noexcept {                         \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(static_cast<Raw>(lhs) >> static_cast<Raw>(rhs));                  \
    }                                                                                              \
    constexpr type& operator|=(type& lhs, type rhs) noexcept {                                     \
        return lhs = lhs | rhs;                                                                    \
    }                                                                                              \
    constexpr type& operator&=(type& lhs, type rhs) noexcept {                                     \
        return lhs = lhs & rhs;                                                                    \
    }                                                                                              \
    constexpr type& operator^=(type& lhs, type rhs) noexcept {                                     \
        return lhs = lhs ^ rhs;                                                                    \
    }                                                                                              \
    constexpr type& operator<<=(type& lhs, type rhs) noexcept {                                    \
        return lhs = lhs << rhs;                                                                   \
    }                                                                                              \
    constexpr type& operator>>=(type& lhs, type rhs) noexcept {                                    \
        return lhs = lhs >> rhs;                                                                   \
    }                                                                                              \
    [[nodiscard]] constexpr type operator~(type value) noexcept {                                  \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<type>(~static_cast<Raw>(value));                                        \
    }                                                                                              \
    [[nodiscard]] constexpr bool True(type value) noexcept {                                       \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<Raw>(value) != 0;                                                       \
    }                                                                                              \
    [[nodiscard]] constexpr bool False(type value) noexcept {                                      \
        using Raw = std::underlying_type_t<type>;                                                  \
        return static_cast<Raw>(value) == 0;                                                       \
    }
// Generic function to get last error message.
// Call directly after the command or use the error num.
// This function might change the error code.

View File

@ -191,8 +191,7 @@ struct Regs {
enum ScalingMode : u32 {
NoScale = 0, // Doesn't scale the image
ScaleX = 1, // Downscales the image in half in the X axis and applies a box filter
ScaleXY =
2, // Downscales the image in half in both the X and Y axes and applies a box filter
ScaleXY = 2, // Downscales the image in half in both the X and Y axes and applies a box filter
};
union {

View File

@ -12,6 +12,8 @@ add_library(video_core STATIC
pica_types.h
primitive_assembly.cpp
primitive_assembly.h
rasterizer_accelerated.cpp
rasterizer_accelerated.h
rasterizer_interface.h
regs.cpp
regs.h
@ -102,6 +104,14 @@ add_library(video_core STATIC
texture/etc1.h
texture/texture_decode.cpp
texture/texture_decode.h
transform_cache/morton_swizzle.h
transform_cache/pixel_format.h
transform_cache/rasterizer_cache.h
transform_cache/slot_vector.h
transform_cache/surface.cpp
transform_cache/surface.h
transform_cache/types.h
transform_cache/utils.h
utils.h
vertex_loader.cpp
vertex_loader.h

View File

@ -0,0 +1,74 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <limits>
#include "core/memory.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/video_core.h"
namespace VideoCore {
/**
 * Adjusts the per-page surface counter of every page touched by the byte range
 * [addr, addr + size) and forwards the resulting state changes to the memory system.
 *
 * Consecutive pages whose counter dropped to zero are reported with a single
 * RasterizerMarkRegionCached(..., false) call; consecutive pages whose counter went
 * from zero to one are reported with a single RasterizerMarkRegionCached(..., true) call.
 *
 * @param addr  First byte of the region.
 * @param size  Length of the region in bytes; an empty region is a no-op.
 * @param delta Change applied to each counter; must be non-zero and is expected to be +1 or -1.
 */
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
    // An empty region touches no page. Returning early also keeps `addr + size - 1`
    // from wrapping below `addr`.
    if (size == 0) {
        return;
    }

    const u32 page_start = addr >> Memory::PAGE_BITS;
    // Index of the LAST page that contains a byte of the region (inclusive bound)
    const u32 page_end = ((addr + size - 1) >> Memory::PAGE_BITS);

    u32 uncache_start_addr = 0;
    u32 cache_start_addr = 0;
    u32 uncache_bytes = 0;
    u32 cache_bytes = 0;

    // page_end is inclusive, so the loop has to visit it as well; otherwise the last page
    // (or the only page of a small region) would never be counted
    for (u32 page = page_start; page <= page_end; page++) {
        auto& count = cached_pages.at(page);

        // Ensure no overflow happens
        if (delta > 0) {
            ASSERT_MSG(count < std::numeric_limits<u16>::max(), "Count will overflow!");
        } else if (delta < 0) {
            ASSERT_MSG(count > 0, "Count will underflow!");
        } else {
            ASSERT_MSG(false, "Delta must be non-zero!");
        }

        // Adds or subtracts 1, as count is an unsigned 16-bit value
        count += delta;

        // Assume delta is either -1 or 1
        if (count == 0) {
            // Start (or extend) a run of pages that are no longer referenced
            if (uncache_bytes == 0) {
                uncache_start_addr = page << Memory::PAGE_BITS;
            }
            uncache_bytes += Memory::PAGE_SIZE;
        } else if (uncache_bytes > 0) {
            // The run ended on the previous page, flush it
            VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes,
                                                            false);
            uncache_bytes = 0;
        }

        if (count == 1 && delta > 0) {
            // Start (or extend) a run of pages that just received their first reference
            if (cache_bytes == 0) {
                cache_start_addr = page << Memory::PAGE_BITS;
            }
            cache_bytes += Memory::PAGE_SIZE;
        } else if (cache_bytes > 0) {
            // The run ended on the previous page, flush it
            VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes,
                                                            true);
            cache_bytes = 0;
        }
    }

    // Flush the runs that were still open when the loop finished
    if (uncache_bytes > 0) {
        VideoCore::g_memory->RasterizerMarkRegionCached(uncache_start_addr, uncache_bytes,
                                                        false);
    }
    if (cache_bytes > 0) {
        VideoCore::g_memory->RasterizerMarkRegionCached(cache_start_addr, cache_bytes,
                                                        true);
    }
}
} // namespace VideoCore

View File

@ -0,0 +1,21 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "video_core/rasterizer_interface.h"
namespace VideoCore {
class RasterizerAccelerated : public RasterizerInterface {
public:
RasterizerAccelerated() = default;
virtual ~RasterizerAccelerated() override = default;
/// Increase/decrease the number of surface in pages touching the specified region
void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) override;
private:
std::array<u16, 0x18000> cached_pages{};
};
} // namespace VideoCore

View File

@ -85,7 +85,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr
auto glbuf_next_tile = [&] {
x = (x + 8) % stride;
gl_buffer += 8 * aligned_bytes_per_pixel;
if (!x) {
if (x == 0) {
y += 8;
gl_buffer -= stride * 9 * aligned_bytes_per_pixel;
}
@ -113,6 +113,7 @@ static void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr
LOG_ERROR(Render_OpenGL, "Out of bound texture");
break;
}
MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
tile_buffer += tile_size;
current_paddr += tile_size;

View File

@ -1,9 +1,8 @@
// Copyright 2015 Citra Emulator Project
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <atomic>
#include <functional>
#include "common/common_types.h"
@ -15,7 +14,7 @@ struct ScreenInfo;
namespace Pica::Shader {
struct OutputVertex;
} // namespace Pica::Shader
}
namespace VideoCore {
@ -25,6 +24,7 @@ enum class LoadCallbackStage {
Build,
Complete,
};
using DiskResourceLoadCallback = std::function<void(LoadCallbackStage, std::size_t, std::size_t)>;
class RasterizerInterface {
@ -42,6 +42,9 @@ public:
/// Notify rasterizer that the specified PICA register has been changed
virtual void NotifyPicaRegisterChanged(u32 id) = 0;
/// Increase/decrease the number of surfaces in the pages touching the specified region
virtual void UpdatePagesCachedCount(PAddr addr, u32 size, int delta) = 0;
/// Notify rasterizer that all caches should be flushed to 3DS memory
virtual void FlushAll() = 0;

View File

@ -8,6 +8,7 @@
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/vector_math.h"
#include "video_core/pica_types.h"
namespace Pica {

View File

@ -184,6 +184,7 @@ struct TexturingRegs {
const TextureConfig config;
const TextureFormat format;
};
const std::array<FullTextureConfig, 3> GetTextures() const {
return {{
{static_cast<bool>(main_config.texture0_enable), texture0, texture0_format},

View File

@ -1781,9 +1781,8 @@ void RasterizerOpenGL::SyncColorWriteMask() {
}
}
auto IsColorWriteEnabled = [&](u32 value) {
return (regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0) ? GL_TRUE
: GL_FALSE;
auto IsColorWriteEnabled = [&](u32 value) -> bool {
return regs.framebuffer.framebuffer.allow_color_write != 0 && value != 0;
};
state.color_mask.red_enabled = IsColorWriteEnabled(regs.framebuffer.output_merger.red_enable);

View File

@ -15,6 +15,7 @@ size_t CalculateTileSize(TexturingRegs::TextureFormat format);
struct TextureInfo {
PAddr physical_address;
std::array<PAddr, 5> cube_addresses;
unsigned int width;
unsigned int height;
ptrdiff_t stride;

View File

@ -0,0 +1,148 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/alignment.h"
#include "core/memory.h"
#include "video_core/transform_cache/pixel_format.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
namespace VideoCore {
/**
 * Copies a single 8x8 tile between a Morton-ordered buffer and a linear buffer.
 *
 * @tparam morton_to_gl true: read from tile_buffer and write to gl_buffer;
 *                      false: read from gl_buffer and write to tile_buffer.
 * @tparam format       Pixel format that determines the per-pixel sizes on both sides.
 * @param stride      Row pitch of the linear buffer, in pixels.
 * @param tile_buffer Start of the 64-pixel Morton-ordered tile.
 * @param gl_buffer   Linear buffer position of the tile; tile row y lands on linear row (7 - y).
 */
template <bool morton_to_gl, PixelFormat format>
constexpr void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
    constexpr u32 tiled_pixel_size = GetFormatBpp(format) / 8;
    constexpr u32 linear_pixel_size = GetBytesPerPixel(format);
    constexpr bool is_d24s8 = format == PixelFormat::D24S8;

    for (u32 row = 0; row < 8; ++row) {
        for (u32 col = 0; col < 8; ++col) {
            u8* const tiled_px =
                tile_buffer + VideoCore::MortonInterleave(col, row) * tiled_pixel_size;
            u8* const linear_px = gl_buffer + ((7 - row) * stride + col) * linear_pixel_size;

            if constexpr (is_d24s8 && morton_to_gl) {
                // Rotate the pixel: the fourth tiled byte becomes the first linear byte
                linear_px[0] = tiled_px[3];
                std::memcpy(linear_px + 1, tiled_px, 3);
            } else if constexpr (is_d24s8) {
                // Inverse rotation of the case above
                std::memcpy(tiled_px, linear_px + 1, 3);
                tiled_px[3] = linear_px[0];
            } else if constexpr (morton_to_gl) {
                std::memcpy(linear_px, tiled_px, tiled_pixel_size);
            } else {
                std::memcpy(tiled_px, linear_px, tiled_pixel_size);
            }
        }
    }
}
/**
 * Converts the guest memory range [start, end) of a surface that begins at `base` between
 * Morton-tiled guest memory and the linear buffer `gl_buffer`, one 8x8 tile at a time.
 *
 * @tparam morton_to_gl true: guest memory -> gl_buffer (start/end must be tile aligned, see the
 *                      ASSERT below); false: gl_buffer -> guest memory (partial first/last tiles
 *                      are supported through a temporary tile).
 * @tparam format       Pixel format of the surface.
 * @param stride    Row pitch of the surface, in pixels.
 * @param height    Height of the surface, in pixels.
 * @param gl_buffer Linear buffer holding the whole surface.
 * @param base      Guest address of the surface start.
 * @param start     First guest address to convert.
 * @param end       Guest address one past the last byte to convert.
 */
template <bool morton_to_gl, PixelFormat format>
constexpr void MortonCopy(u32 stride, u32 height, u8* gl_buffer, PAddr base, PAddr start, PAddr end) {
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
// A tile holds 8x8 = 64 pixels
constexpr u32 tile_size = bytes_per_pixel * 64;
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
// Tile boundaries (relative to base) surrounding the requested range
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
const PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
ASSERT(!morton_to_gl || (aligned_start == start && aligned_end == end));
// Pixel coordinates of the first tile that is touched
const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
u32 x = (begin_pixel_index % (stride * 8)) / 8;
u32 y = (begin_pixel_index / (stride * 8)) * 8;
// Tile row y maps to linear row (height - 8 - y): the two layouts are vertically flipped
gl_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
// Advances x/y and gl_buffer to the next tile. After the last tile of a row gl_buffer has
// moved forward by one full row in total, so stepping back 9 rows is a net move of 8 rows back.
auto glbuf_next_tile = [&] {
x = (x + 8) % stride;
gl_buffer += 8 * aligned_bytes_per_pixel;
if (!x) {
y += 8;
gl_buffer -= stride * 9 * aligned_bytes_per_pixel;
}
};
u8* tile_buffer = VideoCore::g_memory->GetPhysicalPointer(start);
// Partial first tile (gl -> guest only): swizzle the whole tile into a temporary and copy
// just the bytes that fall inside [start, end)
if (start < aligned_start && !morton_to_gl) {
std::array<u8, tile_size> tmp_buf;
MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
std::memcpy(tile_buffer, &tmp_buf[start - aligned_down_start],
std::min(aligned_start, end) - start);
tile_buffer += aligned_start - start;
glbuf_next_tile();
}
// Whole tiles
const u8* const buffer_end = tile_buffer + aligned_end - aligned_start;
PAddr current_paddr = aligned_start;
while (tile_buffer < buffer_end) {
// Pokemon Super Mystery Dungeon will try to use textures that go beyond
// the end address of VRAM. Stop reading if reaches invalid address
if (!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr) ||
!VideoCore::g_memory->IsValidPhysicalAddress(current_paddr + tile_size)) {
LOG_ERROR(Render_OpenGL, "Out of bound texture");
break;
}
MortonCopyTile<morton_to_gl, format>(stride, tile_buffer, gl_buffer);
tile_buffer += tile_size;
current_paddr += tile_size;
glbuf_next_tile();
}
// Partial last tile (gl -> guest only): same temporary-tile approach as for the first tile
if (end > std::max(aligned_start, aligned_end) && !morton_to_gl) {
std::array<u8, tile_size> tmp_buf;
MortonCopyTile<morton_to_gl, format>(stride, &tmp_buf[0], gl_buffer);
std::memcpy(tile_buffer, &tmp_buf[0], end - aligned_end);
}
}
// Guest (Morton) -> linear conversion routines. Entry i is the MortonCopy<true, ...>
// instantiation for the PixelFormat whose numeric value is i; formats without a routine
// (values 5 - 13 and the unused value 15) hold nullptr.
// NOTE(review): presumably indexed with static_cast<size_t>(format) by the callers, which
// then have to check for nullptr - confirm.
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> morton_to_gl_fns = {
MortonCopy<true, PixelFormat::RGBA8>, // 0
MortonCopy<true, PixelFormat::RGB8>, // 1
MortonCopy<true, PixelFormat::RGB5A1>, // 2
MortonCopy<true, PixelFormat::RGB565>, // 3
MortonCopy<true, PixelFormat::RGBA4>, // 4
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr, // 5 - 13
MortonCopy<true, PixelFormat::D16>, // 14
nullptr, // 15
MortonCopy<true, PixelFormat::D24>, // 16
MortonCopy<true, PixelFormat::D24S8> // 17
};
// Linear -> guest (Morton) conversion routines. Entry i is the MortonCopy<false, ...>
// instantiation for the PixelFormat whose numeric value is i; formats without a routine
// (values 5 - 13 and the unused value 15) hold nullptr.
// NOTE(review): presumably indexed with static_cast<size_t>(format) by the callers, which
// then have to check for nullptr - confirm.
static constexpr std::array<void (*)(u32, u32, u8*, PAddr, PAddr, PAddr), 18> gl_to_morton_fns = {
MortonCopy<false, PixelFormat::RGBA8>, // 0
MortonCopy<false, PixelFormat::RGB8>, // 1
MortonCopy<false, PixelFormat::RGB5A1>, // 2
MortonCopy<false, PixelFormat::RGB565>, // 3
MortonCopy<false, PixelFormat::RGBA4>, // 4
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr, // 5 - 13
MortonCopy<false, PixelFormat::D16>, // 14
nullptr, // 15
MortonCopy<false, PixelFormat::D24>, // 16
MortonCopy<false, PixelFormat::D24S8> // 17
};
} // namespace VideoCore

View File

@ -0,0 +1,194 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <string_view>
#include "core/hw/gpu.h"
#include "video_core/regs_framebuffer.h"
#include "video_core/regs_texturing.h"
namespace VideoCore {
/// Unified pixel format identifier for color buffers, textures and depth buffers.
/// The numeric values are significant: the conversion helpers in this file cast register
/// values straight to this enum. Value 15 is intentionally left without an enumerator.
enum class PixelFormat : u8 {
// First 5 formats are shared between textures and color buffers
RGBA8 = 0,
RGB8 = 1,
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
// Texture-only formats
IA8 = 5,
RG8 = 6,
I8 = 7,
A8 = 8,
IA4 = 9,
I4 = 10,
A4 = 11,
ETC1 = 12,
ETC1A4 = 13,
// Depth buffer-only formats
D16 = 14,
D24 = 16,
D24S8 = 17,
// Marker for "no usable format"
Invalid = 255,
};
/// Broad category of a surface; GetFormatType derives it from a PixelFormat.
enum class SurfaceType {
Color = 0,
Texture = 1,
Depth = 2,
DepthStencil = 3,
Fill = 4,
Invalid = 5
};
/// Returns the enumerator name of `format`, or "NotReal" for any value without an enumerator
/// name in the table below (this includes PixelFormat::Invalid).
constexpr std::string_view PixelFormatAsString(PixelFormat format) {
    // Indexed by the numeric value of the format; slot 15 has no enumerator
    constexpr std::string_view names[] = {
        "RGBA8",   // 0
        "RGB8",    // 1
        "RGB5A1",  // 2
        "RGB565",  // 3
        "RGBA4",   // 4
        "IA8",     // 5
        "RG8",     // 6
        "I8",      // 7
        "A8",      // 8
        "IA4",     // 9
        "I4",      // 10
        "A4",      // 11
        "ETC1",    // 12
        "ETC1A4",  // 13
        "D16",     // 14
        "NotReal", // 15
        "D24",     // 16
        "D24S8",   // 17
    };
    constexpr u32 name_count = sizeof(names) / sizeof(names[0]);

    const u32 index = static_cast<u32>(format);
    if (index >= name_count) {
        return "NotReal";
    }
    return names[index];
}
/// Maps a texture register format to PixelFormat. Values 0-13 are cast through unchanged,
/// anything above yields PixelFormat::Invalid.
constexpr PixelFormat PixelFormatFromTextureFormat(Pica::TexturingRegs::TextureFormat format) {
    const u32 raw_value = static_cast<u32>(format);
    if (raw_value >= 14) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(format);
}
/// Maps a framebuffer color format to PixelFormat. Values 0-4 are cast through unchanged,
/// anything above yields PixelFormat::Invalid.
constexpr PixelFormat PixelFormatFromColorFormat(Pica::FramebufferRegs::ColorFormat format) {
    const u32 raw_value = static_cast<u32>(format);
    if (raw_value >= 5) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(format);
}
/// Maps a framebuffer depth format to PixelFormat by adding 14 to its numeric value.
/// Values of 4 and above yield PixelFormat::Invalid. Note that a raw value of 1 produces
/// PixelFormat value 15, which has no enumerator.
constexpr PixelFormat PixelFormatFromDepthFormat(Pica::FramebufferRegs::DepthFormat format) {
    const u32 raw_value = static_cast<u32>(format);
    if (raw_value >= 4) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(raw_value + 14);
}
/// Maps a GPU register pixel format to PixelFormat. RGB565 and RGB5A1 are translated by
/// name (per the original note, the two enums order these formats differently); every
/// other value below 5 is cast through unchanged and the rest yield PixelFormat::Invalid.
constexpr PixelFormat PixelFormatFromGPUPixelFormat(GPU::Regs::PixelFormat format) {
    if (format == GPU::Regs::PixelFormat::RGB565) {
        return PixelFormat::RGB565;
    }
    if (format == GPU::Regs::PixelFormat::RGB5A1) {
        return PixelFormat::RGB5A1;
    }

    const u32 raw_value = static_cast<u32>(format);
    if (raw_value >= 5) {
        return PixelFormat::Invalid;
    }
    return static_cast<PixelFormat>(format);
}
/// Classifies a pixel format: values 0-4 are Color, 5-13 are Texture, D16/D24 are Depth,
/// D24S8 is DepthStencil and everything else is Invalid.
constexpr SurfaceType GetFormatType(PixelFormat pixel_format) {
    switch (pixel_format) {
    case PixelFormat::RGBA8:
    case PixelFormat::RGB8:
    case PixelFormat::RGB5A1:
    case PixelFormat::RGB565:
    case PixelFormat::RGBA4:
        return SurfaceType::Color;
    case PixelFormat::IA8:
    case PixelFormat::RG8:
    case PixelFormat::I8:
    case PixelFormat::A8:
    case PixelFormat::IA4:
    case PixelFormat::I4:
    case PixelFormat::A4:
    case PixelFormat::ETC1:
    case PixelFormat::ETC1A4:
        return SurfaceType::Texture;
    case PixelFormat::D16:
    case PixelFormat::D24:
        return SurfaceType::Depth;
    case PixelFormat::D24S8:
        return SurfaceType::DepthStencil;
    default:
        return SurfaceType::Invalid;
    }
}
/// Returns true when both formats belong to compatible categories: any mix of Color and
/// Texture formats, Depth with Depth, or DepthStencil with DepthStencil.
constexpr bool CheckFormatsBlittable(PixelFormat source_format, PixelFormat dest_format) {
    const SurfaceType src_kind = GetFormatType(source_format);
    const SurfaceType dst_kind = GetFormatType(dest_format);

    const auto is_color_like = [](SurfaceType kind) {
        return kind == SurfaceType::Color || kind == SurfaceType::Texture;
    };
    if (is_color_like(src_kind) && is_color_like(dst_kind)) {
        return true;
    }

    // Depth-type surfaces only pair with the exact same category
    const bool src_is_depth_kind =
        src_kind == SurfaceType::Depth || src_kind == SurfaceType::DepthStencil;
    return src_is_depth_kind && src_kind == dst_kind;
}
/// Returns the number of bits one pixel of `format` occupies, or 0 for values that are not
/// a known format (including PixelFormat::Invalid).
constexpr u32 GetFormatBpp(PixelFormat format) {
    // Indexed by the numeric value of the format
    constexpr u32 bits_per_pixel[] = {
        32, // 0  RGBA8
        24, // 1  RGB8
        16, // 2  RGB5A1
        16, // 3  RGB565
        16, // 4  RGBA4
        16, // 5  IA8
        16, // 6  RG8
        8,  // 7  I8
        8,  // 8  A8
        8,  // 9  IA4
        4,  // 10 I4
        4,  // 11 A4
        4,  // 12 ETC1
        8,  // 13 ETC1A4
        16, // 14 D16
        0,  // 15 (no enumerator)
        24, // 16 D24
        32, // 17 D24S8
    };
    constexpr u32 entry_count = sizeof(bits_per_pixel) / sizeof(bits_per_pixel[0]);

    const u32 index = static_cast<u32>(format);
    return index < entry_count ? bits_per_pixel[index] : 0;
}
/// Returns the per-pixel byte size used for the linear (host side) copy of a surface:
/// 4 for D24 and for every texture-only format, GetFormatBpp(format) / 8 otherwise.
constexpr u32 GetBytesPerPixel(PixelFormat format) {
    // OpenGL needs 4 bpp alignment for D24 since using GL_UNSIGNED_INT as type
    const bool uses_four_bytes =
        format == PixelFormat::D24 || GetFormatType(format) == SurfaceType::Texture;
    return uses_four_bytes ? 4 : GetFormatBpp(format) / 8;
}
} // namespace VideoCore

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,226 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <bit>
#include <numeric>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/assert.h"
#include "common/common_types.h"
namespace VideoCore {
/// Handle to an element stored in a SlotVector. A default-constructed SlotId is invalid.
struct SlotId {
/// Sentinel index that marks an invalid handle
static constexpr u32 INVALID_INDEX = std::numeric_limits<u32>::max();
constexpr auto operator<=>(const SlotId&) const noexcept = default;
/// True when the handle holds an index other than INVALID_INDEX
constexpr explicit operator bool() const noexcept {
return index != INVALID_INDEX;
}
u32 index = INVALID_INDEX;
};
/**
 * Container that hands out SlotId handles for the objects stored in it.
 *
 * Objects live in a manually managed array of unions; a bitset records which slots hold a
 * live object and a free list records which slots can be reused. Growing the storage moves
 * every live object to the same index of the new array, so SlotIds stay valid, while
 * references and pointers to elements do not.
 */
template <class T>
requires std::is_nothrow_move_assignable_v<T> && std::is_nothrow_move_constructible_v<T>
class SlotVector {
public:
    /// Forward iterator over the occupied slots, in increasing index order
    class Iterator {
        friend SlotVector<T>;

    public:
        constexpr Iterator() = default;

        /// Moves to the next occupied slot, or to the end position when there is none
        Iterator& operator++() noexcept {
            const u64* const bitset = slot_vector->stored_bitset.data();
            const u32 size = static_cast<u32>(slot_vector->stored_bitset.size()) * 64;
            if (id.index < size) {
                do {
                    ++id.index;
                } while (id.index < size && !IsValid(bitset));
                if (id.index == size) {
                    id.index = SlotId::INVALID_INDEX;
                }
            }
            return *this;
        }

        Iterator operator++(int) noexcept {
            const Iterator copy{*this};
            ++*this;
            return copy;
        }

        bool operator==(const Iterator& other) const noexcept {
            return id.index == other.id.index;
        }

        bool operator!=(const Iterator& other) const noexcept {
            return id.index != other.id.index;
        }

        /// Yields the slot handle together with a pointer to the stored object
        std::pair<SlotId, T*> operator*() const noexcept {
            return {id, std::addressof((*slot_vector)[id])};
        }

        T* operator->() const noexcept {
            return std::addressof((*slot_vector)[id]);
        }

    private:
        Iterator(SlotVector<T>* slot_vector_, SlotId id_) noexcept
            : slot_vector{slot_vector_}, id{id_} {}

        bool IsValid(const u64* bitset) const noexcept {
            return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0;
        }

        SlotVector<T>* slot_vector = nullptr;
        SlotId id;
    };

    SlotVector() = default;

    // The container owns `values` through a raw pointer; a member-wise copy would make two
    // containers destroy and delete the same storage.
    SlotVector(const SlotVector&) = delete;
    SlotVector& operator=(const SlotVector&) = delete;

    /// Destroys every live object and releases the storage
    ~SlotVector() noexcept {
        size_t index = 0;
        for (u64 bits : stored_bitset) {
            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
                if ((bits & 1) != 0) {
                    values[index + bit].object.~T();
                }
            }
            index += 64;
        }
        delete[] values;
    }

    /// Returns the object stored in slot `id`; the slot must be occupied
    [[nodiscard]] T& operator[](SlotId id) noexcept {
        ValidateIndex(id);
        return values[id.index].object;
    }

    [[nodiscard]] const T& operator[](SlotId id) const noexcept {
        ValidateIndex(id);
        return values[id.index].object;
    }

    /// Constructs a T from `args` in a free slot (growing the storage when none is free)
    /// and returns the handle of that slot
    template <typename... Args>
    [[nodiscard]] SlotId insert(Args&&... args) noexcept {
        const u32 index = FreeValueIndex();
        new (&values[index].object) T(std::forward<Args>(args)...);
        SetStorageBit(index);
        return SlotId{index};
    }

    /// Destroys the object in slot `id` and makes the slot available for reuse
    void erase(SlotId id) noexcept {
        values[id.index].object.~T();
        free_list.push_back(id.index);
        ResetStorageBit(id.index);
    }

    /// Returns an iterator to the occupied slot with the lowest index, or end() when empty
    [[nodiscard]] Iterator begin() noexcept {
        const auto it = std::find_if(stored_bitset.begin(), stored_bitset.end(),
                                     [](u64 value) { return value != 0; });
        if (it == stored_bitset.end()) {
            return end();
        }
        // Distance is measured FROM the start of the bitset TO the first non-empty word;
        // the reverse order yields a negative value whenever that word is not the first one
        const u32 word_index = static_cast<u32>(std::distance(stored_bitset.begin(), it));
        const SlotId first_id{word_index * 64 + static_cast<u32>(std::countr_zero(*it))};
        return Iterator(this, first_id);
    }

    [[nodiscard]] Iterator end() noexcept {
        return Iterator(this, SlotId{SlotId::INVALID_INDEX});
    }

private:
    struct NonTrivialDummy {
        NonTrivialDummy() noexcept {}
    };

    /// Storage for one slot; `object` is only alive while the slot's bit is set
    union Entry {
        Entry() noexcept : dummy{} {}
        ~Entry() noexcept {}

        NonTrivialDummy dummy;
        T object;
    };

    void SetStorageBit(u32 index) noexcept {
        stored_bitset[index / 64] |= u64(1) << (index % 64);
    }

    void ResetStorageBit(u32 index) noexcept {
        stored_bitset[index / 64] &= ~(u64(1) << (index % 64));
    }

    bool ReadStorageBit(u32 index) noexcept {
        return ((stored_bitset[index / 64] >> (index % 64)) & 1) != 0;
    }

    /// Debug-only check that `id` is valid, in range and refers to an occupied slot
    void ValidateIndex(SlotId id) const noexcept {
        DEBUG_ASSERT(id);
        DEBUG_ASSERT(id.index / 64 < stored_bitset.size());
        DEBUG_ASSERT(((stored_bitset[id.index / 64] >> (id.index % 64)) & 1) != 0);
    }

    /// Pops a free slot index, doubling the capacity first when the free list is empty
    [[nodiscard]] u32 FreeValueIndex() noexcept {
        if (free_list.empty()) {
            Reserve(values_capacity ? (values_capacity << 1) : 1);
        }
        const u32 free_index = free_list.back();
        free_list.pop_back();
        return free_index;
    }

    /// Grows the storage to `new_capacity` slots, moving every live object to the same index
    /// of the new array and appending the new indices to the free list
    void Reserve(size_t new_capacity) noexcept {
        Entry* const new_values = new Entry[new_capacity];
        size_t index = 0;
        for (u64 bits : stored_bitset) {
            for (size_t bit = 0; bits; ++bit, bits >>= 1) {
                const size_t i = index + bit;
                if ((bits & 1) == 0) {
                    continue;
                }
                T& old_value = values[i].object;
                new (&new_values[i].object) T(std::move(old_value));
                old_value.~T();
            }
            index += 64;
        }

        stored_bitset.resize((new_capacity + 63) / 64);

        const size_t old_free_size = free_list.size();
        free_list.resize(old_free_size + (new_capacity - values_capacity));
        std::iota(free_list.begin() + old_free_size, free_list.end(),
                  static_cast<u32>(values_capacity));

        delete[] values;
        values = new_values;
        values_capacity = new_capacity;
    }

    Entry* values = nullptr;
    size_t values_capacity = 0;
    std::vector<u64> stored_bitset; ///< One bit per slot, set while the slot holds an object
    std::vector<u32> free_list;     ///< Indices of the slots that are currently unused
};
} // namespace VideoCore
namespace std {
/// Hashes a SlotId through the hash of its index
template <>
struct hash<VideoCore::SlotId> {
    size_t operator()(const VideoCore::SlotId& id) const noexcept {
        const std::hash<u32> index_hasher{};
        return index_hasher(id.index);
    }
};
} // namespace std

View File

@ -0,0 +1,32 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/transform_cache/surface.h"
#include "video_core/transform_cache/utils.h"
namespace VideoCore {
/// Stores a copy of the surface description and precomputes the offset of each mip level
Surface::Surface(const SurfaceInfo& surface_info)
    : info(surface_info), mip_level_offsets(CalculateMipLevelOffsets(surface_info)) {}
/**
 * Checks whether `other_addr` is the start address of one of this surface's mip levels.
 *
 * @param other_addr Address to test.
 * @return The index of the mip level that starts at `other_addr`, or std::nullopt when the
 *         address lies outside the surface or does not coincide with the start of a level.
 */
[[nodiscard]] std::optional<u32> Surface::IsMipLevel(PAddr other_addr) {
    // Addresses in front of the surface can never match; check before subtracting
    if (other_addr < info.addr) {
        return std::nullopt;
    }
    const u32 offset = other_addr - info.addr;
    if (offset > info.byte_size) {
        return std::nullopt;
    }

    // Only the first info.levels entries are meaningful, the remaining ones are zero
    const auto first = mip_level_offsets.begin();
    const auto last = first + info.levels;
    const auto it = std::find(first, last, offset);
    if (it == last) {
        return std::nullopt;
    }

    // Report the level index; `*it` would merely echo the offset the caller passed in
    return static_cast<u32>(it - first);
}
} // namespace VideoCore

View File

@ -0,0 +1,140 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <algorithm>
#include <span>
#include <optional>
#include <type_traits>
#include "common/hash.h"
#include "video_core/transform_cache/pixel_format.h"
#include "video_core/transform_cache/types.h"
#include "video_core/transform_cache/slot_vector.h"
namespace VideoCore {
/// Upper bound for SurfaceInfo::levels; also the size of Surface::mip_level_offsets
constexpr u32 MAX_PICA_LEVELS = 8;
// All handle types are plain SlotId aliases, so the compiler does not tell them apart
using SurfaceId = SlotId;
using SurfaceViewId = SlotId;
using SurfaceAllocId = SlotId;
using FramebufferId = SlotId;
/// State flags of a Surface; combine them with the operators generated below
enum class SurfaceFlagBits : u32 {
AcceleratedUpload = 1 << 0, ///< Upload can be accelerated in the GPU
RequiresConvertion = 1 << 1, ///< Guest format is not supported natively and it has to be converted
GPUInvalidated = 1 << 2, ///< Contents have been modified from the host GPU
CPUInvalidated = 1 << 3, ///< Contents have been modified from the guest CPU
Tracked = 1 << 4, ///< Writes and reads are being hooked from the CPU JIT
Registered = 1 << 5, ///< True when the image is registered
Picked = 1 << 6, ///< Temporary flag to mark the image as picked
};
// Generates |, &, ^, ~, shifts, compound assignments and True()/False() for the enum
DECLARE_ENUM_FLAG_OPERATORS(SurfaceFlagBits);
/// Description of a surface. This is an aggregate, so designated initializers have to list
/// the members in the order they are declared here.
struct SurfaceInfo {
auto operator<=>(const SurfaceInfo& other) const noexcept = default;
VAddr addr = 0;
u32 byte_size = 0;
VAddr addr_end = 0;
PixelFormat format = PixelFormat::Invalid;
u32 levels = 1;
bool is_tiled = false;
/**
* The size member dictates what dimensions the allocated texture will have.
* That sometimes might include padding, especially when the surface is being used
* as a framebuffer, where games commonly allocate a 256x512 buffer and only render to the
* lower 240x400 (LCD resolution) portion. This is done due to hardware limitations
* regarding texture sizes by the PICA and seems to be cheaper than rendering to the
* entire 256x512 region and downsampling it. The real_size dictates the actual size
* of the surface and is used in display transfer operations to crop the additional padding.
**/
Extent real_size{0, 0};
Extent size{0, 0};
};
/// Empty tag type; nothing in this header uses it.
struct NullSurfaceParams {};
/// Properties used to create and locate a SurfaceView
struct SurfaceViewInfo {
auto operator<=>(const SurfaceViewInfo& other) const noexcept = default;
// Declared here only; the definition is not part of this header
[[nodiscard]] bool IsRenderTarget() const noexcept;
SurfaceViewType type{};
PixelFormat format{};
u32 layers = 1;
};
struct Surface {
explicit Surface(const SurfaceInfo& info);
[[nodiscard]] std::optional<u32> IsMipLevel(PAddr other_addr);
[[nodiscard]] SurfaceViewId FindView(const SurfaceViewInfo& view_info) const noexcept;
void TrackView(const SurfaceViewInfo& view_info, SurfaceViewId image_view_id);
[[nodiscard]] bool Overlaps(PAddr overlap_addr, u32 overlap_size) const noexcept {
const PAddr overlap_end = overlap_addr + overlap_size;
return info.addr < overlap_end && overlap_addr < info.addr_end;
}
SurfaceInfo info;
SurfaceFlagBits flags = SurfaceFlagBits::CPUInvalidated;
u64 modification_tick = 0;
u64 frame_tick = 0;
std::array<u32, MAX_PICA_LEVELS> mip_level_offsets{};
std::vector<SurfaceViewInfo> surface_view_infos;
std::vector<SurfaceViewId> surface_view_ids;
};
/// A view onto a Surface, identified by the id of the surface it was created from.
struct SurfaceView {
// Declared here only; the definition is not part of this header
explicit SurfaceView(const SurfaceViewInfo& info,
const SurfaceInfo& surface_info, SurfaceId surface_id);
SurfaceId image_id{};
PixelFormat format{};
SurfaceViewType type{};
u32 layers = 1;
Extent size{0, 0};
u64 invalidation_tick = 0;
u64 modification_tick = 0;
};
/// Framebuffer properties used to lookup a framebuffer
struct RenderTargets {
constexpr auto operator<=>(const RenderTargets&) const noexcept = default;
constexpr bool Contains(std::span<const SurfaceViewId> elements) const noexcept {
const auto contains = [elements](SurfaceViewId item) {
return std::ranges::find(elements, item) != elements.end();
};
return contains(color_buffer_id) || contains(depth_buffer_id);
}
SurfaceViewId color_buffer_id;
SurfaceViewId depth_buffer_id;
Extent size;
};
} // namespace VideoCore
namespace std {
/// Hashes the raw bytes of a RenderTargets object
template <>
struct hash<VideoCore::RenderTargets> {
    size_t operator()(const VideoCore::RenderTargets& rt) const noexcept {
        constexpr size_t byte_count = sizeof(VideoCore::RenderTargets);
        const auto digest = Common::ComputeHash64(&rt, byte_count);
        return digest;
    }
};
} // namespace std

View File

@ -0,0 +1,57 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <compare>
#include "common/common_types.h"
namespace VideoCore {
/// Kind of a surface view; stored in SurfaceViewInfo::type and SurfaceView::type
enum class SurfaceViewType : u32 {
e2D,
eCube,
eShadow2D,
eProjection,
eShadowCube
};
/// Signed 2D position; defaults to the origin
struct Offset {
constexpr auto operator<=>(const Offset&) const noexcept = default;
s32 x = 0;
s32 y = 0;
};
/// Unsigned 2D size; defaults to 1x1
struct Extent {
constexpr auto operator<=>(const Extent&) const noexcept = default;
u32 width = 1;
u32 height = 1;
};
/// Parameters of a copy between two surfaces. The members have no default initializers.
struct SurfaceCopy {
u32 src_level;
u32 dst_level;
Offset src_offset;
Offset dst_offset;
Extent extent;
};
/// Parameters of a copy between a buffer and a surface. The members have no default
/// initializers.
struct BufferSurfaceCopy {
u32 buffer_offset;
u32 buffer_size;
u32 buffer_row_length;
u32 buffer_image_height;
u32 texture_level;
Offset texture_offset;
Extent texture_extent;
};
/// Parameters of a copy between two buffers. The members have no default initializers.
struct BufferCopy {
u32 src_offset;
u32 dst_offset;
u32 size;
};
} // namespace VideoCore

View File

@ -0,0 +1,120 @@
// Copyright 2022 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <utility>
#include "video_core/transform_cache/surface.h"
#include "video_core/transform_cache/pixel_format.h"
namespace VideoCore {
/// Criteria for matching a candidate surface; combine them with the operators generated below
enum class MatchFlags {
Invalid = 1 << 0, ///< Candidate is allowed to be invalid
Exact = 1 << 1, ///< Candidate must match image exactly
SubRect = 1 << 2, ///< Candidate is fully encompassed by image
Copy = 1 << 3, ///< Candidate can be used as a copy source
Expand = 1 << 4, ///< Candidate fully encompasses image
TexCopy = 1 << 5 ///< Candidate can be used for texture/display transfer
};
// Generates |, &, ^, ~, shifts, compound assignments and True()/False() for the enum
DECLARE_ENUM_FLAG_OPERATORS(MatchFlags);
/**
 * Checks whether a byte count is a whole number of the surface's addressable blocks.
 *
 * A block is 64 pixels for tiled surfaces and a single pixel for linear ones.
 *
 * @param size    Byte count (or byte offset) to test.
 * @param surface Surface whose tiling mode and format define the block size.
 * @return true when `size` is a multiple of the block size; false when it is not, or when
 *         the surface format has no known size.
 */
[[nodiscard]] constexpr bool IsBlockAligned(u32 size, const Surface& surface) {
    // Morton tiled images are block instead of pixel aligned
    const u32 pixels = surface.info.is_tiled ? 64 : 1;
    const u32 block_bits = pixels * GetFormatBpp(surface.info.format);
    if (block_bits == 0) {
        // Unknown format: there is no block size to align to (and nothing to divide by)
        return false;
    }
    if (block_bits < 8) {
        // A block smaller than one byte (single 4 bpp pixel): every byte boundary is also
        // a block boundary
        return true;
    }
    // `size % block_bits / 8` would parse as `(size % block_bits) / 8`; convert the block
    // size to bytes first and only then take the remainder
    return size % (block_bits / 8) == 0;
}
/// Converts a byte count to the number of pixels of `format` it holds (rounded down).
/// The format must have a non-zero bit size.
[[nodiscard]] constexpr u32 PixelsInBytes(u32 size, PixelFormat format) {
    const u32 total_bits = size * 8;
    return total_bits / GetFormatBpp(format);
}
/// Converts a pixel count of `format` to the number of bytes it occupies (rounded down)
[[nodiscard]] constexpr u32 BytesInPixels(u32 pixels, PixelFormat format) {
    const u32 total_bits = pixels * GetFormatBpp(format);
    return total_bits / 8;
}
/**
 * Builds the source and destination surface descriptions of a display transfer.
 *
 * Only format, tiling mode and size are filled in; every other SurfaceInfo member keeps its
 * default value.
 *
 * @param config Display transfer registers.
 * @return {source description, destination description}. The destination width is halved
 *         for any scaling mode other than NoScale, the height only for ScaleXY.
 */
[[nodiscard]] constexpr auto MakeSurfaceCopyInfosFromTransferConfig(
    const GPU::Regs::DisplayTransferConfig& config) -> std::pair<SurfaceInfo, SurfaceInfo> {
    using ScalingMode = GPU::Regs::DisplayTransferConfig::ScalingMode;

    // Designators must follow the declaration order of SurfaceInfo
    // (format, is_tiled, size); any other order is ill-formed in C++20
    const SurfaceInfo source_info = {
        .format = PixelFormatFromGPUPixelFormat(config.input_format),
        .is_tiled = !config.input_linear,
        .size = Extent{config.output_width, config.output_height},
    };

    const u32 dest_width = config.scaling != ScalingMode::NoScale ? config.output_width.Value() / 2
                                                                  : config.output_width.Value();
    const u32 dest_height = config.scaling == ScalingMode::ScaleXY ? config.output_height.Value() / 2
                                                                   : config.output_height.Value();

    const SurfaceInfo dest_info = {
        .format = PixelFormatFromGPUPixelFormat(config.output_format),
        .is_tiled = config.input_linear != config.dont_swizzle,
        .size = Extent{dest_width, dest_height},
    };

    return std::make_pair(source_info, dest_info);
}
/**
 * Computes the byte offset of every mip level from the start of the surface.
 *
 * Each level is width * height * GetBytesPerPixel(format) bytes and both dimensions are
 * halved from one level to the next. Entries past info.levels stay zero.
 */
[[nodiscard]] constexpr auto CalculateMipLevelOffsets(const SurfaceInfo& info) noexcept
    -> std::array<u32, MAX_PICA_LEVELS> {
    ASSERT(info.levels <= MAX_PICA_LEVELS);

    const u32 pixel_bytes = GetBytesPerPixel(info.format);
    std::array<u32, MAX_PICA_LEVELS> level_offsets{};

    u32 level_width = info.size.width;
    u32 level_height = info.size.height;
    u32 running_offset = 0;
    for (u32 level = 0; level < info.levels; ++level) {
        level_offsets[level] = running_offset;
        running_offset += level_width * level_height * pixel_bytes;
        level_width >>= 1;
        level_height >>= 1;
    }
    return level_offsets;
}
/// Returns the sum of the sizes of all info.levels mip levels, where each level is
/// width * height * GetBytesPerPixel(format) bytes and both dimensions halve per level
[[nodiscard]] constexpr u32 CalculateSurfaceSize(const SurfaceInfo& info) noexcept {
    const u32 pixel_bytes = GetBytesPerPixel(info.format);

    u32 level_width = info.size.width;
    u32 level_height = info.size.height;
    u32 total_bytes = 0;
    for (u32 remaining = info.levels; remaining != 0; --remaining) {
        total_bytes += level_width * level_height * pixel_bytes;
        level_width >>= 1;
        level_height >>= 1;
    }
    return total_bytes;
}
/**
 * Helper function used to detect a compatible copy surface.
 *
 * @param info    Description of the copy; real_size.width is used as the copy width and a
 *                size.width that differs from it triggers the alignment checks.
 * @param surface Candidate surface.
 * @return false when the candidate has an invalid format or fails the alignment checks.
 */
[[nodiscard]] constexpr bool CanTexCopy(const SurfaceInfo& info, const Surface& surface) {
    const auto& candidate = surface.info;
    if (candidate.format == PixelFormat::Invalid) {
        return false;
    }

    // Without padding there is nothing further to verify
    if (info.size.width == info.real_size.width) {
        return true;
    }

    const u32 copy_width = info.real_size.width;
    const u32 stride = candidate.size.width;
    const u32 tile_rows = candidate.is_tiled ? 8 : 1;
    const u32 tile_stride = BytesInPixels(stride * tile_rows, candidate.format);
    const u32 offset = info.addr - candidate.addr;

    if (!IsBlockAligned(offset, surface)) {
        return false;
    }
    if (!IsBlockAligned(copy_width, surface)) {
        return false;
    }
    // NOTE(review): `stride` is a pixel count while `tile_stride` is a byte count derived
    // from that same value, so this comparison looks unintended - confirm which stride of
    // the copy was meant here.
    if (info.size.height != 1 && stride != tile_stride) {
        return false;
    }
    // The copied row must not cross the end of a tile row
    return (offset % tile_stride) + copy_width <= tile_stride;
}
} // namespace VideoCore