morton_swizzle: Implement texture formats in UNSWIZZLE_TABLE
* I can now remove that loop that has been messing with my OCD
This commit is contained in:
@ -105,11 +105,11 @@ public:
|
||||
return cptr;
|
||||
}
|
||||
|
||||
std::span<std::byte> GetBytes(std::size_t size) {
|
||||
auto GetWriteBytes(std::size_t size) {
|
||||
return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size};
|
||||
}
|
||||
|
||||
std::span<const std::byte> GetBytes(std::size_t size) const {
|
||||
auto GetReadBytes(std::size_t size) const {
|
||||
return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size};
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@ namespace VideoCore {
|
||||
|
||||
void RasterizerAccelerated::UpdatePagesCachedCount(PAddr addr, u32 size, int delta) {
|
||||
const u32 page_start = addr >> Memory::CITRA_PAGE_BITS;
|
||||
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS);
|
||||
const u32 page_end = ((addr + size - 1) >> Memory::CITRA_PAGE_BITS) + 1;
|
||||
|
||||
u32 uncache_start_addr = 0;
|
||||
u32 cache_start_addr = 0;
|
||||
|
@ -26,16 +26,14 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceLoad, "RasterizerCache", "Surface Loa
|
||||
void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
||||
DEBUG_ASSERT(load_start >= addr && load_end <= end);
|
||||
|
||||
// We start reading from addr instead of load_start, otherwise LookupTexture
|
||||
// in UnswizzleTexture will not sample texels correctly
|
||||
auto source_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
|
||||
auto source_ptr = VideoCore::g_memory->GetPhysicalRef(load_start);
|
||||
if (!source_ptr) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto start_offset = load_start - addr;
|
||||
const auto texture_data = source_ptr.GetBytes(load_end - addr);
|
||||
const auto upload_size = texture_data.size() - start_offset;
|
||||
const auto upload_data = source_ptr.GetWriteBytes(load_end - load_start);
|
||||
const auto upload_size = static_cast<u32>(upload_data.size());
|
||||
|
||||
if (gl_buffer.empty()) {
|
||||
gl_buffer.resize(width * height * GetBytesPerPixel(pixel_format));
|
||||
@ -46,8 +44,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
||||
if (!is_tiled) {
|
||||
ASSERT(type == SurfaceType::Color);
|
||||
|
||||
auto upload_data = texture_data.subspan(start_offset, upload_size);
|
||||
auto dest_buffer = std::span{gl_buffer}.subspan(start_offset, upload_size);
|
||||
const auto dest_buffer = std::span{gl_buffer.begin() + start_offset, upload_size};
|
||||
if (pixel_format == PixelFormat::RGBA8 && GLES) {
|
||||
Pica::Texture::ConvertABGRToRGBA(upload_data, dest_buffer);
|
||||
} else if (pixel_format == PixelFormat::RGB8 && GLES) {
|
||||
@ -56,7 +53,7 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
||||
std::memcpy(dest_buffer.data(), upload_data.data(), upload_size);
|
||||
}
|
||||
} else {
|
||||
UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer);
|
||||
UnswizzleTexture(*this, start_offset, upload_data, gl_buffer);
|
||||
}
|
||||
}
|
||||
|
||||
@ -64,15 +61,14 @@ MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Fl
|
||||
void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
||||
DEBUG_ASSERT(flush_start >= addr && flush_end <= end);
|
||||
|
||||
auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(addr);
|
||||
auto dest_ptr = VideoCore::g_memory->GetPhysicalRef(flush_start);
|
||||
if (!dest_ptr) [[unlikely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto start_offset = flush_start - addr;
|
||||
const auto end_offset = flush_end - addr;
|
||||
const auto texture_data = dest_ptr.GetBytes(flush_end - addr);
|
||||
const auto download_size = texture_data.size() - start_offset;
|
||||
const auto download_dest = dest_ptr.GetWriteBytes(flush_end - flush_start);
|
||||
const auto download_size = static_cast<u32>(download_dest.size());
|
||||
|
||||
MICROPROFILE_SCOPE(RasterizerCache_SurfaceFlush);
|
||||
|
||||
@ -84,9 +80,9 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
||||
std::memcpy(backup_data.data(), &dest_ptr[coarse_start_offset], backup_bytes);
|
||||
}
|
||||
|
||||
for (u32 offset = coarse_start_offset; offset < end_offset; offset += fill_size) {
|
||||
for (u32 offset = coarse_start_offset; offset < download_size; offset += fill_size) {
|
||||
std::memcpy(&dest_ptr[offset], &fill_data[0],
|
||||
std::min(fill_size, end_offset - offset));
|
||||
std::min(fill_size, download_size - offset));
|
||||
}
|
||||
|
||||
if (backup_bytes)
|
||||
@ -94,17 +90,16 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
||||
} else if (!is_tiled) {
|
||||
ASSERT(type == SurfaceType::Color);
|
||||
|
||||
auto dest_buffer = texture_data.subspan(start_offset, download_size);
|
||||
auto download_data = std::span{gl_buffer}.subspan(start_offset, download_size);
|
||||
const auto download_data = std::span{gl_buffer.begin() + start_offset, download_size};
|
||||
if (pixel_format == PixelFormat::RGBA8 && GLES) {
|
||||
Pica::Texture::ConvertABGRToRGBA(gl_buffer, download_data);
|
||||
} else if (pixel_format == PixelFormat::RGB8 && GLES) {
|
||||
Pica::Texture::ConvertBGRToRGB(gl_buffer, download_data);
|
||||
} else {
|
||||
std::memcpy(dest_buffer.data(), download_data.data(), download_size);
|
||||
std::memcpy(download_dest.data(), download_data.data(), download_size);
|
||||
}
|
||||
} else {
|
||||
SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data);
|
||||
SwizzleTexture(*this, start_offset, gl_buffer, download_dest);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,92 +5,179 @@
|
||||
#pragma once
|
||||
#include <span>
|
||||
#include <bit>
|
||||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "core/memory.h"
|
||||
#include "common/color.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/texture/etc1.h"
|
||||
#include "video_core/utils.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
inline u32 MakeInt(std::span<std::byte> bytes) {
|
||||
u32 integer{};
|
||||
std::memcpy(&integer, bytes.data(), sizeof(u32));
|
||||
template <typename T>
|
||||
inline T MakeInt(const std::byte* bytes) {
|
||||
T integer{};
|
||||
std::memcpy(&integer, bytes, sizeof(T));
|
||||
|
||||
return integer;
|
||||
}
|
||||
|
||||
// Converts one pixel read from `source` into its linear representation at `dest`.
// The conversion is selected by the `format` template argument.
//
// `source` must provide at least as many bytes as the selected branch reads, and
// `dest` at least as many as it writes (four bytes for D24S8, IA8, RG8, I8, A8,
// IA4 and the RGBA8 swap path, three for the RGB8 swap path, bytes_per_pixel
// otherwise). Nothing is returned; the result is written through `dest`.
template <PixelFormat format>
|
||||
inline void DecodePixel(const std::byte* source, std::byte* dest) {
|
||||
// Whole bytes per source pixel; only used by the plain-copy fallback below.
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
// Rotate the 32-bit word left by 8 bits so the top byte becomes the low byte.
const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &d24s8, sizeof(u32));
|
||||
} else if constexpr (format == PixelFormat::IA8) {
|
||||
// source[1] is replicated into the first three output bytes and source[0]
// becomes the fourth (presumably intensity -> RGB, alpha -> A; confirm).
std::memset(dest, static_cast<int>(source[1]), 3);
|
||||
dest[3] = source[0];
|
||||
} else if constexpr (format == PixelFormat::RG8) {
|
||||
// Delegates to Color::DecodeRG8 and stores the four bytes it produces.
const auto rgba = Color::DecodeRG8(reinterpret_cast<const u8*>(source));
|
||||
std::memcpy(dest, rgba.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::I8) {
|
||||
// Single source byte replicated into the first three bytes; fourth byte is 255.
std::memset(dest, static_cast<int>(source[0]), 3);
|
||||
dest[3] = std::byte{255};
|
||||
} else if constexpr (format == PixelFormat::A8) {
|
||||
// First three bytes are zeroed; the single source byte goes to the fourth.
std::memset(dest, 0, 3);
|
||||
dest[3] = source[0];
|
||||
} else if constexpr (format == PixelFormat::IA4) {
|
||||
// One byte holds two 4-bit values: the high nibble is expanded to 8 bits and
// fills the first three bytes, the low nibble is expanded into the fourth.
const u8 ia4 = static_cast<const u8>(source[0]);
|
||||
std::memset(dest, Color::Convert4To8(ia4 >> 4), 3);
|
||||
dest[3] = std::byte{Color::Convert4To8(ia4 & 0xF)};
|
||||
// NOTE(review): the two GLES branches use a plain `if`, not `if constexpr`,
// presumably because GLES is a runtime flag -- confirm against gl_vars.h.
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||
// Reverse the byte order of the 32-bit pixel.
const u32 abgr = MakeInt<u32>(source);
|
||||
const u32 rgba = std::byteswap(abgr);
|
||||
std::memcpy(dest, &rgba, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||
// Swap the first and third byte; the middle byte is copied unchanged.
dest[0] = source[2];
|
||||
dest[1] = source[1];
|
||||
dest[2] = source[0];
|
||||
} else {
|
||||
// No conversion needed: copy the pixel bytes verbatim.
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes the 4-bit pixel at tile coordinate (x, y) from `source_tile` and writes
// four bytes to `dest_pixel`.
//
// Two pixels share each source byte, so the pixel is addressed through its morton
// index rather than through a per-pixel pointer. For PixelFormat::I4 the expanded
// value fills the first three output bytes and the fourth is 255; for any other
// format the first three bytes are zero and the expanded value is the fourth.
template <PixelFormat format>
|
||||
inline void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||
// Index of the pixel inside the tile, counted in pixels (nibbles).
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
||||
// Halving the pixel index gives the byte that contains the nibble.
const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
|
||||
// Odd indices take the high nibble, even indices the low nibble; the nibble is
// then widened to 8 bits.
const u8 pixel = Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
|
||||
|
||||
if constexpr (format == PixelFormat::I4) {
|
||||
std::memset(dest_pixel, static_cast<int>(pixel), 3);
|
||||
dest_pixel[3] = std::byte{255};
|
||||
} else {
|
||||
std::memset(dest_pixel, 0, 3);
|
||||
dest_pixel[3] = std::byte{pixel};
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes the pixel at tile coordinate (x, y) from an ETC1 / ETC1A4 compressed
// tile and writes four bytes (three colour bytes followed by alpha) to `dest_pixel`.
//
// The tile is addressed as 4x4 subtiles laid out two per row. A subtile occupies
// 8 bytes, or 16 bytes for PixelFormat::ETC1A4, where the first 8 bytes hold one
// 4-bit alpha value per pixel. Without an alpha block the alpha byte is 255.
// `x` and `y` are taken by value and reduced to subtile-local coordinates.
template <PixelFormat format>
|
||||
inline void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||
constexpr u32 subtile_width = 4;
|
||||
constexpr u32 subtile_height = 4;
|
||||
constexpr bool has_alpha = format == PixelFormat::ETC1A4;
|
||||
constexpr std::size_t subtile_size = has_alpha ? 16 : 8;
|
||||
|
||||
// Pick the subtile containing (x, y), then make the coordinates subtile-local.
const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
|
||||
x %= subtile_width;
|
||||
y %= subtile_height;
|
||||
|
||||
const std::byte* subtile_ptr = source_tile + subtile_index * subtile_size;
|
||||
|
||||
u8 alpha = 255;
|
||||
if constexpr (has_alpha) {
|
||||
// Read the 64-bit alpha block and step past it to the colour block.
u64_le packed_alpha;
|
||||
std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
|
||||
subtile_ptr += sizeof(u64);
|
||||
|
||||
// The nibble index is x * 4 + y, i.e. x selects the group of four nibbles.
alpha = Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
|
||||
}
|
||||
|
||||
// The 64-bit colour block is decoded by the shared ETC1 sampler.
const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr);
|
||||
const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y);
|
||||
|
||||
// Copy the uncompressed pixel to the destination
|
||||
std::memcpy(dest_pixel, rgb.AsArray(), 3);
|
||||
dest_pixel[3] = std::byte{alpha};
|
||||
}
|
||||
|
||||
// Converts one linear pixel at `source` into the layout stored in tiled memory and
// writes it to `dest`. This is the inverse of the DecodePixel conversions for the
// formats handled here: D24S8 is rotated right instead of left, and the byte swaps
// are their own inverse. Every other format is copied verbatim.
//
// `source` and `dest` must each provide four bytes for D24S8 and the RGBA8 swap
// path, three bytes for the RGB8 swap path, and bytes_per_pixel otherwise.
template <PixelFormat format>
|
||||
inline void EncodePixel(const std::byte* source, std::byte* dest) {
|
||||
// Whole bytes per pixel; only used by the plain-copy fallback below.
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
// Rotate the 32-bit word right by 8 bits so the low byte becomes the top byte.
const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &s8d24, sizeof(u32));
|
||||
// NOTE(review): the two GLES branches use a plain `if`, not `if constexpr`,
// presumably because GLES is a runtime flag -- confirm against gl_vars.h.
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||
// Reverse the byte order of the 32-bit pixel.
const u32 abgr = std::byteswap(MakeInt<u32>(source));
|
||||
std::memcpy(dest, &abgr, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||
// Swap the first and third byte; the middle byte is copied unchanged.
dest[0] = source[2];
|
||||
dest[1] = source[1];
|
||||
dest[2] = source[0];
|
||||
} else {
|
||||
// No conversion needed: copy the pixel bytes verbatim.
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format>
|
||||
inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::span<std::byte> linear_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format);
|
||||
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
|
||||
constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;
|
||||
|
||||
for (u32 y = 0; y < 8; y++) {
|
||||
for (u32 x = 0; x < 8; x++) {
|
||||
const u32 tile_offset = VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
|
||||
const u32 linear_offset = ((7 - y) * stride + x) * linear_bytes_per_pixel;
|
||||
auto tile_pixel = tile_buffer.subspan(tile_offset, bytes_per_pixel);
|
||||
auto linear_pixel = linear_buffer.subspan(linear_offset, linear_bytes_per_pixel);
|
||||
|
||||
const auto tiled_pixel = tile_buffer.subspan(VideoCore::MortonInterleave(x, y) * bytes_per_pixel,
|
||||
bytes_per_pixel);
|
||||
const auto linear_pixel = linear_buffer.subspan(((7 - y) * stride + x) * linear_bytes_per_pixel,
|
||||
linear_bytes_per_pixel);
|
||||
if constexpr (morton_to_linear) {
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 s8d24 = MakeInt(tile_pixel);
|
||||
const u32 d24s8 = std::rotl(s8d24, 8);
|
||||
std::memcpy(linear_pixel.data(), &d24s8, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||
const u32 abgr = MakeInt(tile_pixel);
|
||||
const u32 rgba = std::byteswap(abgr);
|
||||
std::memcpy(linear_pixel.data(), &rgba, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||
std::memcpy(linear_pixel.data(), tile_pixel.data(), 3);
|
||||
std::swap(linear_pixel[0], linear_pixel[2]);
|
||||
if constexpr (is_compressed) {
|
||||
DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else if constexpr (is_4bit) {
|
||||
DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else {
|
||||
std::memcpy(linear_pixel.data(), tile_pixel.data(), bytes_per_pixel);
|
||||
DecodePixel<format>(tiled_pixel.data(), linear_pixel.data());
|
||||
}
|
||||
} else {
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 d24s8 = MakeInt(linear_pixel);
|
||||
const u32 s8d24 = std::rotr(d24s8, 8);
|
||||
std::memcpy(tile_pixel.data(), &s8d24, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||
const u32 rgba = MakeInt(linear_pixel);
|
||||
const u32 abgr = std::byteswap(rgba);
|
||||
std::memcpy(tile_pixel.data(), &abgr, sizeof(u32));
|
||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||
std::memcpy(tile_pixel.data(), linear_pixel.data(), 3);
|
||||
std::swap(tile_pixel[0], tile_pixel[2]);
|
||||
} else {
|
||||
std::memcpy(tile_pixel.data(), linear_pixel.data(), bytes_per_pixel);
|
||||
}
|
||||
EncodePixel<format>(linear_pixel.data(), tiled_pixel.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format>
|
||||
static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer,
|
||||
PAddr base, PAddr start, PAddr end) {
|
||||
static void MortonCopy(u32 stride, u32 height, u32 start_offset,
|
||||
std::span<std::byte> linear_buffer,
|
||||
std::span<std::byte> tiled_buffer) {
|
||||
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
||||
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
||||
|
||||
constexpr u32 tile_size = bytes_per_pixel * 64;
|
||||
// We could use bytes_per_pixel here but it should be avoided because it
|
||||
// becomes zero for 4-bit textures!
|
||||
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
||||
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
|
||||
const u32 end_offset = start_offset + static_cast<u32>(tiled_buffer.size());
|
||||
|
||||
// This only applies for D24 format, by shifting the span one byte all pixels
|
||||
// are written properly without byteswap
|
||||
u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
|
||||
// Does this line have any significance?
|
||||
//u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
|
||||
u32 linear_offset = 0;
|
||||
u32 tiled_offset = 0;
|
||||
|
||||
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
||||
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
||||
PAddr aligned_end = base + Common::AlignDown(end - base, tile_size);
|
||||
const PAddr aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
|
||||
const PAddr aligned_start_offset = Common::AlignUp(start_offset, tile_size);
|
||||
PAddr aligned_end_offset = Common::AlignDown(end_offset, tile_size);
|
||||
|
||||
ASSERT(!morton_to_linear || (aligned_start == start && aligned_end == end));
|
||||
ASSERT(!morton_to_linear || (aligned_start_offset == start_offset && aligned_end_offset == end_offset));
|
||||
|
||||
const u32 begin_pixel_index = (aligned_down_start - base) / bytes_per_pixel;
|
||||
const u32 begin_pixel_index = aligned_down_start_offset * 8 / GetFormatBpp(format);
|
||||
u32 x = (begin_pixel_index % (stride * 8)) / 8;
|
||||
u32 y = (begin_pixel_index / (stride * 8)) * 8;
|
||||
|
||||
@ -112,29 +199,21 @@ static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffe
|
||||
}
|
||||
};
|
||||
|
||||
// If during a texture download the start coordinate is inside a tile, swizzle
|
||||
// the tile to a temporary buffer and copy the part we are interested in
|
||||
if (start < aligned_start && !morton_to_linear) {
|
||||
// If during a texture download the start coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (start_offset < aligned_start_offset && !morton_to_linear) {
|
||||
std::array<std::byte, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
|
||||
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
|
||||
std::memcpy(tiled_buffer.data(), tmp_buf.data() + start - aligned_down_start,
|
||||
std::min(aligned_start, end) - start);
|
||||
|
||||
tiled_offset += aligned_start - start;
|
||||
std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
|
||||
std::min(aligned_start_offset, end_offset) - start_offset);
|
||||
|
||||
tiled_offset += aligned_start_offset - start_offset;
|
||||
linear_next_tile();
|
||||
}
|
||||
|
||||
// Pokemon Super Mystery Dungeon will try to use textures that go beyond
|
||||
// the end address of VRAM. Clamp the address to the end of VRAM if that happens
|
||||
// TODO: Move this to the rasterizer cache
|
||||
if (const u32 clamped_end = VideoCore::g_memory->ClampPhysicalAddress(aligned_start, aligned_end);
|
||||
clamped_end != aligned_end) {
|
||||
LOG_ERROR(Render_OpenGL, "Out of bound texture read address {:#x}, clamping to {:#x}", aligned_end, clamped_end);
|
||||
aligned_end = clamped_end;
|
||||
}
|
||||
|
||||
const u32 buffer_end = tiled_offset + aligned_end - aligned_start;
|
||||
const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
|
||||
while (tiled_offset < buffer_end) {
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
|
||||
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
|
||||
@ -143,15 +222,17 @@ static void MortonCopy(u32 stride, u32 height, std::span<std::byte> linear_buffe
|
||||
linear_next_tile();
|
||||
}
|
||||
|
||||
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
|
||||
// If during a texture download the end coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
|
||||
std::array<std::byte, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_size);
|
||||
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
|
||||
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end - aligned_end);
|
||||
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), end_offset - aligned_end_offset);
|
||||
}
|
||||
}
|
||||
|
||||
using MortonFunc = void (*)(u32, u32, std::span<std::byte>, std::span<std::byte>, PAddr, PAddr, PAddr);
|
||||
using MortonFunc = void (*)(u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||
@ -159,15 +240,15 @@ static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||
MortonCopy<true, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4>, // 4
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr, // 5 - 13
|
||||
MortonCopy<true, PixelFormat::IA8>, // 5
|
||||
MortonCopy<true, PixelFormat::RG8>, // 6
|
||||
MortonCopy<true, PixelFormat::I8>, // 7
|
||||
MortonCopy<true, PixelFormat::A8>, // 8
|
||||
MortonCopy<true, PixelFormat::IA4>, // 9
|
||||
MortonCopy<true, PixelFormat::I4>, // 10
|
||||
MortonCopy<true, PixelFormat::A4>, // 11
|
||||
MortonCopy<true, PixelFormat::ETC1>, // 12
|
||||
MortonCopy<true, PixelFormat::ETC1A4>, // 13
|
||||
MortonCopy<true, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24>, // 16
|
||||
|
@ -180,7 +180,7 @@ constexpr u32 GetFormatBpp(PixelFormat format) {
|
||||
case PixelFormat::ETC1:
|
||||
return 4;
|
||||
default:
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,11 +5,13 @@
|
||||
#pragma once
|
||||
#include <glad/glad.h>
|
||||
#include "common/assert.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
#include "video_core/rasterizer_cache/morton_swizzle.h"
|
||||
#include "video_core/rasterizer_cache/surface_params.h"
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_opengl/gl_vars.h"
|
||||
#include "video_core/video_core.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
@ -57,45 +59,18 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
|
||||
return tex_tuple;
|
||||
}
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
|
||||
|
||||
// TODO: Move memory access out of the morton function
|
||||
SwizzleImpl(params.stride, params.height, source_linear, dest_tiled, params.addr, flush_start, flush_end);
|
||||
SwizzleImpl(params.stride, params.height, start_offset, source_linear, dest_tiled);
|
||||
}
|
||||
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
|
||||
// TODO: Integrate this to UNSWIZZLE_TABLE
|
||||
if (params.type == SurfaceType::Texture) {
|
||||
Pica::Texture::TextureInfo tex_info{};
|
||||
tex_info.width = params.width;
|
||||
tex_info.height = params.height;
|
||||
tex_info.format = static_cast<Pica::TexturingRegs::TextureFormat>(params.pixel_format);
|
||||
tex_info.SetDefaultStride();
|
||||
tex_info.physical_address = params.addr;
|
||||
|
||||
const SurfaceInterval load_interval(load_start, load_end);
|
||||
const auto rect = params.GetSubRect(params.FromInterval(load_interval));
|
||||
DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval);
|
||||
|
||||
const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data());
|
||||
for (u32 y = rect.bottom; y < rect.top; y++) {
|
||||
for (u32 x = rect.left; x < rect.right; x++) {
|
||||
auto vec4 =
|
||||
Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info);
|
||||
const std::size_t offset = (x + (params.width * y)) * 4;
|
||||
std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
|
||||
UnswizzleImpl(params.stride, params.height, dest_linear, source_tiled, params.addr, load_start, load_end);
|
||||
}
|
||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
|
||||
UnswizzleImpl(params.stride, params.height, start_offset, dest_linear, source_tiled);
|
||||
}
|
||||
|
||||
ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data) {
|
||||
|
@ -50,22 +50,22 @@ struct TextureCubeConfig {
|
||||
|
||||
class SurfaceParams;
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
||||
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
|
||||
|
||||
/**
|
||||
* Converts a morton swizzled texture to linear format.
|
||||
*
|
||||
* @param params Structure used to query the surface information.
|
||||
* @param load_start, load_end The address range to unswizzle texture data.
|
||||
* @param source_tiled The source swizzled data. The span begins at surface base address not load_start.
|
||||
* @param start_offset The byte offset from the surface base address at which the source_tiled span begins.
|
||||
* @param source_tiled The source morton swizzled data.
|
||||
* @param dest_linear The output buffer where the generated linear data will be written to.
|
||||
*/
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
||||
void UnswizzleTexture(const SurfaceParams& params, u32 start_offset,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
|
||||
|
||||
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
|
Reference in New Issue
Block a user