morton_swizzle: Optimize and use std::span
This commit is contained in:
@ -34,19 +34,11 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
|||||||
const bool need_swap =
|
const bool need_swap =
|
||||||
GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
|
GLES && (pixel_format == PixelFormat::RGBA8 || pixel_format == PixelFormat::RGB8);
|
||||||
|
|
||||||
const u8* texture_ptr = VideoCore::g_memory->GetPhysicalPointer(addr);
|
u8* texture_ptr = VideoCore::g_memory->GetPhysicalPointer(addr);
|
||||||
if (texture_ptr == nullptr) {
|
if (texture_ptr == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 byte_size = width * height * GetBytesPerPixel(pixel_format);
|
|
||||||
const auto texture_data = std::span<const std::byte>{reinterpret_cast<const std::byte*>(texture_ptr),
|
|
||||||
byte_size};
|
|
||||||
|
|
||||||
if (gl_buffer.empty()) {
|
|
||||||
gl_buffer.resize(byte_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Should probably be done in ::Memory:: and check for other regions too
|
// TODO: Should probably be done in ::Memory:: and check for other regions too
|
||||||
if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
|
if (load_start < Memory::VRAM_VADDR_END && load_end > Memory::VRAM_VADDR_END)
|
||||||
load_end = Memory::VRAM_VADDR_END;
|
load_end = Memory::VRAM_VADDR_END;
|
||||||
@ -54,10 +46,16 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
|||||||
if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
|
if (load_start < Memory::VRAM_VADDR && load_end > Memory::VRAM_VADDR)
|
||||||
load_start = Memory::VRAM_VADDR;
|
load_start = Memory::VRAM_VADDR;
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
|
|
||||||
|
|
||||||
ASSERT(load_start >= addr && load_end <= end);
|
ASSERT(load_start >= addr && load_end <= end);
|
||||||
|
|
||||||
const u32 start_offset = load_start - addr;
|
const u32 start_offset = load_start - addr;
|
||||||
|
const u32 byte_size = width * height * GetBytesPerPixel(pixel_format);
|
||||||
|
|
||||||
|
if (gl_buffer.empty()) {
|
||||||
|
gl_buffer.resize(byte_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
MICROPROFILE_SCOPE(RasterizerCache_SurfaceLoad);
|
||||||
|
|
||||||
if (!is_tiled) {
|
if (!is_tiled) {
|
||||||
ASSERT(type == SurfaceType::Color);
|
ASSERT(type == SurfaceType::Color);
|
||||||
@ -66,32 +64,30 @@ void CachedSurface::LoadGLBuffer(PAddr load_start, PAddr load_end) {
|
|||||||
// cannot fully test this
|
// cannot fully test this
|
||||||
if (pixel_format == PixelFormat::RGBA8) {
|
if (pixel_format == PixelFormat::RGBA8) {
|
||||||
for (std::size_t i = start_offset; i < load_end - addr; i += 4) {
|
for (std::size_t i = start_offset; i < load_end - addr; i += 4) {
|
||||||
gl_buffer[i] = texture_ptr[i + 3];
|
gl_buffer[i] = (std::byte)texture_ptr[i + 3];
|
||||||
gl_buffer[i + 1] = texture_ptr[i + 2];
|
gl_buffer[i + 1] = (std::byte)texture_ptr[i + 2];
|
||||||
gl_buffer[i + 2] = texture_ptr[i + 1];
|
gl_buffer[i + 2] = (std::byte)texture_ptr[i + 1];
|
||||||
gl_buffer[i + 3] = texture_ptr[i];
|
gl_buffer[i + 3] = (std::byte)texture_ptr[i];
|
||||||
}
|
}
|
||||||
} else if (pixel_format == PixelFormat::RGB8) {
|
} else if (pixel_format == PixelFormat::RGB8) {
|
||||||
for (std::size_t i = start_offset; i < load_end - addr; i += 3) {
|
for (std::size_t i = start_offset; i < load_end - addr; i += 3) {
|
||||||
gl_buffer[i] = texture_ptr[i + 2];
|
gl_buffer[i] = (std::byte)texture_ptr[i + 2];
|
||||||
gl_buffer[i + 1] = texture_ptr[i + 1];
|
gl_buffer[i + 1] = (std::byte)texture_ptr[i + 1];
|
||||||
gl_buffer[i + 2] = texture_ptr[i];
|
gl_buffer[i + 2] = (std::byte)texture_ptr[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
std::memcpy(&gl_buffer[start_offset], texture_ptr + start_offset,
|
std::memcpy(gl_buffer.data() + start_offset, texture_ptr + start_offset, load_end - load_start);
|
||||||
load_end - load_start);
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const auto dest_data = std::span<std::byte>{reinterpret_cast<std::byte*>(gl_buffer.data()),
|
std::span<std::byte> texture_data{(std::byte*)texture_ptr, byte_size};
|
||||||
byte_size};
|
UnswizzleTexture(*this, load_start, load_end, texture_data, gl_buffer);
|
||||||
UnswizzleTexture(*this, load_start, load_end, texture_data, dest_data);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64));
|
MICROPROFILE_DEFINE(RasterizerCache_SurfaceFlush, "RasterizerCache", "Surface Flush", MP_RGB(128, 192, 64));
|
||||||
void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
||||||
u8* const dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
|
u8* dst_buffer = VideoCore::g_memory->GetPhysicalPointer(addr);
|
||||||
if (dst_buffer == nullptr) {
|
if (dst_buffer == nullptr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -132,25 +128,24 @@ void CachedSurface::FlushGLBuffer(PAddr flush_start, PAddr flush_end) {
|
|||||||
ASSERT(type == SurfaceType::Color);
|
ASSERT(type == SurfaceType::Color);
|
||||||
if (pixel_format == PixelFormat::RGBA8 && GLES) {
|
if (pixel_format == PixelFormat::RGBA8 && GLES) {
|
||||||
for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
|
for (std::size_t i = start_offset; i < flush_end - addr; i += 4) {
|
||||||
dst_buffer[i] = gl_buffer[i + 3];
|
dst_buffer[i] = (u8)gl_buffer[i + 3];
|
||||||
dst_buffer[i + 1] = gl_buffer[i + 2];
|
dst_buffer[i + 1] = (u8)gl_buffer[i + 2];
|
||||||
dst_buffer[i + 2] = gl_buffer[i + 1];
|
dst_buffer[i + 2] = (u8)gl_buffer[i + 1];
|
||||||
dst_buffer[i + 3] = gl_buffer[i];
|
dst_buffer[i + 3] = (u8)gl_buffer[i];
|
||||||
}
|
}
|
||||||
} else if (pixel_format == PixelFormat::RGB8 && GLES) {
|
} else if (pixel_format == PixelFormat::RGB8 && GLES) {
|
||||||
for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
|
for (std::size_t i = start_offset; i < flush_end - addr; i += 3) {
|
||||||
dst_buffer[i] = gl_buffer[i + 2];
|
dst_buffer[i] = (u8)gl_buffer[i + 2];
|
||||||
dst_buffer[i + 1] = gl_buffer[i + 1];
|
dst_buffer[i + 1] = (u8)gl_buffer[i + 1];
|
||||||
dst_buffer[i + 2] = gl_buffer[i];
|
dst_buffer[i + 2] = (u8)gl_buffer[i];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
|
std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset],
|
||||||
flush_end - flush_start);
|
flush_end - flush_start);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const auto source_data = std::span<std::byte>{reinterpret_cast<std::byte*>(gl_buffer.data()),
|
std::span<std::byte> texture_data{(std::byte*)dst_buffer + start_offset, byte_size};
|
||||||
byte_size};
|
SwizzleTexture(*this, flush_start, flush_end, gl_buffer, texture_data);
|
||||||
SwizzleTexture(*this, flush_start, flush_end, source_data, {});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -421,7 +416,7 @@ void CachedSurface::DownloadGLTexture(const Common::Rectangle<u32>& rect) {
|
|||||||
.region = rect
|
.region = rect
|
||||||
};
|
};
|
||||||
|
|
||||||
runtime.ReadTexture(texture, subresource, tuple, gl_buffer.data());
|
runtime.ReadTexture(texture, subresource, tuple, (u8*)gl_buffer.data());
|
||||||
}
|
}
|
||||||
|
|
||||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||||
|
@ -104,7 +104,7 @@ public:
|
|||||||
public:
|
public:
|
||||||
bool registered = false;
|
bool registered = false;
|
||||||
SurfaceRegions invalid_regions;
|
SurfaceRegions invalid_regions;
|
||||||
std::vector<u8> gl_buffer;
|
std::vector<std::byte> gl_buffer;
|
||||||
|
|
||||||
// Number of bytes to read from fill_data
|
// Number of bytes to read from fill_data
|
||||||
u32 fill_size = 0;
|
u32 fill_size = 0;
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include <span>
|
||||||
|
#include <bit>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||||
@ -12,50 +14,54 @@
|
|||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
inline u32 MakeInt(std::span<std::byte> bytes) {
|
||||||
|
u32 integer{};
|
||||||
|
std::memcpy(&integer, bytes.data(), sizeof(u32));
|
||||||
|
|
||||||
|
return integer;
|
||||||
|
}
|
||||||
|
|
||||||
template <bool morton_to_linear, PixelFormat format>
|
template <bool morton_to_linear, PixelFormat format>
|
||||||
static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* linear_buffer) {
|
inline void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer, std::span<std::byte> linear_buffer) {
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
constexpr u32 linear_bytes_per_pixel = GetBytesPerPixel(format);
|
||||||
|
|
||||||
for (u32 y = 0; y < 8; y++) {
|
for (u32 y = 0; y < 8; y++) {
|
||||||
for (u32 x = 0; x < 8; x++) {
|
for (u32 x = 0; x < 8; x++) {
|
||||||
u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
|
const u32 tile_offset = VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
|
||||||
u8* linear_ptr = linear_buffer + ((7 - y) * stride + x) * aligned_bytes_per_pixel;
|
const u32 linear_offset = ((7 - y) * stride + x) * linear_bytes_per_pixel;
|
||||||
|
auto tile_pixel = tile_buffer.subspan(tile_offset, bytes_per_pixel);
|
||||||
|
auto linear_pixel = linear_buffer.subspan(linear_offset, linear_bytes_per_pixel);
|
||||||
|
|
||||||
if constexpr (morton_to_linear) {
|
if constexpr (morton_to_linear) {
|
||||||
if constexpr (format == PixelFormat::D24S8) {
|
if constexpr (format == PixelFormat::D24S8) {
|
||||||
linear_ptr[0] = tile_ptr[3];
|
const u32 s8d24 = MakeInt(tile_pixel);
|
||||||
std::memcpy(linear_ptr + 1, tile_ptr, 3);
|
const u32 d24s8 = std::rotl(s8d24, 8);
|
||||||
|
std::memcpy(linear_pixel.data(), &d24s8, sizeof(u32));
|
||||||
} else if (format == PixelFormat::RGBA8 && GLES) {
|
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||||
// because GLES does not have ABGR format
|
const u32 abgr = MakeInt(tile_pixel);
|
||||||
// so we will do byteswapping here
|
const u32 rgba = std::byteswap(abgr);
|
||||||
linear_ptr[0] = tile_ptr[3];
|
std::memcpy(linear_pixel.data(), &rgba, sizeof(u32));
|
||||||
linear_ptr[1] = tile_ptr[2];
|
|
||||||
linear_ptr[2] = tile_ptr[1];
|
|
||||||
linear_ptr[3] = tile_ptr[0];
|
|
||||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||||
linear_ptr[0] = tile_ptr[2];
|
std::memcpy(linear_pixel.data(), tile_pixel.data(), 3);
|
||||||
linear_ptr[1] = tile_ptr[1];
|
std::swap(linear_pixel[0], linear_pixel[2]);
|
||||||
linear_ptr[2] = tile_ptr[0];
|
|
||||||
} else {
|
} else {
|
||||||
std::memcpy(linear_ptr, tile_ptr, bytes_per_pixel);
|
std::memcpy(linear_pixel.data(), tile_pixel.data(), bytes_per_pixel);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if constexpr (format == PixelFormat::D24S8) {
|
if constexpr (format == PixelFormat::D24S8) {
|
||||||
std::memcpy(tile_ptr, linear_ptr + 1, 3);
|
const u32 d24s8 = MakeInt(linear_pixel);
|
||||||
tile_ptr[3] = linear_ptr[0];
|
const u32 s8d24 = std::rotr(d24s8, 8);
|
||||||
|
std::memcpy(tile_pixel.data(), &s8d24, sizeof(u32));
|
||||||
} else if (format == PixelFormat::RGBA8 && GLES) {
|
} else if (format == PixelFormat::RGBA8 && GLES) {
|
||||||
// because GLES does not have ABGR format
|
const u32 rgba = MakeInt(linear_pixel);
|
||||||
// so we will do byteswapping here
|
const u32 abgr = std::byteswap(rgba);
|
||||||
tile_ptr[0] = linear_ptr[3];
|
std::memcpy(tile_pixel.data(), &abgr, sizeof(u32));
|
||||||
tile_ptr[1] = linear_ptr[2];
|
|
||||||
tile_ptr[2] = linear_ptr[1];
|
|
||||||
tile_ptr[3] = linear_ptr[0];
|
|
||||||
} else if (format == PixelFormat::RGB8 && GLES) {
|
} else if (format == PixelFormat::RGB8 && GLES) {
|
||||||
tile_ptr[0] = linear_ptr[2];
|
std::memcpy(tile_pixel.data(), linear_pixel.data(), 3);
|
||||||
tile_ptr[1] = linear_ptr[1];
|
std::swap(tile_pixel[0], tile_pixel[2]);
|
||||||
tile_ptr[2] = linear_ptr[0];
|
|
||||||
} else {
|
} else {
|
||||||
std::memcpy(tile_ptr, linear_ptr, bytes_per_pixel);
|
std::memcpy(tile_pixel.data(), linear_pixel.data(), bytes_per_pixel);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -63,13 +69,20 @@ static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* linear_buffer) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <bool morton_to_linear, PixelFormat format>
|
template <bool morton_to_linear, PixelFormat format>
|
||||||
static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PAddr start, PAddr end) {
|
static void MortonCopy(u32 stride, u32 height,
|
||||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
std::span<std::byte> linear_buffer, std::span<std::byte> tiled_buffer,
|
||||||
constexpr u32 tile_size = bytes_per_pixel * 64;
|
PAddr base, PAddr start, PAddr end) {
|
||||||
|
|
||||||
|
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||||
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
constexpr u32 aligned_bytes_per_pixel = GetBytesPerPixel(format);
|
||||||
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
||||||
linear_buffer += aligned_bytes_per_pixel - bytes_per_pixel;
|
|
||||||
|
constexpr u32 tile_size = bytes_per_pixel * 64;
|
||||||
|
const u32 linear_tile_size = (7 * stride + 8) * aligned_bytes_per_pixel;
|
||||||
|
|
||||||
|
// This only applies for D24 format, by shifting the span one byte all pixels
|
||||||
|
// are written properly without byteswap
|
||||||
|
u32 linear_offset = aligned_bytes_per_pixel - bytes_per_pixel;
|
||||||
|
|
||||||
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
const PAddr aligned_down_start = base + Common::AlignDown(start - base, tile_size);
|
||||||
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
const PAddr aligned_start = base + Common::AlignUp(start - base, tile_size);
|
||||||
@ -84,18 +97,19 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
||||||
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
||||||
// the shader we read/write the linear buffer backwards
|
// the shader we read/write the linear buffer backwards
|
||||||
linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
//linear_buffer += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
||||||
|
linear_offset += ((height - 8 - y) * stride + x) * aligned_bytes_per_pixel;
|
||||||
|
|
||||||
auto linear_next_tile = [&] {
|
auto linear_next_tile = [&] {
|
||||||
x = (x + 8) % stride;
|
x = (x + 8) % stride;
|
||||||
linear_buffer += 8 * aligned_bytes_per_pixel;
|
linear_offset += 8 * aligned_bytes_per_pixel;
|
||||||
if (!x) {
|
if (!x) {
|
||||||
y = (y + 8) % height;
|
y = (y + 8) % height;
|
||||||
if (!y) {
|
if (!y) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
linear_buffer -= stride * 9 * aligned_bytes_per_pixel;
|
linear_offset -= stride * 9 * aligned_bytes_per_pixel;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -104,8 +118,10 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
// If during a texture download the start coordinate is inside a tile, swizzle
|
// If during a texture download the start coordinate is inside a tile, swizzle
|
||||||
// the tile to a temporary buffer and copy the part we are interested in
|
// the tile to a temporary buffer and copy the part we are interested in
|
||||||
if (start < aligned_start && !morton_to_linear) {
|
if (start < aligned_start && !morton_to_linear) {
|
||||||
std::array<u8, tile_size> tmp_buf;
|
std::array<std::byte, tile_size> tmp_buf;
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer);
|
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset);
|
||||||
|
|
||||||
|
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
|
||||||
std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start,
|
std::memcpy(tile_buffer, tmp_buf.data() + start - aligned_down_start,
|
||||||
std::min(aligned_start, end) - start);
|
std::min(aligned_start, end) - start);
|
||||||
|
|
||||||
@ -124,19 +140,23 @@ static void MortonCopy(u32 stride, u32 height, u8* linear_buffer, PAddr base, PA
|
|||||||
|
|
||||||
const u8* buffer_end = tile_buffer + aligned_end - aligned_start;
|
const u8* buffer_end = tile_buffer + aligned_end - aligned_start;
|
||||||
while (tile_buffer < buffer_end) {
|
while (tile_buffer < buffer_end) {
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, tile_buffer, linear_buffer);
|
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset);
|
||||||
|
auto tiled_data = std::span<std::byte>{(std::byte*)tile_buffer, tile_size};
|
||||||
|
|
||||||
|
MortonCopyTile<morton_to_linear, format>(stride, tiled_data, linear_data);
|
||||||
tile_buffer += tile_size;
|
tile_buffer += tile_size;
|
||||||
linear_next_tile();
|
linear_next_tile();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
|
if (end > std::max(aligned_start, aligned_end) && !morton_to_linear) {
|
||||||
std::array<u8, tile_size> tmp_buf;
|
std::array<std::byte, tile_size> tmp_buf;
|
||||||
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf.data(), linear_buffer);
|
std::span<std::byte> linear_data = linear_buffer.last(linear_buffer.size() - linear_offset);
|
||||||
|
MortonCopyTile<morton_to_linear, format>(stride, tmp_buf, linear_data);
|
||||||
std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end);
|
std::memcpy(tile_buffer, tmp_buf.data(), end - aligned_end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
using MortonFunc = void (*)(u32, u32, u8*, PAddr, PAddr, PAddr);
|
using MortonFunc = void (*)(u32, u32, std::span<std::byte>, std::span<std::byte>, PAddr, PAddr, PAddr);
|
||||||
|
|
||||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||||
|
@ -58,17 +58,16 @@ const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
||||||
std::span<std::byte> source, std::span<std::byte> dest) {
|
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled) {
|
||||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||||
const MortonFunc swizzle = SWIZZLE_TABLE[func_index];
|
const MortonFunc SwizzleImpl = SWIZZLE_TABLE[func_index];
|
||||||
u8* source_data = reinterpret_cast<u8*>(source.data());
|
|
||||||
|
|
||||||
// TODO: Move memory access out of the morton function
|
// TODO: Move memory access out of the morton function
|
||||||
swizzle(params.stride, params.height, source_data, params.addr, flush_start, flush_end);
|
SwizzleImpl(params.stride, params.height, source_linear, dest_tiled, params.addr, flush_start, flush_end);
|
||||||
}
|
}
|
||||||
|
|
||||||
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
||||||
std::span<const std::byte> source, std::span<std::byte> dest) {
|
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear) {
|
||||||
// TODO: Integrate this to UNSWIZZLE_TABLE
|
// TODO: Integrate this to UNSWIZZLE_TABLE
|
||||||
if (params.type == SurfaceType::Texture) {
|
if (params.type == SurfaceType::Texture) {
|
||||||
Pica::Texture::TextureInfo tex_info{};
|
Pica::Texture::TextureInfo tex_info{};
|
||||||
@ -82,21 +81,19 @@ void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
|||||||
const auto rect = params.GetSubRect(params.FromInterval(load_interval));
|
const auto rect = params.GetSubRect(params.FromInterval(load_interval));
|
||||||
DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval);
|
DEBUG_ASSERT(params.FromInterval(load_interval).GetInterval() == load_interval);
|
||||||
|
|
||||||
const u8* source_data = reinterpret_cast<const u8*>(source.data());
|
const u8* source_data = reinterpret_cast<const u8*>(source_tiled.data());
|
||||||
for (u32 y = rect.bottom; y < rect.top; y++) {
|
for (u32 y = rect.bottom; y < rect.top; y++) {
|
||||||
for (u32 x = rect.left; x < rect.right; x++) {
|
for (u32 x = rect.left; x < rect.right; x++) {
|
||||||
auto vec4 =
|
auto vec4 =
|
||||||
Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info);
|
Pica::Texture::LookupTexture(source_data, x, params.height - 1 - y, tex_info);
|
||||||
const std::size_t offset = (x + (params.width * y)) * 4;
|
const std::size_t offset = (x + (params.width * y)) * 4;
|
||||||
std::memcpy(dest.data() + offset, vec4.AsArray(), 4);
|
std::memcpy(dest_linear.data() + offset, vec4.AsArray(), 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const u32 func_index = static_cast<u32>(params.pixel_format);
|
const u32 func_index = static_cast<u32>(params.pixel_format);
|
||||||
const MortonFunc deswizzle = UNSWIZZLE_TABLE[func_index];
|
const MortonFunc UnswizzleImpl = UNSWIZZLE_TABLE[func_index];
|
||||||
u8* dest_data = reinterpret_cast<u8*>(dest.data());
|
UnswizzleImpl(params.stride, params.height, dest_linear, source_tiled, params.addr, load_start, load_end);
|
||||||
|
|
||||||
deswizzle(params.stride, params.height, dest_data, params.addr, load_start, load_end);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -51,10 +51,10 @@ struct TextureCubeConfig {
|
|||||||
class SurfaceParams;
|
class SurfaceParams;
|
||||||
|
|
||||||
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
void SwizzleTexture(const SurfaceParams& params, u32 flush_start, u32 flush_end,
|
||||||
std::span<std::byte> source, std::span<std::byte> dest);
|
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled);
|
||||||
|
|
||||||
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
void UnswizzleTexture(const SurfaceParams& params, u32 load_start, u32 load_end,
|
||||||
std::span<const std::byte> source, std::span<std::byte> dest);
|
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear);
|
||||||
|
|
||||||
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user