video_core: De-duplicate texture format conversion logic. (#21)
* video_core: De-duplicate texture format conversion logic. * video_core: Replace std::byte with u8 and remove excess linear texture converters. * video_core: Remove implicit RGBA conversions from convert table for now, add comments explaining omissions.
This commit is contained in:
@ -52,6 +52,11 @@ namespace Common::Color {
|
||||
return value >> 2;
|
||||
}
|
||||
|
||||
/// Averages the RGB components of a color
|
||||
[[nodiscard]] constexpr u8 AverageRgbComponents(const Common::Vec4<u8>& color) {
|
||||
return (static_cast<u32>(color.r()) + color.g() + color.b()) / 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in RGBA8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
@ -115,6 +120,44 @@ namespace Common::Color {
|
||||
Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF)};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in IA8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
[[nodiscard]] inline Common::Vec4<u8> DecodeIA8(const u8* bytes) {
|
||||
return {bytes[1], bytes[1], bytes[1], bytes[0]};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in I8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
[[nodiscard]] inline Common::Vec4<u8> DecodeI8(const u8* bytes) {
|
||||
return {bytes[0], bytes[0], bytes[0], 255};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in A8 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
[[nodiscard]] inline Common::Vec4<u8> DecodeA8(const u8* bytes) {
|
||||
return {0, 0, 0, bytes[0]};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a color stored in IA4 format
|
||||
* @param bytes Pointer to encoded source color
|
||||
* @return Result color decoded as Common::Vec4<u8>
|
||||
*/
|
||||
[[nodiscard]] inline Common::Vec4<u8> DecodeIA4(const u8* bytes) {
|
||||
u8 i = Common::Color::Convert4To8((bytes[0] & 0xF0) >> 4);
|
||||
u8 a = Common::Color::Convert4To8(bytes[0] & 0x0F);
|
||||
return {i, i, i, a};
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a depth value stored in D16 format
|
||||
* @param bytes Pointer to encoded source value
|
||||
@ -176,6 +219,7 @@ inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[1] = color.r();
|
||||
bytes[0] = color.g();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a color as RGB565 format
|
||||
* @param color Source color to encode
|
||||
@ -212,6 +256,43 @@ inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
std::memcpy(bytes, &data, sizeof(data));
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a color as IA8 format
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeIA8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[1] = AverageRgbComponents(color);
|
||||
bytes[0] = color.a();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a color as I8 format
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeI8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[0] = AverageRgbComponents(color);
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a color as A8 format
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeA8(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[0] = color.a();
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a color as IA4 format
|
||||
* @param color Source color to encode
|
||||
* @param bytes Destination pointer to store encoded color
|
||||
*/
|
||||
inline void EncodeIA4(const Common::Vec4<u8>& color, u8* bytes) {
|
||||
bytes[0] = (Convert8To4(AverageRgbComponents(color)) << 4) | Convert8To4(color.a());
|
||||
}
|
||||
|
||||
/**
|
||||
* Encode a 16 bit depth value as D16 format
|
||||
* @param value 16 bit source depth value to encode
|
||||
|
@ -107,11 +107,11 @@ public:
|
||||
}
|
||||
|
||||
auto GetWriteBytes(std::size_t size) {
|
||||
return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size};
|
||||
return std::span{cptr, size > csize ? csize : size};
|
||||
}
|
||||
|
||||
auto GetReadBytes(std::size_t size) const {
|
||||
return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size};
|
||||
return std::span{cptr, size > csize ? csize : size};
|
||||
}
|
||||
|
||||
std::size_t GetSize() const {
|
||||
|
@ -28,7 +28,7 @@ add_library(video_core STATIC
|
||||
regs_texturing.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
rasterizer_cache/morton_swizzle.h
|
||||
rasterizer_cache/texture_codec.h
|
||||
rasterizer_cache/pixel_format.cpp
|
||||
rasterizer_cache/pixel_format.h
|
||||
rasterizer_cache/rasterizer_cache.cpp
|
||||
|
@ -1,367 +0,0 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include "common/alignment.h"
|
||||
#include "common/color.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/texture/etc1.h"
|
||||
#include "video_core/utils.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
template <typename T>
|
||||
inline T MakeInt(const std::byte* bytes) {
|
||||
T integer{};
|
||||
std::memcpy(&integer, bytes, sizeof(T));
|
||||
|
||||
return integer;
|
||||
}
|
||||
|
||||
template <PixelFormat format, bool converted>
|
||||
constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &d24s8, sizeof(u32));
|
||||
} else if constexpr (format == PixelFormat::RGBA8 && converted) {
|
||||
const u32 rgba = MakeInt<u32>(source);
|
||||
const u32 abgr = Common::swap32(rgba);
|
||||
std::memcpy(dest, &abgr, 4);
|
||||
} else if constexpr (format == PixelFormat::RGB8 && converted) {
|
||||
u32 rgb{};
|
||||
std::memcpy(&rgb, source, 3);
|
||||
const u32 abgr = Common::swap32(rgb << 8) | 0xFF000000;
|
||||
std::memcpy(dest, &abgr, 4);
|
||||
} else if constexpr (format == PixelFormat::RGB565 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGB565(reinterpret_cast<const u8*>(source));
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGB5A1(reinterpret_cast<const u8*>(source));
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGBA4(reinterpret_cast<const u8*>(source));
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::IA8) {
|
||||
std::memset(dest, static_cast<int>(source[1]), 3);
|
||||
dest[3] = source[0];
|
||||
} else if constexpr (format == PixelFormat::RG8) {
|
||||
const auto rgba = Common::Color::DecodeRG8(reinterpret_cast<const u8*>(source));
|
||||
std::memcpy(dest, rgba.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::I8) {
|
||||
std::memset(dest, static_cast<int>(source[0]), 3);
|
||||
dest[3] = std::byte{255};
|
||||
} else if constexpr (format == PixelFormat::A8) {
|
||||
std::memset(dest, 0, 3);
|
||||
dest[3] = source[0];
|
||||
} else if constexpr (format == PixelFormat::IA4) {
|
||||
const u8 ia4 = static_cast<const u8>(source[0]);
|
||||
std::memset(dest, Common::Color::Convert4To8(ia4 >> 4), 3);
|
||||
dest[3] = std::byte{Common::Color::Convert4To8(ia4 & 0xF)};
|
||||
} else if constexpr (format == PixelFormat::D24 && converted) {
|
||||
const auto d32 = Common::Color::DecodeD24(reinterpret_cast<const u8*>(source)) / 16777215.f;
|
||||
std::memcpy(dest, &d32, sizeof(d32));
|
||||
} else {
|
||||
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
template <PixelFormat format>
|
||||
constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
||||
const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
|
||||
const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
|
||||
|
||||
if constexpr (format == PixelFormat::I4) {
|
||||
std::memset(dest_pixel, static_cast<int>(pixel), 3);
|
||||
dest_pixel[3] = std::byte{255};
|
||||
} else {
|
||||
std::memset(dest_pixel, 0, 3);
|
||||
dest_pixel[3] = std::byte{pixel};
|
||||
}
|
||||
}
|
||||
|
||||
template <PixelFormat format>
|
||||
constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
|
||||
constexpr u32 subtile_width = 4;
|
||||
constexpr u32 subtile_height = 4;
|
||||
constexpr bool has_alpha = format == PixelFormat::ETC1A4;
|
||||
constexpr std::size_t subtile_size = has_alpha ? 16 : 8;
|
||||
|
||||
const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
|
||||
x %= subtile_width;
|
||||
y %= subtile_height;
|
||||
|
||||
const std::byte* subtile_ptr = source_tile + subtile_index * subtile_size;
|
||||
|
||||
u8 alpha = 255;
|
||||
if constexpr (has_alpha) {
|
||||
u64_le packed_alpha;
|
||||
std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
|
||||
subtile_ptr += sizeof(u64);
|
||||
|
||||
alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
|
||||
}
|
||||
|
||||
const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr);
|
||||
const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y);
|
||||
|
||||
// Copy the uncompressed pixel to the destination
|
||||
std::memcpy(dest_pixel, rgb.AsArray(), 3);
|
||||
dest_pixel[3] = std::byte{alpha};
|
||||
}
|
||||
|
||||
template <PixelFormat format, bool converted>
|
||||
constexpr void EncodePixel(const std::byte* source, std::byte* dest) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &s8d24, sizeof(u32));
|
||||
} else if constexpr (format == PixelFormat::RGBA8 && converted) {
|
||||
const u32 abgr = MakeInt<u32>(source);
|
||||
const u32 rgba = Common::swap32(abgr);
|
||||
std::memcpy(dest, &rgba, 4);
|
||||
} else if constexpr (format == PixelFormat::RGB8 && converted) {
|
||||
const u32 abgr = MakeInt<u32>(source);
|
||||
const u32 rgb = Common::swap32(abgr << 8);
|
||||
std::memcpy(dest, &rgb, 3);
|
||||
} else if constexpr (format == PixelFormat::RGB565 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGB565(rgba, reinterpret_cast<u8*>(dest));
|
||||
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest));
|
||||
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest));
|
||||
} else if constexpr (format == PixelFormat::D24 && converted) {
|
||||
float d32;
|
||||
std::memcpy(&d32, source, sizeof(d32));
|
||||
Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast<u8*>(dest));
|
||||
} else {
|
||||
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format, bool converted>
|
||||
constexpr void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
|
||||
std::span<std::byte> linear_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
|
||||
constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;
|
||||
|
||||
for (u32 y = 0; y < 8; y++) {
|
||||
for (u32 x = 0; x < 8; x++) {
|
||||
const auto tiled_pixel = tile_buffer.subspan(
|
||||
VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel);
|
||||
const auto linear_pixel = linear_buffer.subspan(
|
||||
((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel);
|
||||
if constexpr (morton_to_linear) {
|
||||
if constexpr (is_compressed) {
|
||||
DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else if constexpr (is_4bit) {
|
||||
DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else {
|
||||
DecodePixel<format, converted>(tiled_pixel.data(), linear_pixel.data());
|
||||
}
|
||||
} else {
|
||||
EncodePixel<format, converted>(linear_pixel.data(), tiled_pixel.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Performs morton to/from linear conversions on the provided pixel data
|
||||
* @param converted If true performs RGBA8 to/from conversion to all color formats
|
||||
* @param width, height The dimensions of the rectangular region of pixels in linear_buffer
|
||||
* @param start_offset The number of bytes from the start of the first tile to the start of
|
||||
* tiled_buffer
|
||||
* @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
|
||||
* @param linear_buffer The linear pixel data
|
||||
* @param tiled_buffer The tiled pixel data
|
||||
*
|
||||
* The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for
|
||||
* converting between linear and morton tiled layouts. The function handles both conversions but
|
||||
* there are slightly different paths and inputs for each:
|
||||
*
|
||||
* Morton to Linear:
|
||||
* During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the
|
||||
* linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire
|
||||
* texture area, but rather the specific rectangle affected by the upload.
|
||||
*
|
||||
* Linear to Morton:
|
||||
* This is similar to the other conversion but with some differences. In this case tiled_buffer is
|
||||
* not required to be aligned to any specific boundary which requires special care.
|
||||
* start_offset/end_offset are useful here as they tell us exactly where the data should be placed
|
||||
* in the linear_buffer.
|
||||
*/
|
||||
template <bool morton_to_linear, PixelFormat format, bool converted = false>
|
||||
static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
|
||||
std::span<std::byte> linear_buffer,
|
||||
std::span<std::byte> tiled_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
||||
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
||||
|
||||
const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
|
||||
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
|
||||
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
|
||||
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
|
||||
|
||||
ASSERT(!morton_to_linear ||
|
||||
(aligned_start_offset == start_offset && aligned_end_offset == end_offset));
|
||||
|
||||
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
||||
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
||||
// the shader we read/write the linear buffer from the bottom up
|
||||
u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
|
||||
u32 tiled_offset = 0;
|
||||
u32 x = 0;
|
||||
u32 y = 0;
|
||||
|
||||
const auto LinearNextTile = [&] {
|
||||
x = (x + 8) % width;
|
||||
linear_offset += 8 * aligned_bytes_per_pixel;
|
||||
if (!x) {
|
||||
y = (y + 8) % height;
|
||||
if (!y) {
|
||||
return;
|
||||
}
|
||||
|
||||
linear_offset -= width * 9 * aligned_bytes_per_pixel;
|
||||
}
|
||||
};
|
||||
|
||||
// If during a texture download the start coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (start_offset < aligned_start_offset && !morton_to_linear) {
|
||||
std::array<std::byte, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
|
||||
|
||||
std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
|
||||
std::min(aligned_start_offset, end_offset) - start_offset);
|
||||
|
||||
tiled_offset += aligned_start_offset - start_offset;
|
||||
LinearNextTile();
|
||||
}
|
||||
|
||||
const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
|
||||
while (tiled_offset < buffer_end) {
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
|
||||
tiled_offset += tile_size;
|
||||
LinearNextTile();
|
||||
}
|
||||
|
||||
// If during a texture download the end coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
|
||||
std::array<std::byte, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
|
||||
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(),
|
||||
end_offset - aligned_end_offset);
|
||||
}
|
||||
}
|
||||
|
||||
using MortonFunc = void (*)(u32, u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<true, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<true, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4>, // 4
|
||||
MortonCopy<true, PixelFormat::IA8>, // 5
|
||||
MortonCopy<true, PixelFormat::RG8>, // 6
|
||||
MortonCopy<true, PixelFormat::I8>, // 7
|
||||
MortonCopy<true, PixelFormat::A8>, // 8
|
||||
MortonCopy<true, PixelFormat::IA4>, // 9
|
||||
MortonCopy<true, PixelFormat::I4>, // 10
|
||||
MortonCopy<true, PixelFormat::A4>, // 11
|
||||
MortonCopy<true, PixelFormat::ETC1>, // 12
|
||||
MortonCopy<true, PixelFormat::ETC1A4>, // 13
|
||||
MortonCopy<true, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24>, // 16
|
||||
MortonCopy<true, PixelFormat::D24S8> // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE_CONVERTED = {
|
||||
MortonCopy<true, PixelFormat::RGBA8, true>, // 0
|
||||
MortonCopy<true, PixelFormat::RGB8, true>, // 1
|
||||
MortonCopy<true, PixelFormat::RGB5A1, true>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565, true>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4, true>, // 4
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
nullptr, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24, true>, // 16
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = {
|
||||
MortonCopy<false, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<false, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<false, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<false, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<false, PixelFormat::RGBA4>, // 4
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr, // 5 - 13
|
||||
MortonCopy<false, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<false, PixelFormat::D24>, // 16
|
||||
MortonCopy<false, PixelFormat::D24S8> // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE_CONVERTED = {
|
||||
MortonCopy<false, PixelFormat::RGBA8, true>, // 0
|
||||
MortonCopy<false, PixelFormat::RGB8, true>, // 1
|
||||
MortonCopy<false, PixelFormat::RGB5A1, true>, // 2
|
||||
MortonCopy<false, PixelFormat::RGB565, true>, // 3
|
||||
MortonCopy<false, PixelFormat::RGBA4, true>, // 4
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
nullptr, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<false, PixelFormat::D24, true>, // 16
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
@ -167,7 +167,7 @@ private:
|
||||
SurfaceSet remove_surfaces;
|
||||
u16 resolution_scale_factor;
|
||||
std::vector<std::function<void()>> download_queue;
|
||||
std::vector<std::byte> staging_buffer;
|
||||
std::vector<u8> staging_buffer;
|
||||
std::unordered_map<TextureCubeConfig, Surface> texture_cube_cache;
|
||||
std::recursive_mutex mutex;
|
||||
};
|
||||
@ -916,12 +916,8 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
|
||||
}
|
||||
|
||||
const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr);
|
||||
if (surface->is_tiled) {
|
||||
UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
} else {
|
||||
runtime.FormatConvert(*surface, true, upload_data, staging.mapped);
|
||||
}
|
||||
DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
|
||||
const BufferTextureCopy upload = {.buffer_offset = 0,
|
||||
.buffer_size = staging.size,
|
||||
@ -957,12 +953,8 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
|
||||
|
||||
download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
|
||||
mapped = staging.mapped, download_dest]() {
|
||||
if (surface->is_tiled) {
|
||||
SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
} else {
|
||||
runtime.FormatConvert(*surface, false, mapped, download_dest);
|
||||
}
|
||||
EncodeTexture(flush_info, flush_start, flush_end, mapped, download_dest,
|
||||
runtime.NeedsConvertion(surface->pixel_format));
|
||||
});
|
||||
}
|
||||
|
||||
|
550
src/video_core/rasterizer_cache/texture_codec.h
Normal file
550
src/video_core/rasterizer_cache/texture_codec.h
Normal file
@ -0,0 +1,550 @@
|
||||
// Copyright 2022 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include "common/alignment.h"
|
||||
#include "common/color.h"
|
||||
#include "video_core/rasterizer_cache/pixel_format.h"
|
||||
#include "video_core/texture/etc1.h"
|
||||
#include "video_core/utils.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
template <typename T>
|
||||
inline T MakeInt(const u8* bytes) {
|
||||
T integer{};
|
||||
std::memcpy(&integer, bytes, sizeof(T));
|
||||
|
||||
return integer;
|
||||
}
|
||||
|
||||
template <PixelFormat format, bool converted>
|
||||
constexpr void DecodePixel(const u8* source, u8* dest) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::RGBA8 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGBA8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGB8 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGB8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGB565 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGB565(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGB5A1(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
|
||||
const auto abgr = Common::Color::DecodeRGBA4(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::IA8) {
|
||||
const auto abgr = Common::Color::DecodeIA8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::RG8) {
|
||||
const auto abgr = Common::Color::DecodeRG8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::I8) {
|
||||
const auto abgr = Common::Color::DecodeI8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::A8) {
|
||||
const auto abgr = Common::Color::DecodeA8(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::IA4) {
|
||||
const auto abgr = Common::Color::DecodeIA4(source);
|
||||
std::memcpy(dest, abgr.AsArray(), 4);
|
||||
} else if constexpr (format == PixelFormat::D16 && converted) {
|
||||
const auto d32 = Common::Color::DecodeD16(source) / 65535.f;
|
||||
std::memcpy(dest, &d32, sizeof(d32));
|
||||
} else if constexpr (format == PixelFormat::D24 && converted) {
|
||||
const auto d32 = Common::Color::DecodeD24(source) / 16777215.f;
|
||||
std::memcpy(dest, &d32, sizeof(d32));
|
||||
} else if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &d24s8, sizeof(u32));
|
||||
} else {
|
||||
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
template <PixelFormat format>
|
||||
constexpr void DecodePixel4(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) {
|
||||
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
||||
const u8 value = source_tile[morton_offset >> 1];
|
||||
const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
|
||||
|
||||
if constexpr (format == PixelFormat::I4) {
|
||||
std::memset(dest_pixel, pixel, 3);
|
||||
dest_pixel[3] = 255;
|
||||
} else {
|
||||
std::memset(dest_pixel, 0, 3);
|
||||
dest_pixel[3] = pixel;
|
||||
}
|
||||
}
|
||||
|
||||
template <PixelFormat format>
|
||||
constexpr void DecodePixelETC1(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) {
|
||||
constexpr u32 subtile_width = 4;
|
||||
constexpr u32 subtile_height = 4;
|
||||
constexpr bool has_alpha = format == PixelFormat::ETC1A4;
|
||||
constexpr std::size_t subtile_size = has_alpha ? 16 : 8;
|
||||
|
||||
const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
|
||||
x %= subtile_width;
|
||||
y %= subtile_height;
|
||||
|
||||
const u8* subtile_ptr = source_tile + subtile_index * subtile_size;
|
||||
|
||||
u8 alpha = 255;
|
||||
if constexpr (has_alpha) {
|
||||
u64_le packed_alpha;
|
||||
std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
|
||||
subtile_ptr += sizeof(u64);
|
||||
|
||||
alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
|
||||
}
|
||||
|
||||
const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr);
|
||||
const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y);
|
||||
|
||||
// Copy the uncompressed pixel to the destination
|
||||
std::memcpy(dest_pixel, rgb.AsArray(), 3);
|
||||
dest_pixel[3] = alpha;
|
||||
}
|
||||
|
||||
template <PixelFormat format, bool converted>
|
||||
constexpr void EncodePixel(const u8* source, u8* dest) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
|
||||
if constexpr (format == PixelFormat::RGBA8 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGBA8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::RGB8 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGB8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::RGB565 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGB565(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::RGB5A1 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGB5A1(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::RGBA4 && converted) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRGBA4(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::IA8) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeIA8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::RG8) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeRG8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::I8) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeI8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::A8) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeA8(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::IA4) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source, 4);
|
||||
Common::Color::EncodeIA4(rgba, dest);
|
||||
} else if constexpr (format == PixelFormat::D16 && converted) {
|
||||
float d32;
|
||||
std::memcpy(&d32, source, sizeof(d32));
|
||||
Common::Color::EncodeD16(d32 * 0xFFFF, dest);
|
||||
} else if constexpr (format == PixelFormat::D24 && converted) {
|
||||
float d32;
|
||||
std::memcpy(&d32, source, sizeof(d32));
|
||||
Common::Color::EncodeD24(d32 * 0xFFFFFF, dest);
|
||||
} else if constexpr (format == PixelFormat::D24S8) {
|
||||
const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8);
|
||||
std::memcpy(dest, &s8d24, sizeof(u32));
|
||||
} else {
|
||||
std::memcpy(dest, source, bytes_per_pixel);
|
||||
}
|
||||
}
|
||||
|
||||
template <PixelFormat format>
|
||||
constexpr void EncodePixel4(u32 x, u32 y, const u8* source_pixel, u8* dest_tile_buffer) {
|
||||
Common::Vec4<u8> rgba;
|
||||
std::memcpy(rgba.AsArray(), source_pixel, 4);
|
||||
|
||||
u8 pixel;
|
||||
if constexpr (format == PixelFormat::I4) {
|
||||
pixel = Common::Color::AverageRgbComponents(rgba);
|
||||
} else {
|
||||
pixel = rgba.a();
|
||||
}
|
||||
|
||||
const u32 morton_offset = VideoCore::MortonInterleave(x, y);
|
||||
const u32 byte_offset = morton_offset >> 1;
|
||||
|
||||
const u8 current_values = dest_tile_buffer[byte_offset];
|
||||
const u8 new_value = Common::Color::Convert8To4(pixel);
|
||||
|
||||
if (morton_offset % 2) {
|
||||
dest_tile_buffer[byte_offset] = (new_value << 4) | (current_values & 0x0F);
|
||||
} else {
|
||||
dest_tile_buffer[byte_offset] = (current_values & 0xF0) | new_value;
|
||||
}
|
||||
}
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format, bool converted>
|
||||
constexpr void MortonCopyTile(u32 stride, std::span<u8> tile_buffer, std::span<u8> linear_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||
constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
|
||||
constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;
|
||||
|
||||
for (u32 y = 0; y < 8; y++) {
|
||||
for (u32 x = 0; x < 8; x++) {
|
||||
const auto tiled_pixel = tile_buffer.subspan(
|
||||
VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel);
|
||||
const auto linear_pixel = linear_buffer.subspan(
|
||||
((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel);
|
||||
if constexpr (morton_to_linear) {
|
||||
if constexpr (is_compressed) {
|
||||
DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else if constexpr (is_4bit) {
|
||||
DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
|
||||
} else {
|
||||
DecodePixel<format, converted>(tiled_pixel.data(), linear_pixel.data());
|
||||
}
|
||||
} else {
|
||||
if constexpr (is_4bit) {
|
||||
EncodePixel4<format>(x, y, linear_pixel.data(), tile_buffer.data());
|
||||
} else {
|
||||
EncodePixel<format, converted>(linear_pixel.data(), tiled_pixel.data());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Performs morton to/from linear convertions on the provided pixel data
|
||||
* @param converted If true performs RGBA8 to/from convertion to all color formats
|
||||
* @param width, height The dimentions of the rectangular region of pixels in linear_buffer
|
||||
* @param start_offset The number of bytes from the start of the first tile to the start of
|
||||
* tiled_buffer
|
||||
* @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
|
||||
* @param linear_buffer The linear pixel data
|
||||
* @param tiled_buffer The tiled pixel data
|
||||
*
|
||||
* The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for
|
||||
* converting between linear and morton tiled layouts. The function handles both convertions but
|
||||
* there are slightly different paths and inputs for each:
|
||||
*
|
||||
* Morton to Linear:
|
||||
* During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the
|
||||
* linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire
|
||||
* texture area, but rather the specific rectangle affected by the upload.
|
||||
*
|
||||
* Linear to Morton:
|
||||
* This is similar to the other convertion but with some differences. In this case tiled_buffer is
|
||||
* not required to be aligned to any specific boundary which requires special care.
|
||||
* start_offset/end_offset are useful here as they tell us exactly where the data should be placed
|
||||
* in the linear_buffer.
|
||||
*/
|
||||
template <bool morton_to_linear, PixelFormat format, bool converted = false>
|
||||
static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
|
||||
std::span<u8> linear_buffer, std::span<u8> tiled_buffer) {
|
||||
constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
|
||||
constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
|
||||
static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
|
||||
|
||||
const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
|
||||
const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
|
||||
const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
|
||||
const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
|
||||
|
||||
ASSERT(!morton_to_linear ||
|
||||
(aligned_start_offset == start_offset && aligned_end_offset == end_offset));
|
||||
|
||||
// In OpenGL the texture origin is in the bottom left corner as opposed to other
|
||||
// APIs that have it at the top left. To avoid flipping texture coordinates in
|
||||
// the shader we read/write the linear buffer from the bottom up
|
||||
u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
|
||||
u32 tiled_offset = 0;
|
||||
u32 x = 0;
|
||||
u32 y = 0;
|
||||
|
||||
const auto LinearNextTile = [&] {
|
||||
x = (x + 8) % width;
|
||||
linear_offset += 8 * aligned_bytes_per_pixel;
|
||||
if (!x) {
|
||||
y = (y + 8) % height;
|
||||
if (!y) {
|
||||
return;
|
||||
}
|
||||
|
||||
linear_offset -= width * 9 * aligned_bytes_per_pixel;
|
||||
}
|
||||
};
|
||||
|
||||
// If during a texture download the start coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (start_offset < aligned_start_offset && !morton_to_linear) {
|
||||
std::array<u8, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
|
||||
|
||||
std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
|
||||
std::min(aligned_start_offset, end_offset) - start_offset);
|
||||
|
||||
tiled_offset += aligned_start_offset - start_offset;
|
||||
LinearNextTile();
|
||||
}
|
||||
|
||||
const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
|
||||
while (tiled_offset < buffer_end) {
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
|
||||
tiled_offset += tile_size;
|
||||
LinearNextTile();
|
||||
}
|
||||
|
||||
// If during a texture download the end coordinate is not tile aligned, swizzle
|
||||
// the tile affected to a temporary buffer and copy the part we are interested in
|
||||
if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
|
||||
std::array<u8, tile_size> tmp_buf;
|
||||
auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
|
||||
MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
|
||||
std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(),
|
||||
end_offset - aligned_end_offset);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs a linear copy, converting pixel formats if required.
|
||||
* @tparam decode If true, decodes the texture if needed. Otherwise, encodes if needed.
|
||||
* @tparam format Pixel format to copy.
|
||||
* @tparam converted If true, converts the texture to/from the appropriate format.
|
||||
* @param src_buffer The source pixel data
|
||||
* @param dst_buffer The destination pixel data
|
||||
* @return
|
||||
*/
|
||||
template <bool decode, PixelFormat format, bool converted = false>
|
||||
static constexpr void LinearCopy(std::span<u8> src_buffer, std::span<u8> dst_buffer) {
|
||||
const std::size_t src_size = src_buffer.size();
|
||||
const std::size_t dst_size = dst_buffer.size();
|
||||
|
||||
if constexpr (converted) {
|
||||
constexpr u32 encoded_bytes_per_pixel = GetFormatBpp(format) / 8;
|
||||
constexpr u32 decoded_bytes_per_pixel = 4;
|
||||
constexpr u32 src_bytes_per_pixel =
|
||||
decode ? encoded_bytes_per_pixel : decoded_bytes_per_pixel;
|
||||
constexpr u32 dst_bytes_per_pixel =
|
||||
decode ? decoded_bytes_per_pixel : encoded_bytes_per_pixel;
|
||||
|
||||
for (std::size_t src_index = 0, dst_index = 0; src_index < src_size && dst_index < dst_size;
|
||||
src_index += src_bytes_per_pixel, dst_index += dst_bytes_per_pixel) {
|
||||
const auto src_pixel = src_buffer.subspan(src_index, src_bytes_per_pixel);
|
||||
const auto dst_pixel = dst_buffer.subspan(dst_index, dst_bytes_per_pixel);
|
||||
if constexpr (decode) {
|
||||
DecodePixel<format, converted>(src_pixel.data(), dst_pixel.data());
|
||||
} else {
|
||||
EncodePixel<format, converted>(src_pixel.data(), dst_pixel.data());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::memcpy(dst_buffer.data(), src_buffer.data(), std::min(src_size, dst_size));
|
||||
}
|
||||
}
|
||||
|
||||
using MortonFunc = void (*)(u32, u32, u32, u32, std::span<u8>, std::span<u8>);
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
|
||||
MortonCopy<true, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<true, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<true, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4>, // 4
|
||||
MortonCopy<true, PixelFormat::IA8>, // 5
|
||||
MortonCopy<true, PixelFormat::RG8>, // 6
|
||||
MortonCopy<true, PixelFormat::I8>, // 7
|
||||
MortonCopy<true, PixelFormat::A8>, // 8
|
||||
MortonCopy<true, PixelFormat::IA4>, // 9
|
||||
MortonCopy<true, PixelFormat::I4>, // 10
|
||||
MortonCopy<true, PixelFormat::A4>, // 11
|
||||
MortonCopy<true, PixelFormat::ETC1>, // 12
|
||||
MortonCopy<true, PixelFormat::ETC1A4>, // 13
|
||||
MortonCopy<true, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24>, // 16
|
||||
MortonCopy<true, PixelFormat::D24S8>, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE_CONVERTED = {
|
||||
MortonCopy<true, PixelFormat::RGBA8, true>, // 0
|
||||
MortonCopy<true, PixelFormat::RGB8, true>, // 1
|
||||
MortonCopy<true, PixelFormat::RGB5A1, true>, // 2
|
||||
MortonCopy<true, PixelFormat::RGB565, true>, // 3
|
||||
MortonCopy<true, PixelFormat::RGBA4, true>, // 4
|
||||
// The following formats are implicitly converted to RGBA regardless, so ignore them.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
MortonCopy<true, PixelFormat::D16, true>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<true, PixelFormat::D24, true>, // 16
|
||||
// No conversion here as we need to do a special deinterleaving conversion elsewhere.
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = {
|
||||
MortonCopy<false, PixelFormat::RGBA8>, // 0
|
||||
MortonCopy<false, PixelFormat::RGB8>, // 1
|
||||
MortonCopy<false, PixelFormat::RGB5A1>, // 2
|
||||
MortonCopy<false, PixelFormat::RGB565>, // 3
|
||||
MortonCopy<false, PixelFormat::RGBA4>, // 4
|
||||
MortonCopy<false, PixelFormat::IA8>, // 5
|
||||
MortonCopy<false, PixelFormat::RG8>, // 6
|
||||
MortonCopy<false, PixelFormat::I8>, // 7
|
||||
MortonCopy<false, PixelFormat::A8>, // 8
|
||||
MortonCopy<false, PixelFormat::IA4>, // 9
|
||||
MortonCopy<false, PixelFormat::I4>, // 10
|
||||
MortonCopy<false, PixelFormat::A4>, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
MortonCopy<false, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<false, PixelFormat::D24>, // 16
|
||||
MortonCopy<false, PixelFormat::D24S8>, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE_CONVERTED = {
|
||||
MortonCopy<false, PixelFormat::RGBA8, true>, // 0
|
||||
MortonCopy<false, PixelFormat::RGB8, true>, // 1
|
||||
MortonCopy<false, PixelFormat::RGB5A1, true>, // 2
|
||||
MortonCopy<false, PixelFormat::RGB565, true>, // 3
|
||||
MortonCopy<false, PixelFormat::RGBA4, true>, // 4
|
||||
// The following formats are implicitly converted from RGBA regardless, so ignore them.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
MortonCopy<false, PixelFormat::D16, true>, // 14
|
||||
nullptr, // 15
|
||||
MortonCopy<false, PixelFormat::D24, true>, // 16
|
||||
// No conversion here as we need to do a special interleaving conversion elsewhere.
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
using LinearFunc = void (*)(std::span<u8>, std::span<u8>);
|
||||
|
||||
static constexpr std::array<LinearFunc, 18> LINEAR_DECODE_TABLE = {
|
||||
LinearCopy<true, PixelFormat::RGBA8>, // 0
|
||||
LinearCopy<true, PixelFormat::RGB8>, // 1
|
||||
LinearCopy<true, PixelFormat::RGB5A1>, // 2
|
||||
LinearCopy<true, PixelFormat::RGB565>, // 3
|
||||
LinearCopy<true, PixelFormat::RGBA4>, // 4
|
||||
// These formats cannot be used linearly and can be ignored.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
LinearCopy<true, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
LinearCopy<true, PixelFormat::D24>, // 16
|
||||
LinearCopy<true, PixelFormat::D24S8>, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<LinearFunc, 18> LINEAR_DECODE_TABLE_CONVERTED = {
|
||||
LinearCopy<true, PixelFormat::RGBA8, true>, // 0
|
||||
LinearCopy<true, PixelFormat::RGB8, true>, // 1
|
||||
LinearCopy<true, PixelFormat::RGB5A1, true>, // 2
|
||||
LinearCopy<true, PixelFormat::RGB565, true>, // 3
|
||||
LinearCopy<true, PixelFormat::RGBA4, true>, // 4
|
||||
// These formats cannot be used linearly and can be ignored.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
LinearCopy<true, PixelFormat::D16, true>, // 14
|
||||
nullptr, // 15
|
||||
LinearCopy<true, PixelFormat::D24, true>, // 16
|
||||
// No conversion here as we need to do a special deinterleaving conversion elsewhere.
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<LinearFunc, 18> LINEAR_ENCODE_TABLE = {
|
||||
LinearCopy<false, PixelFormat::RGBA8>, // 0
|
||||
LinearCopy<false, PixelFormat::RGB8>, // 1
|
||||
LinearCopy<false, PixelFormat::RGB5A1>, // 2
|
||||
LinearCopy<false, PixelFormat::RGB565>, // 3
|
||||
LinearCopy<false, PixelFormat::RGBA4>, // 4
|
||||
// These formats cannot be used linearly and can be ignored.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
LinearCopy<false, PixelFormat::D16>, // 14
|
||||
nullptr, // 15
|
||||
LinearCopy<false, PixelFormat::D24>, // 16
|
||||
LinearCopy<false, PixelFormat::D24S8>, // 17
|
||||
};
|
||||
|
||||
static constexpr std::array<LinearFunc, 18> LINEAR_ENCODE_TABLE_CONVERTED = {
|
||||
LinearCopy<false, PixelFormat::RGBA8, true>, // 0
|
||||
LinearCopy<false, PixelFormat::RGB8, true>, // 1
|
||||
LinearCopy<false, PixelFormat::RGB5A1, true>, // 2
|
||||
LinearCopy<false, PixelFormat::RGB565, true>, // 3
|
||||
LinearCopy<false, PixelFormat::RGBA4, true>, // 4
|
||||
// These formats cannot be used linearly and can be ignored.
|
||||
nullptr, // 5
|
||||
nullptr, // 6
|
||||
nullptr, // 7
|
||||
nullptr, // 8
|
||||
nullptr, // 9
|
||||
nullptr, // 10
|
||||
nullptr, // 11
|
||||
nullptr, // 12
|
||||
nullptr, // 13
|
||||
LinearCopy<false, PixelFormat::D16, true>, // 14
|
||||
nullptr, // 15
|
||||
LinearCopy<false, PixelFormat::D24, true>, // 16
|
||||
// No conversion here as we need to do a special interleaving conversion elsewhere.
|
||||
nullptr, // 17
|
||||
};
|
||||
|
||||
} // namespace VideoCore
|
@ -3,8 +3,8 @@
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/rasterizer_cache/morton_swizzle.h"
|
||||
#include "video_core/rasterizer_cache/surface_params.h"
|
||||
#include "video_core/rasterizer_cache/texture_codec.h"
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/texture/texture_decode.h"
|
||||
|
||||
@ -47,32 +47,58 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
|
||||
return result;
|
||||
}
|
||||
|
||||
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
|
||||
bool convert) {
|
||||
const u32 func_index = static_cast<u32>(swizzle_info.pixel_format);
|
||||
const MortonFunc SwizzleImpl = (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index];
|
||||
if (!SwizzleImpl) {
|
||||
LOG_ERROR(Render_Vulkan, "Unimplemented swizzle function for pixel format {}.", func_index);
|
||||
UNREACHABLE();
|
||||
void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<u8> source, std::span<u8> dest, bool convert) {
|
||||
const u32 func_index = static_cast<u32>(surface_info.pixel_format);
|
||||
|
||||
if (surface_info.is_tiled) {
|
||||
const MortonFunc SwizzleImpl =
|
||||
(convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index];
|
||||
if (SwizzleImpl) {
|
||||
SwizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr,
|
||||
end_addr - surface_info.addr, source, dest);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
const LinearFunc LinearEncodeImpl =
|
||||
(convert ? LINEAR_ENCODE_TABLE_CONVERTED : LINEAR_ENCODE_TABLE)[func_index];
|
||||
if (LinearEncodeImpl) {
|
||||
LinearEncodeImpl(source, dest);
|
||||
return;
|
||||
}
|
||||
}
|
||||
SwizzleImpl(swizzle_info.width, swizzle_info.height, start_addr - swizzle_info.addr,
|
||||
end_addr - swizzle_info.addr, source_linear, dest_tiled);
|
||||
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Unimplemented texture encode function for pixel format = {}, tiled = {}", func_index,
|
||||
surface_info.is_tiled);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
|
||||
bool convert) {
|
||||
const u32 func_index = static_cast<u32>(unswizzle_info.pixel_format);
|
||||
const MortonFunc UnswizzleImpl =
|
||||
(convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index];
|
||||
if (!UnswizzleImpl) {
|
||||
LOG_ERROR(Render_Vulkan, "Unimplemented un-swizzle function for pixel format {}.",
|
||||
func_index);
|
||||
UNREACHABLE();
|
||||
void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<u8> source, std::span<u8> dest, bool convert) {
|
||||
const u32 func_index = static_cast<u32>(surface_info.pixel_format);
|
||||
|
||||
if (surface_info.is_tiled) {
|
||||
const MortonFunc UnswizzleImpl =
|
||||
(convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index];
|
||||
if (UnswizzleImpl) {
|
||||
UnswizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr,
|
||||
end_addr - surface_info.addr, dest, source);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
const LinearFunc LinearDecodeImpl =
|
||||
(convert ? LINEAR_DECODE_TABLE_CONVERTED : LINEAR_DECODE_TABLE)[func_index];
|
||||
if (LinearDecodeImpl) {
|
||||
LinearDecodeImpl(source, dest);
|
||||
return;
|
||||
}
|
||||
}
|
||||
UnswizzleImpl(unswizzle_info.width, unswizzle_info.height, start_addr - unswizzle_info.addr,
|
||||
end_addr - unswizzle_info.addr, dest_linear, source_tiled);
|
||||
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Unimplemented texture decode function for pixel format = {}, tiled = {}", func_index,
|
||||
surface_info.is_tiled);
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
||||
|
@ -107,30 +107,30 @@ struct TextureCubeConfig {
|
||||
[[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
|
||||
|
||||
/**
|
||||
* Converts a morton swizzled texture to linear format.
|
||||
* Encodes a linear texture to the expected linear or tiled format.
|
||||
*
|
||||
* @param unswizzle_info Structure used to query the surface information.
|
||||
* @param start_addr The start address of the source_tiled data.
|
||||
* @param end_addr The end address of the source_tiled data.
|
||||
* @param source_tiled The tiled data to convert.
|
||||
* @param dest_linear The output buffer where the generated linear data will be written to.
|
||||
* @param surface_info Structure used to query the surface information.
|
||||
* @param start_addr The start address of the dest data. Used if tiled.
|
||||
* @param end_addr The end address of the dest data. Used if tiled.
|
||||
* @param source The source linear texture data.
|
||||
* @param dest The output buffer where the encoded linear or tiled data will be written to.
|
||||
* @param convert Whether the pixel format needs to be converted.
|
||||
*/
|
||||
void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
|
||||
bool convert = false);
|
||||
void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<u8> source, std::span<u8> dest, bool convert = false);
|
||||
|
||||
/**
|
||||
* Swizzles a linear texture according to the morton code.
|
||||
* Decodes a linear or tiled texture to the expected linear format.
|
||||
*
|
||||
* @param swizzle_info Structure used to query the surface information.
|
||||
* @param start_addr The start address of the dest_tiled data.
|
||||
* @param end_addr The end address of the dest_tiled data.
|
||||
* @param source_tiled The source morton swizzled data.
|
||||
* @param dest_linear The output buffer where the generated linear data will be written to.
|
||||
* @param surface_info Structure used to query the surface information.
|
||||
* @param start_addr The start address of the source data. Used if tiled.
|
||||
* @param end_addr The end address of the source data. Used if tiled.
|
||||
* @param source The source linear or tiled texture data.
|
||||
* @param dest The output buffer where the decoded linear data will be written to.
|
||||
* @param convert Whether the pixel format needs to be converted.
|
||||
*/
|
||||
void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
|
||||
bool convert = false);
|
||||
void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
|
||||
std::span<u8> source, std::span<u8> dest, bool convert = false);
|
||||
|
||||
} // namespace VideoCore
|
||||
|
||||
|
@ -82,7 +82,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
|
||||
return StagingData{.buffer = buffer.GetHandle(),
|
||||
.size = size,
|
||||
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
||||
.mapped = std::span<u8>{data, size},
|
||||
.buffer_offset = offset};
|
||||
}
|
||||
|
||||
@ -103,23 +103,6 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
|
||||
return DEFAULT_TUPLE;
|
||||
}
|
||||
|
||||
void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
|
||||
std::span<std::byte> dest) {
|
||||
const VideoCore::PixelFormat format = surface.pixel_format;
|
||||
if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
|
||||
return Pica::Texture::ConvertABGRToRGBA(source, dest);
|
||||
} else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
|
||||
return Pica::Texture::ConvertBGRToRGB(source, dest);
|
||||
} else {
|
||||
// Sometimes the source size might be larger than the destination.
|
||||
// This can happen during texture downloads when FromInterval aligns
|
||||
// the flush range to scanline boundaries. In that case only copy
|
||||
// what we need
|
||||
const std::size_t copy_size = std::min(source.size(), dest.size());
|
||||
std::memcpy(dest.data(), source.data(), copy_size);
|
||||
}
|
||||
}
|
||||
|
||||
OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type) {
|
||||
const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
|
||||
|
@ -22,7 +22,7 @@ struct FormatTuple {
|
||||
struct StagingData {
|
||||
GLuint buffer;
|
||||
u32 size = 0;
|
||||
std::span<std::byte> mapped{};
|
||||
std::span<u8> mapped{};
|
||||
GLintptr buffer_offset = 0;
|
||||
};
|
||||
|
||||
@ -48,10 +48,6 @@ public:
|
||||
|
||||
void Finish() const {}
|
||||
|
||||
/// Performs required format convertions on the staging data
|
||||
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
|
||||
std::span<std::byte> dest);
|
||||
|
||||
/// Allocates an OpenGL texture with the specified dimentions and format
|
||||
OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
|
||||
VideoCore::TextureType type);
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
#include <bit>
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/rasterizer_cache/morton_swizzle.h"
|
||||
#include "video_core/rasterizer_cache/texture_codec.h"
|
||||
#include "video_core/rasterizer_cache/utils.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
@ -66,10 +66,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
|
||||
switch (dest) {
|
||||
case vk::Format::eD24UnormS8Uint: {
|
||||
for (; stencil_offset < data.size; depth_offset += 4) {
|
||||
std::byte* ptr = mapped.data() + depth_offset;
|
||||
u8* ptr = mapped.data() + depth_offset;
|
||||
const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
|
||||
const u32 d24 = d24s8 >> 8;
|
||||
mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
|
||||
mapped[stencil_offset] = d24s8 & 0xFF;
|
||||
std::memcpy(ptr, &d24, 4);
|
||||
stencil_offset++;
|
||||
}
|
||||
@ -77,10 +77,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
|
||||
}
|
||||
case vk::Format::eD32SfloatS8Uint: {
|
||||
for (; stencil_offset < data.size; depth_offset += 4) {
|
||||
std::byte* ptr = mapped.data() + depth_offset;
|
||||
u8* ptr = mapped.data() + depth_offset;
|
||||
const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
|
||||
const float d32 = (d24s8 >> 8) / 16777215.f;
|
||||
mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
|
||||
mapped[stencil_offset] = d24s8 & 0xFF;
|
||||
std::memcpy(ptr, &d32, 4);
|
||||
stencil_offset++;
|
||||
}
|
||||
@ -151,7 +151,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
|
||||
return StagingData{
|
||||
.buffer = buffer.Handle(),
|
||||
.size = size,
|
||||
.mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
|
||||
.mapped = std::span<u8>{data, size},
|
||||
.buffer_offset = offset,
|
||||
};
|
||||
}
|
||||
@ -354,46 +354,6 @@ void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) {
|
||||
texture_recycler.emplace(tag, std::move(alloc));
|
||||
}
|
||||
|
||||
void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
|
||||
std::span<std::byte> dest) {
|
||||
if (!NeedsConvertion(surface.pixel_format)) {
|
||||
std::memcpy(dest.data(), source.data(), source.size());
|
||||
return;
|
||||
}
|
||||
|
||||
if (upload) {
|
||||
switch (surface.pixel_format) {
|
||||
case VideoCore::PixelFormat::RGBA8:
|
||||
return Pica::Texture::ConvertABGRToRGBA(source, dest);
|
||||
case VideoCore::PixelFormat::RGB8:
|
||||
return Pica::Texture::ConvertBGRToRGBA(source, dest);
|
||||
case VideoCore::PixelFormat::RGBA4:
|
||||
return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest);
|
||||
case VideoCore::PixelFormat::D24:
|
||||
return Pica::Texture::ConvertD24ToD32(source, dest);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (surface.pixel_format) {
|
||||
case VideoCore::PixelFormat::RGBA8:
|
||||
return Pica::Texture::ConvertABGRToRGBA(source, dest);
|
||||
case VideoCore::PixelFormat::RGBA4:
|
||||
return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
|
||||
case VideoCore::PixelFormat::RGB8:
|
||||
return Pica::Texture::ConvertRGBAToBGR(source, dest);
|
||||
case VideoCore::PixelFormat::D24:
|
||||
return Pica::Texture::ConvertD32ToD24(source, dest);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_WARNING(Render_Vulkan, "Missing linear format convertion: {} {} {}",
|
||||
vk::to_string(surface.traits.native), upload ? "->" : "<-",
|
||||
vk::to_string(surface.alloc.format));
|
||||
}
|
||||
|
||||
bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
|
||||
VideoCore::ClearValue value) {
|
||||
renderpass_cache.ExitRenderpass();
|
||||
|
@ -21,7 +21,7 @@ namespace Vulkan {
|
||||
struct StagingData {
|
||||
vk::Buffer buffer;
|
||||
u32 size = 0;
|
||||
std::span<std::byte> mapped{};
|
||||
std::span<u8> mapped{};
|
||||
u64 buffer_offset = 0;
|
||||
};
|
||||
|
||||
@ -108,10 +108,6 @@ public:
|
||||
VideoCore::TextureType type, vk::Format format,
|
||||
vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect);
|
||||
|
||||
/// Performs required format convertions on the staging data
|
||||
void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
|
||||
std::span<std::byte> dest);
|
||||
|
||||
/// Fills the rectangle of the texture with the clear value provided
|
||||
bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
|
||||
VideoCore::ClearValue value);
|
||||
|
@ -105,47 +105,36 @@ Common::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned in
|
||||
}
|
||||
|
||||
case TextureFormat::IA8: {
|
||||
const u8* source_ptr = source + MortonInterleave(x, y) * 2;
|
||||
|
||||
if (disable_alpha) {
|
||||
// Show intensity as red, alpha as green
|
||||
return {source_ptr[1], source_ptr[0], 0, 255};
|
||||
} else {
|
||||
return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
|
||||
}
|
||||
auto res = Common::Color::DecodeIA8(source + MortonInterleave(x, y) * 2);
|
||||
return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
|
||||
}
|
||||
|
||||
case TextureFormat::RG8: {
|
||||
auto res = Common::Color::DecodeRG8(source + MortonInterleave(x, y) * 2);
|
||||
return {res.r(), res.g(), 0, 255};
|
||||
return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
|
||||
}
|
||||
|
||||
case TextureFormat::I8: {
|
||||
const u8* source_ptr = source + MortonInterleave(x, y);
|
||||
return {*source_ptr, *source_ptr, *source_ptr, 255};
|
||||
auto res = Common::Color::DecodeI8(source + MortonInterleave(x, y) * 2);
|
||||
return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
|
||||
}
|
||||
|
||||
case TextureFormat::A8: {
|
||||
const u8* source_ptr = source + MortonInterleave(x, y);
|
||||
|
||||
auto res = Common::Color::DecodeA8(source + MortonInterleave(x, y) * 2);
|
||||
if (disable_alpha) {
|
||||
return {*source_ptr, *source_ptr, *source_ptr, 255};
|
||||
return {res.a(), res.a(), res.a(), 255};
|
||||
} else {
|
||||
return {0, 0, 0, *source_ptr};
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
case TextureFormat::IA4: {
|
||||
const u8* source_ptr = source + MortonInterleave(x, y);
|
||||
|
||||
u8 i = Common::Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
|
||||
u8 a = Common::Color::Convert4To8((*source_ptr) & 0xF);
|
||||
|
||||
auto res = Common::Color::DecodeIA4(source + MortonInterleave(x, y) * 2);
|
||||
if (disable_alpha) {
|
||||
// Show intensity as red, alpha as green
|
||||
return {i, a, 0, 255};
|
||||
return {res.r(), res.a(), 0, 255};
|
||||
} else {
|
||||
return {i, i, i, a};
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
@ -223,139 +212,4 @@ TextureInfo TextureInfo::FromPicaRegister(const TexturingRegs::TextureConfig& co
|
||||
return info;
|
||||
}
|
||||
|
||||
/**
 * Converts tightly packed 3-byte BGR pixels to RGB by exchanging the first and
 * third byte of every pixel.
 *
 * Only whole pixels that fit in both spans are converted, and the exchange is
 * done byte by byte so the result does not depend on the host byte order.
 *
 * @param source Span to the source pixel data (3 bytes per pixel)
 * @param dest Span to the destination pixel data (3 bytes per pixel)
 */
void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> dest) {
    constexpr std::size_t bytes_per_pixel = 3;
    const std::size_t usable = source.size() < dest.size() ? source.size() : dest.size();
    for (std::size_t offset = 0; offset + bytes_per_pixel <= usable; offset += bytes_per_pixel) {
        // Read the whole pixel before writing so the conversion also works when
        // source and dest refer to the same buffer.
        const std::byte first = source[offset];
        const std::byte middle = source[offset + 1];
        const std::byte last = source[offset + 2];
        dest[offset] = last;
        dest[offset + 1] = middle;
        dest[offset + 2] = first;
    }
}
|
||||
|
||||
/**
 * Converts tightly packed 3-byte BGR pixels to 4-byte RGBA pixels. The color
 * bytes are written in reverse order and the fourth byte is set to 0xFF.
 *
 * Only whole pixels available in both spans are converted, so a short source
 * or a destination with a partial trailing pixel is never over-run.
 *
 * @param source Span to the source pixel data (3 bytes per pixel)
 * @param dest Span to the destination pixel data (4 bytes per pixel)
 */
void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 3;
    const std::size_t dst_pixels = dest.size() / 4;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        const std::byte* in = source.data() + pixel * 3;
        std::byte* out = dest.data() + pixel * 4;
        out[0] = in[2];
        out[1] = in[1];
        out[2] = in[0];
        out[3] = std::byte{0xFF};
    }
}
|
||||
|
||||
/**
 * Converts 4-byte RGBA pixels to tightly packed 3-byte BGR pixels. The first
 * three bytes of each source pixel are written in reverse order and the fourth
 * byte is dropped.
 *
 * Only whole pixels available in both spans are converted, so a short source
 * or a destination with a partial trailing pixel is never over-run.
 *
 * @param source Span to the source pixel data (4 bytes per pixel)
 * @param dest Span to the destination pixel data (3 bytes per pixel)
 */
void ConvertRGBAToBGR(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 4;
    const std::size_t dst_pixels = dest.size() / 3;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        const std::byte* in = source.data() + pixel * 4;
        std::byte* out = dest.data() + pixel * 3;
        out[0] = in[2];
        out[1] = in[1];
        out[2] = in[0];
    }
}
|
||||
|
||||
/**
 * Converts 4-byte ABGR pixels to RGBA by passing each 32-bit pixel through
 * Common::swap32 (presumably a 32-bit byte swap - confirm against its
 * definition).
 *
 * The offset is a std::size_t so it matches the span sizes, and only whole
 * pixels that fit in both spans are converted.
 *
 * @param source Span to the source pixel data (4 bytes per pixel)
 * @param dest Span to the destination pixel data (4 bytes per pixel)
 */
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
    constexpr std::size_t bytes_per_pixel = sizeof(u32);
    const std::size_t usable = source.size() < dest.size() ? source.size() : dest.size();
    for (std::size_t offset = 0; offset + bytes_per_pixel <= usable; offset += bytes_per_pixel) {
        // memcpy in and out keeps the accesses alignment-safe.
        u32 abgr{};
        std::memcpy(&abgr, source.data() + offset, bytes_per_pixel);
        const u32 rgba = Common::swap32(abgr);
        std::memcpy(dest.data() + offset, &rgba, bytes_per_pixel);
    }
}
|
||||
|
||||
/**
 * Expands 2-byte RGBA4 pixels to 4-byte RGBA8 pixels using
 * Common::Color::DecodeRGBA4.
 *
 * Offsets are std::size_t to match the span sizes, and only whole pixels
 * available in both spans are converted.
 *
 * @param source Span to the source pixel data (2 bytes per pixel)
 * @param dest Span to the destination pixel data (4 bytes per pixel)
 */
void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 2;
    const std::size_t dst_pixels = dest.size() / 4;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        const auto* encoded = reinterpret_cast<const u8*>(source.data() + pixel * 2);
        auto rgba = Common::Color::DecodeRGBA4(encoded);
        std::memcpy(dest.data() + pixel * 4, rgba.AsArray(), sizeof(rgba));
    }
}
|
||||
|
||||
/**
 * Packs 4-byte RGBA8 pixels into 2-byte RGBA4 pixels using
 * Common::Color::EncodeRGBA4.
 *
 * Offsets are std::size_t to match the span sizes, and only whole pixels
 * available in both spans are converted.
 *
 * @param source Span to the source pixel data (4 bytes per pixel)
 * @param dest Span to the destination pixel data (2 bytes per pixel)
 */
void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 4;
    const std::size_t dst_pixels = dest.size() / 2;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        Common::Vec4<u8> rgba;
        std::memcpy(rgba.AsArray(), source.data() + pixel * 4, sizeof(rgba));
        Common::Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest.data() + pixel * 2));
    }
}
|
||||
|
||||
/**
 * Expands 2-byte RGB5A1 pixels to 4-byte RGBA8 pixels using
 * Common::Color::DecodeRGB5A1.
 *
 * Offsets are std::size_t to match the span sizes, and only whole pixels
 * available in both spans are converted.
 *
 * @param source Span to the source pixel data (2 bytes per pixel)
 * @param dest Span to the destination pixel data (4 bytes per pixel)
 */
void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 2;
    const std::size_t dst_pixels = dest.size() / 4;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        const auto* encoded = reinterpret_cast<const u8*>(source.data() + pixel * 2);
        auto rgba = Common::Color::DecodeRGB5A1(encoded);
        std::memcpy(dest.data() + pixel * 4, rgba.AsArray(), sizeof(rgba));
    }
}
|
||||
|
||||
/**
 * Packs 4-byte RGBA8 pixels into 2-byte RGB5A1 pixels using
 * Common::Color::EncodeRGB5A1.
 *
 * Offsets are std::size_t to match the span sizes, and only whole pixels
 * available in both spans are converted.
 *
 * @param source Span to the source pixel data (4 bytes per pixel)
 * @param dest Span to the destination pixel data (2 bytes per pixel)
 */
void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_pixels = source.size() / 4;
    const std::size_t dst_pixels = dest.size() / 2;
    const std::size_t pixel_count = src_pixels < dst_pixels ? src_pixels : dst_pixels;
    for (std::size_t pixel = 0; pixel < pixel_count; ++pixel) {
        Common::Vec4<u8> rgba;
        std::memcpy(rgba.AsArray(), source.data() + pixel * 4, sizeof(rgba));
        Common::Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest.data() + pixel * 2));
    }
}
|
||||
|
||||
/**
 * Converts 3-byte D24 depth values to 4-byte floating point depth values.
 * Each value is decoded with Common::Color::DecodeD24 and divided by
 * 16777215 (0xFFFFFF) before being stored.
 *
 * Offsets are std::size_t to match the span sizes, and only whole values
 * available in both spans are converted.
 *
 * @param source Span to the source depth data (3 bytes per value)
 * @param dest Span to the destination depth data (4 bytes per value)
 */
void ConvertD24ToD32(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_values = source.size() / 3;
    const std::size_t dst_values = dest.size() / 4;
    const std::size_t value_count = src_values < dst_values ? src_values : dst_values;
    for (std::size_t value = 0; value < value_count; ++value) {
        const auto* encoded = reinterpret_cast<const u8*>(source.data() + value * 3);
        const auto d32 = Common::Color::DecodeD24(encoded) / 16777215.f;
        std::memcpy(dest.data() + value * 4, &d32, sizeof(d32));
    }
}
|
||||
|
||||
/**
 * Converts 4-byte floating point depth values to 3-byte D24 depth values.
 * Each float is multiplied by 0xFFFFFF and written with
 * Common::Color::EncodeD24.
 *
 * Offsets are std::size_t to match the span sizes, and only whole values
 * available in both spans are converted.
 *
 * @param source Span to the source depth data (4 bytes per value)
 * @param dest Span to the destination depth data (3 bytes per value)
 */
void ConvertD32ToD24(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t src_values = source.size() / 4;
    const std::size_t dst_values = dest.size() / 3;
    const std::size_t value_count = src_values < dst_values ? src_values : dst_values;
    for (std::size_t value = 0; value < value_count; ++value) {
        float d32;
        std::memcpy(&d32, source.data() + value * 4, sizeof(d32));
        Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast<u8*>(dest.data() + value * 3));
    }
}
|
||||
|
||||
/**
 * Builds 4-byte interleaved depth/stencil texels from a planar source.
 *
 * The source is read as a run of 4-byte float depth values starting at offset
 * 0, with the stencil bytes starting at offset 4 * source.size() / 5. Each
 * output texel is the stencil byte followed by the low three bytes (in host
 * memory order) of depth * 0xFFFFFF converted to u32.
 *
 * @param source Span to the planar source data
 * @param dest Span to the interleaved destination data (4 bytes per texel)
 */
void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t stencil_base = 4 * source.size() / 5;
    for (std::size_t texel = 0; texel * 4 < dest.size(); ++texel) {
        // Depth floats and output texels are both 4 bytes wide, so they share
        // the same byte offset.
        const std::size_t offset = texel * 4;

        float depth_f32;
        std::memcpy(&depth_f32, source.data() + offset, sizeof(float));
        u32 depth_u24 = depth_f32 * 0xFFFFFF;

        dest[offset] = source[stencil_base + texel];
        std::memcpy(dest.data() + offset + 1, &depth_u24, 3);
    }
}
|
||||
|
||||
/**
 * Interleaves planar depth/stencil data into 4-byte texels.
 *
 * The source is read as a run of 3-byte depth values starting at offset 0,
 * with the stencil bytes starting at offset 3 * source.size() / 4. Each output
 * texel is the stencil byte followed by the three depth bytes.
 *
 * @param source Span to the planar source data
 * @param dest Span to the interleaved destination data (4 bytes per texel)
 */
void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t stencil_base = 3 * source.size() / 4;
    for (std::size_t texel = 0; texel * 4 < dest.size(); ++texel) {
        const std::size_t out = texel * 4;
        dest[out] = source[stencil_base + texel];
        std::memcpy(dest.data() + out + 1, source.data() + texel * 3, 3);
    }
}
|
||||
|
||||
/**
 * Splits 4-byte interleaved depth/stencil texels into planar data.
 *
 * For every texel the first source byte is stored in the stencil plane, which
 * starts at offset 3 * source.size() / 4 of the destination, and the following
 * three bytes are appended to the depth plane at the start of the destination.
 * The number of texels processed is derived from dest.size().
 *
 * @param source Span to the interleaved source data (4 bytes per texel)
 * @param dest Span to the planar destination data
 */
void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
    const std::size_t stencil_base = 3 * source.size() / 4;
    for (std::size_t texel = 0; texel * 4 < dest.size(); ++texel) {
        const std::size_t in = texel * 4;
        dest[stencil_base + texel] = source[in];
        std::memcpy(dest.data() + texel * 3, source.data() + in + 1, 3);
    }
}
|
||||
|
||||
} // namespace Pica::Texture
|
||||
|
@ -55,48 +55,4 @@ Common::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y,
|
||||
Common::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y,
|
||||
const TextureInfo& info, bool disable_alpha);
|
||||
|
||||
/**
|
||||
* Converts pixel data encoded in BGR format to RGB
|
||||
*
|
||||
* @param source Span to the source pixel data
|
||||
* @param dest Span to the destination pixel data
|
||||
*/
|
||||
void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
|
||||
* Converts pixel data encoded in BGR format to RGBA
|
||||
*
|
||||
* @param source Span to the source pixel data
|
||||
* @param dest Span to the destination pixel data
|
||||
*/
|
||||
void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts pixel data encoded in RGBA format to BGR, dropping the fourth byte of each pixel
 *
 * @param source Span to the source pixel data
 * @param dest Span to the destination pixel data
 */
void ConvertRGBAToBGR(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
|
||||
* Converts pixel data encoded in ABGR format to RGBA
|
||||
*
|
||||
* @param source Span to the source pixel data
|
||||
* @param dest Span to the destination pixel data
|
||||
*/
|
||||
void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts pixel data encoded in RGBA4 format (2 bytes per pixel) to RGBA8 (4 bytes per pixel)
 *
 * @param source Span to the source pixel data
 * @param dest Span to the destination pixel data
 */
void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts pixel data encoded in RGBA8 format (4 bytes per pixel) to RGBA4 (2 bytes per pixel)
 *
 * @param source Span to the source pixel data
 * @param dest Span to the destination pixel data
 */
void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts pixel data encoded in RGB5A1 format (2 bytes per pixel) to RGBA8 (4 bytes per pixel)
 *
 * @param source Span to the source pixel data
 * @param dest Span to the destination pixel data
 */
void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts pixel data encoded in RGBA8 format (4 bytes per pixel) to RGB5A1 (2 bytes per pixel)
 *
 * @param source Span to the source pixel data
 * @param dest Span to the destination pixel data
 */
void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts depth data encoded in D24 format (3 bytes per value) to 32-bit floating point
 * values scaled by 1 / 16777215
 *
 * @param source Span to the source depth data
 * @param dest Span to the destination depth data
 */
void ConvertD24ToD32(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts 32-bit floating point depth data to D24 format (3 bytes per value) by scaling
 * each value by 0xFFFFFF
 *
 * @param source Span to the source depth data
 * @param dest Span to the destination depth data
 */
void ConvertD32ToD24(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Converts planar depth/stencil data (32-bit float depth values followed by 8-bit stencil
 * values) to interleaved 4-byte texels holding the stencil byte and a 24-bit depth value
 *
 * @param source Span to the planar source data
 * @param dest Span to the interleaved destination data
 */
void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Interleaves planar depth/stencil data (3-byte depth values followed by 8-bit stencil
 * values) into 4-byte texels holding the stencil byte followed by the depth bytes
 *
 * @param source Span to the planar source data
 * @param dest Span to the interleaved destination data
 */
void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
/**
 * Splits interleaved 4-byte depth/stencil texels (stencil byte followed by 3 depth bytes)
 * into planar data: all depth values first, followed by the stencil values
 *
 * @param source Span to the interleaved source data
 * @param dest Span to the planar destination data
 */
void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
|
||||
|
||||
} // namespace Pica::Texture
|
||||
|
Reference in New Issue
Block a user