From 489248e77f9a6d850564f46e2c37cecfd43b5e65 Mon Sep 17 00:00:00 2001 From: Steveice10 <1269164+Steveice10@users.noreply.github.com> Date: Fri, 13 Jan 2023 03:54:42 -0800 Subject: [PATCH] video_core: De-duplicate texture format conversion logic. (#21) * video_core: De-duplicate texture format conversion logic. * video_core: Replace std::byte with u8 and remove excess linear texture converters. * video_core: Remove implicit RGBA conversions from convert table for now, add comments explaining omissions. --- src/common/color.h | 81 +++ src/common/memory_ref.h | 4 +- src/video_core/CMakeLists.txt | 2 +- .../rasterizer_cache/morton_swizzle.h | 367 ------------ .../rasterizer_cache/rasterizer_cache.h | 18 +- .../rasterizer_cache/texture_codec.h | 550 ++++++++++++++++++ src/video_core/rasterizer_cache/utils.cpp | 72 ++- src/video_core/rasterizer_cache/utils.h | 36 +- .../renderer_opengl/gl_texture_runtime.cpp | 19 +- .../renderer_opengl/gl_texture_runtime.h | 6 +- .../renderer_vulkan/vk_texture_runtime.cpp | 52 +- .../renderer_vulkan/vk_texture_runtime.h | 6 +- src/video_core/texture/texture_decode.cpp | 168 +----- src/video_core/texture/texture_decode.h | 44 -- 14 files changed, 726 insertions(+), 699 deletions(-) delete mode 100644 src/video_core/rasterizer_cache/morton_swizzle.h create mode 100644 src/video_core/rasterizer_cache/texture_codec.h diff --git a/src/common/color.h b/src/common/color.h index bbcac858e..7e0ae8f4c 100644 --- a/src/common/color.h +++ b/src/common/color.h @@ -52,6 +52,11 @@ namespace Common::Color { return value >> 2; } +/// Averages the RGB components of a color +[[nodiscard]] constexpr u8 AverageRgbComponents(const Common::Vec4& color) { + return (static_cast(color.r()) + color.g() + color.b()) / 3; +} + /** * Decode a color stored in RGBA8 format * @param bytes Pointer to encoded source color @@ -115,6 +120,44 @@ namespace Common::Color { Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF)}; } +/** + * Decode a color stored in IA8 
format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Common::Vec4 + */ +[[nodiscard]] inline Common::Vec4 DecodeIA8(const u8* bytes) { + return {bytes[1], bytes[1], bytes[1], bytes[0]}; +} + +/** + * Decode a color stored in I8 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Common::Vec4 + */ +[[nodiscard]] inline Common::Vec4 DecodeI8(const u8* bytes) { + return {bytes[0], bytes[0], bytes[0], 255}; +} + +/** + * Decode a color stored in A8 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Common::Vec4 + */ +[[nodiscard]] inline Common::Vec4 DecodeA8(const u8* bytes) { + return {0, 0, 0, bytes[0]}; +} + +/** + * Decode a color stored in IA4 format + * @param bytes Pointer to encoded source color + * @return Result color decoded as Common::Vec4 + */ +[[nodiscard]] inline Common::Vec4 DecodeIA4(const u8* bytes) { + u8 i = Common::Color::Convert4To8((bytes[0] & 0xF0) >> 4); + u8 a = Common::Color::Convert4To8(bytes[0] & 0x0F); + return {i, i, i, a}; +} + /** * Decode a depth value stored in D16 format * @param bytes Pointer to encoded source value @@ -176,6 +219,7 @@ inline void EncodeRG8(const Common::Vec4& color, u8* bytes) { bytes[1] = color.r(); bytes[0] = color.g(); } + /** * Encode a color as RGB565 format * @param color Source color to encode @@ -212,6 +256,43 @@ inline void EncodeRGBA4(const Common::Vec4& color, u8* bytes) { std::memcpy(bytes, &data, sizeof(data)); } +/** + * Encode a color as IA8 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeIA8(const Common::Vec4& color, u8* bytes) { + bytes[1] = AverageRgbComponents(color); + bytes[0] = color.a(); +} + +/** + * Encode a color as I8 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeI8(const Common::Vec4& color, u8* bytes) 
{ + bytes[0] = AverageRgbComponents(color); +} + +/** + * Encode a color as A8 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeA8(const Common::Vec4& color, u8* bytes) { + bytes[0] = color.a(); +} + +/** + * Encode a color as IA4 format + * @param color Source color to encode + * @param bytes Destination pointer to store encoded color + */ +inline void EncodeIA4(const Common::Vec4& color, u8* bytes) { + bytes[0] = (Convert8To4(AverageRgbComponents(color)) << 4) | Convert8To4(color.a()); +} + /** * Encode a 16 bit depth value as D16 format * @param value 16 bit source depth value to encode diff --git a/src/common/memory_ref.h b/src/common/memory_ref.h index 7e313961f..a63791a88 100644 --- a/src/common/memory_ref.h +++ b/src/common/memory_ref.h @@ -107,11 +107,11 @@ public: } auto GetWriteBytes(std::size_t size) { - return std::span{reinterpret_cast(cptr), size > csize ? csize : size}; + return std::span{cptr, size > csize ? csize : size}; } auto GetReadBytes(std::size_t size) const { - return std::span{reinterpret_cast(cptr), size > csize ? csize : size}; + return std::span{cptr, size > csize ? 
csize : size}; } std::size_t GetSize() const { diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 498f4ac51..2570a42f0 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -28,7 +28,7 @@ add_library(video_core STATIC regs_texturing.h renderer_base.cpp renderer_base.h - rasterizer_cache/morton_swizzle.h + rasterizer_cache/texture_codec.h rasterizer_cache/pixel_format.cpp rasterizer_cache/pixel_format.h rasterizer_cache/rasterizer_cache.cpp diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h deleted file mode 100644 index 011eaa8b0..000000000 --- a/src/video_core/rasterizer_cache/morton_swizzle.h +++ /dev/null @@ -1,367 +0,0 @@ -// Copyright 2022 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once -#include -#include -#include -#include "common/alignment.h" -#include "common/color.h" -#include "video_core/rasterizer_cache/pixel_format.h" -#include "video_core/texture/etc1.h" -#include "video_core/utils.h" - -namespace VideoCore { - -template -inline T MakeInt(const std::byte* bytes) { - T integer{}; - std::memcpy(&integer, bytes, sizeof(T)); - - return integer; -} - -template -constexpr void DecodePixel(const std::byte* source, std::byte* dest) { - constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - - if constexpr (format == PixelFormat::D24S8) { - const u32 d24s8 = std::rotl(MakeInt(source), 8); - std::memcpy(dest, &d24s8, sizeof(u32)); - } else if constexpr (format == PixelFormat::RGBA8 && converted) { - const u32 rgba = MakeInt(source); - const u32 abgr = Common::swap32(rgba); - std::memcpy(dest, &abgr, 4); - } else if constexpr (format == PixelFormat::RGB8 && converted) { - u32 rgb{}; - std::memcpy(&rgb, source, 3); - const u32 abgr = Common::swap32(rgb << 8) | 0xFF000000; - std::memcpy(dest, &abgr, 4); - } else if constexpr (format == PixelFormat::RGB565 && 
converted) { - const auto abgr = Common::Color::DecodeRGB565(reinterpret_cast(source)); - std::memcpy(dest, abgr.AsArray(), 4); - } else if constexpr (format == PixelFormat::RGB5A1 && converted) { - const auto abgr = Common::Color::DecodeRGB5A1(reinterpret_cast(source)); - std::memcpy(dest, abgr.AsArray(), 4); - } else if constexpr (format == PixelFormat::RGBA4 && converted) { - const auto abgr = Common::Color::DecodeRGBA4(reinterpret_cast(source)); - std::memcpy(dest, abgr.AsArray(), 4); - } else if constexpr (format == PixelFormat::IA8) { - std::memset(dest, static_cast(source[1]), 3); - dest[3] = source[0]; - } else if constexpr (format == PixelFormat::RG8) { - const auto rgba = Common::Color::DecodeRG8(reinterpret_cast(source)); - std::memcpy(dest, rgba.AsArray(), 4); - } else if constexpr (format == PixelFormat::I8) { - std::memset(dest, static_cast(source[0]), 3); - dest[3] = std::byte{255}; - } else if constexpr (format == PixelFormat::A8) { - std::memset(dest, 0, 3); - dest[3] = source[0]; - } else if constexpr (format == PixelFormat::IA4) { - const u8 ia4 = static_cast(source[0]); - std::memset(dest, Common::Color::Convert4To8(ia4 >> 4), 3); - dest[3] = std::byte{Common::Color::Convert4To8(ia4 & 0xF)}; - } else if constexpr (format == PixelFormat::D24 && converted) { - const auto d32 = Common::Color::DecodeD24(reinterpret_cast(source)) / 16777215.f; - std::memcpy(dest, &d32, sizeof(d32)); - } else { - std::memcpy(dest, source, bytes_per_pixel); - } -} - -template -constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) { - const u32 morton_offset = VideoCore::MortonInterleave(x, y); - const u8 value = static_cast(source_tile[morton_offset >> 1]); - const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? 
(value >> 4) : (value & 0xF)); - - if constexpr (format == PixelFormat::I4) { - std::memset(dest_pixel, static_cast(pixel), 3); - dest_pixel[3] = std::byte{255}; - } else { - std::memset(dest_pixel, 0, 3); - dest_pixel[3] = std::byte{pixel}; - } -} - -template -constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) { - constexpr u32 subtile_width = 4; - constexpr u32 subtile_height = 4; - constexpr bool has_alpha = format == PixelFormat::ETC1A4; - constexpr std::size_t subtile_size = has_alpha ? 16 : 8; - - const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height); - x %= subtile_width; - y %= subtile_height; - - const std::byte* subtile_ptr = source_tile + subtile_index * subtile_size; - - u8 alpha = 255; - if constexpr (has_alpha) { - u64_le packed_alpha; - std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64)); - subtile_ptr += sizeof(u64); - - alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF); - } - - const u64_le subtile_data = MakeInt(subtile_ptr); - const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y); - - // Copy the uncompressed pixel to the destination - std::memcpy(dest_pixel, rgb.AsArray(), 3); - dest_pixel[3] = std::byte{alpha}; -} - -template -constexpr void EncodePixel(const std::byte* source, std::byte* dest) { - constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - - if constexpr (format == PixelFormat::D24S8) { - const u32 s8d24 = std::rotr(MakeInt(source), 8); - std::memcpy(dest, &s8d24, sizeof(u32)); - } else if constexpr (format == PixelFormat::RGBA8 && converted) { - const u32 abgr = MakeInt(source); - const u32 rgba = Common::swap32(abgr); - std::memcpy(dest, &rgba, 4); - } else if constexpr (format == PixelFormat::RGB8 && converted) { - const u32 abgr = MakeInt(source); - const u32 rgb = Common::swap32(abgr << 8); - std::memcpy(dest, &rgb, 3); - } else if constexpr (format == PixelFormat::RGB565 && converted) { - 
Common::Vec4 rgba; - std::memcpy(rgba.AsArray(), source, 4); - Common::Color::EncodeRGB565(rgba, reinterpret_cast(dest)); - } else if constexpr (format == PixelFormat::RGB5A1 && converted) { - Common::Vec4 rgba; - std::memcpy(rgba.AsArray(), source, 4); - Common::Color::EncodeRGB5A1(rgba, reinterpret_cast(dest)); - } else if constexpr (format == PixelFormat::RGBA4 && converted) { - Common::Vec4 rgba; - std::memcpy(rgba.AsArray(), source, 4); - Common::Color::EncodeRGBA4(rgba, reinterpret_cast(dest)); - } else if constexpr (format == PixelFormat::D24 && converted) { - float d32; - std::memcpy(&d32, source, sizeof(d32)); - Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast(dest)); - } else { - std::memcpy(dest, source, bytes_per_pixel); - } -} - -template -constexpr void MortonCopyTile(u32 stride, std::span tile_buffer, - std::span linear_buffer) { - constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format); - constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4; - constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4; - - for (u32 y = 0; y < 8; y++) { - for (u32 x = 0; x < 8; x++) { - const auto tiled_pixel = tile_buffer.subspan( - VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel); - const auto linear_pixel = linear_buffer.subspan( - ((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel); - if constexpr (morton_to_linear) { - if constexpr (is_compressed) { - DecodePixelETC1(x, y, tile_buffer.data(), linear_pixel.data()); - } else if constexpr (is_4bit) { - DecodePixel4(x, y, tile_buffer.data(), linear_pixel.data()); - } else { - DecodePixel(tiled_pixel.data(), linear_pixel.data()); - } - } else { - EncodePixel(linear_pixel.data(), tiled_pixel.data()); - } - } - } -} - -/** - * @brief Performs morton to/from linear convertions on the provided pixel data - * @param converted If 
true performs RGBA8 to/from convertion to all color formats - * @param width, height The dimentions of the rectangular region of pixels in linear_buffer - * @param start_offset The number of bytes from the start of the first tile to the start of - * tiled_buffer - * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer - * @param linear_buffer The linear pixel data - * @param tiled_buffer The tiled pixel data - * - * The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for - * converting between linear and morton tiled layouts. The function handles both convertions but - * there are slightly different paths and inputs for each: - * - * Morton to Linear: - * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the - * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire - * texture area, but rather the specific rectangle affected by the upload. - * - * Linear to Morton: - * This is similar to the other convertion but with some differences. In this case tiled_buffer is - * not required to be aligned to any specific boundary which requires special care. - * start_offset/end_offset are useful here as they tell us exactly where the data should be placed - * in the linear_buffer. - */ -template -static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, - std::span linear_buffer, - std::span tiled_buffer) { - constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; - constexpr u32 aligned_bytes_per_pixel = converted ? 
4 : GetBytesPerPixel(format); - constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; - static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); - - const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel; - const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size); - const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size); - const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size); - - ASSERT(!morton_to_linear || - (aligned_start_offset == start_offset && aligned_end_offset == end_offset)); - - // In OpenGL the texture origin is in the bottom left corner as opposed to other - // APIs that have it at the top left. To avoid flipping texture coordinates in - // the shader we read/write the linear buffer from the bottom up - u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel; - u32 tiled_offset = 0; - u32 x = 0; - u32 y = 0; - - const auto LinearNextTile = [&] { - x = (x + 8) % width; - linear_offset += 8 * aligned_bytes_per_pixel; - if (!x) { - y = (y + 8) % height; - if (!y) { - return; - } - - linear_offset -= width * 9 * aligned_bytes_per_pixel; - } - }; - - // If during a texture download the start coordinate is not tile aligned, swizzle - // the tile affected to a temporary buffer and copy the part we are interested in - if (start_offset < aligned_start_offset && !morton_to_linear) { - std::array tmp_buf; - auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); - MortonCopyTile(width, tmp_buf, linear_data); - - std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, - std::min(aligned_start_offset, end_offset) - start_offset); - - tiled_offset += aligned_start_offset - start_offset; - LinearNextTile(); - } - - const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset; - while (tiled_offset < buffer_end) { - auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); - 
auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); - MortonCopyTile(width, tiled_data, linear_data); - tiled_offset += tile_size; - LinearNextTile(); - } - - // If during a texture download the end coordinate is not tile aligned, swizzle - // the tile affected to a temporary buffer and copy the part we are interested in - if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) { - std::array tmp_buf; - auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); - MortonCopyTile(width, tmp_buf, linear_data); - std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), - end_offset - aligned_end_offset); - } -} - -using MortonFunc = void (*)(u32, u32, u32, u32, std::span, std::span); - -static constexpr std::array UNSWIZZLE_TABLE = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 - MortonCopy, // 5 - MortonCopy, // 6 - MortonCopy, // 7 - MortonCopy, // 8 - MortonCopy, // 9 - MortonCopy, // 10 - MortonCopy, // 11 - MortonCopy, // 12 - MortonCopy, // 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 -}; - -static constexpr std::array UNSWIZZLE_TABLE_CONVERTED = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 - nullptr, // 5 - nullptr, // 6 - nullptr, // 7 - nullptr, // 8 - nullptr, // 9 - nullptr, // 10 - nullptr, // 11 - nullptr, // 12 - nullptr, // 13 - nullptr, // 14 - nullptr, // 15 - MortonCopy, // 16 - nullptr, // 17 -}; - -static constexpr std::array SWIZZLE_TABLE = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - MortonCopy, // 3 - MortonCopy, // 4 - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, // 5 - 13 - MortonCopy, // 14 - nullptr, // 15 - MortonCopy, // 16 - MortonCopy // 17 -}; - -static constexpr std::array SWIZZLE_TABLE_CONVERTED = { - MortonCopy, // 0 - MortonCopy, // 1 - MortonCopy, // 2 - 
MortonCopy, // 3 - MortonCopy, // 4 - nullptr, // 5 - nullptr, // 6 - nullptr, // 7 - nullptr, // 8 - nullptr, // 9 - nullptr, // 10 - nullptr, // 11 - nullptr, // 12 - nullptr, // 13 - nullptr, // 14 - nullptr, // 15 - MortonCopy, // 16 - nullptr, // 17 -}; - -} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h index 8f82f4662..52833b635 100644 --- a/src/video_core/rasterizer_cache/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache/rasterizer_cache.h @@ -167,7 +167,7 @@ private: SurfaceSet remove_surfaces; u16 resolution_scale_factor; std::vector> download_queue; - std::vector staging_buffer; + std::vector staging_buffer; std::unordered_map texture_cube_cache; std::recursive_mutex mutex; }; @@ -916,12 +916,8 @@ void RasterizerCache::UploadSurface(const Surface& surface, SurfaceInterval i } const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr); - if (surface->is_tiled) { - UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, - runtime.NeedsConvertion(surface->pixel_format)); - } else { - runtime.FormatConvert(*surface, true, upload_data, staging.mapped); - } + DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped, + runtime.NeedsConvertion(surface->pixel_format)); const BufferTextureCopy upload = {.buffer_offset = 0, .buffer_size = staging.size, @@ -957,12 +953,8 @@ void RasterizerCache::DownloadSurface(const Surface& surface, SurfaceInterval download_queue.push_back([this, surface, flush_start, flush_end, flush_info, mapped = staging.mapped, download_dest]() { - if (surface->is_tiled) { - SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest, - runtime.NeedsConvertion(surface->pixel_format)); - } else { - runtime.FormatConvert(*surface, false, mapped, download_dest); - } + EncodeTexture(flush_info, flush_start, flush_end, mapped, download_dest, + 
runtime.NeedsConvertion(surface->pixel_format)); }); } diff --git a/src/video_core/rasterizer_cache/texture_codec.h b/src/video_core/rasterizer_cache/texture_codec.h new file mode 100644 index 000000000..6eb64d486 --- /dev/null +++ b/src/video_core/rasterizer_cache/texture_codec.h @@ -0,0 +1,550 @@ +// Copyright 2022 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once +#include +#include +#include +#include "common/alignment.h" +#include "common/color.h" +#include "video_core/rasterizer_cache/pixel_format.h" +#include "video_core/texture/etc1.h" +#include "video_core/utils.h" + +namespace VideoCore { + +template +inline T MakeInt(const u8* bytes) { + T integer{}; + std::memcpy(&integer, bytes, sizeof(T)); + + return integer; +} + +template +constexpr void DecodePixel(const u8* source, u8* dest) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + + if constexpr (format == PixelFormat::RGBA8 && converted) { + const auto abgr = Common::Color::DecodeRGBA8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGB8 && converted) { + const auto abgr = Common::Color::DecodeRGB8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGB565 && converted) { + const auto abgr = Common::Color::DecodeRGB565(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGB5A1 && converted) { + const auto abgr = Common::Color::DecodeRGB5A1(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RGBA4 && converted) { + const auto abgr = Common::Color::DecodeRGBA4(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::IA8) { + const auto abgr = Common::Color::DecodeIA8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::RG8) { + const auto abgr = 
Common::Color::DecodeRG8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::I8) { + const auto abgr = Common::Color::DecodeI8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::A8) { + const auto abgr = Common::Color::DecodeA8(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::IA4) { + const auto abgr = Common::Color::DecodeIA4(source); + std::memcpy(dest, abgr.AsArray(), 4); + } else if constexpr (format == PixelFormat::D16 && converted) { + const auto d32 = Common::Color::DecodeD16(source) / 65535.f; + std::memcpy(dest, &d32, sizeof(d32)); + } else if constexpr (format == PixelFormat::D24 && converted) { + const auto d32 = Common::Color::DecodeD24(source) / 16777215.f; + std::memcpy(dest, &d32, sizeof(d32)); + } else if constexpr (format == PixelFormat::D24S8) { + const u32 d24s8 = std::rotl(MakeInt(source), 8); + std::memcpy(dest, &d24s8, sizeof(u32)); + } else { + std::memcpy(dest, source, bytes_per_pixel); + } +} + +template +constexpr void DecodePixel4(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) { + const u32 morton_offset = VideoCore::MortonInterleave(x, y); + const u8 value = source_tile[morton_offset >> 1]; + const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF)); + + if constexpr (format == PixelFormat::I4) { + std::memset(dest_pixel, pixel, 3); + dest_pixel[3] = 255; + } else { + std::memset(dest_pixel, 0, 3); + dest_pixel[3] = pixel; + } +} + +template +constexpr void DecodePixelETC1(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) { + constexpr u32 subtile_width = 4; + constexpr u32 subtile_height = 4; + constexpr bool has_alpha = format == PixelFormat::ETC1A4; + constexpr std::size_t subtile_size = has_alpha ? 
16 : 8; + + const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height); + x %= subtile_width; + y %= subtile_height; + + const u8* subtile_ptr = source_tile + subtile_index * subtile_size; + + u8 alpha = 255; + if constexpr (has_alpha) { + u64_le packed_alpha; + std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64)); + subtile_ptr += sizeof(u64); + + alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF); + } + + const u64_le subtile_data = MakeInt(subtile_ptr); + const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y); + + // Copy the uncompressed pixel to the destination + std::memcpy(dest_pixel, rgb.AsArray(), 3); + dest_pixel[3] = alpha; +} + +template +constexpr void EncodePixel(const u8* source, u8* dest) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + + if constexpr (format == PixelFormat::RGBA8 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeRGBA8(rgba, dest); + } else if constexpr (format == PixelFormat::RGB8 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeRGB8(rgba, dest); + } else if constexpr (format == PixelFormat::RGB565 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeRGB565(rgba, dest); + } else if constexpr (format == PixelFormat::RGB5A1 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeRGB5A1(rgba, dest); + } else if constexpr (format == PixelFormat::RGBA4 && converted) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeRGBA4(rgba, dest); + } else if constexpr (format == PixelFormat::IA8) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeIA8(rgba, dest); + } else if constexpr (format == PixelFormat::RG8) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + 
Common::Color::EncodeRG8(rgba, dest); + } else if constexpr (format == PixelFormat::I8) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeI8(rgba, dest); + } else if constexpr (format == PixelFormat::A8) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeA8(rgba, dest); + } else if constexpr (format == PixelFormat::IA4) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source, 4); + Common::Color::EncodeIA4(rgba, dest); + } else if constexpr (format == PixelFormat::D16 && converted) { + float d32; + std::memcpy(&d32, source, sizeof(d32)); + Common::Color::EncodeD16(d32 * 0xFFFF, dest); + } else if constexpr (format == PixelFormat::D24 && converted) { + float d32; + std::memcpy(&d32, source, sizeof(d32)); + Common::Color::EncodeD24(d32 * 0xFFFFFF, dest); + } else if constexpr (format == PixelFormat::D24S8) { + const u32 s8d24 = std::rotr(MakeInt(source), 8); + std::memcpy(dest, &s8d24, sizeof(u32)); + } else { + std::memcpy(dest, source, bytes_per_pixel); + } +} + +template +constexpr void EncodePixel4(u32 x, u32 y, const u8* source_pixel, u8* dest_tile_buffer) { + Common::Vec4 rgba; + std::memcpy(rgba.AsArray(), source_pixel, 4); + + u8 pixel; + if constexpr (format == PixelFormat::I4) { + pixel = Common::Color::AverageRgbComponents(rgba); + } else { + pixel = rgba.a(); + } + + const u32 morton_offset = VideoCore::MortonInterleave(x, y); + const u32 byte_offset = morton_offset >> 1; + + const u8 current_values = dest_tile_buffer[byte_offset]; + const u8 new_value = Common::Color::Convert8To4(pixel); + + if (morton_offset % 2) { + dest_tile_buffer[byte_offset] = (new_value << 4) | (current_values & 0x0F); + } else { + dest_tile_buffer[byte_offset] = (current_values & 0xF0) | new_value; + } +} + +template +constexpr void MortonCopyTile(u32 stride, std::span tile_buffer, std::span linear_buffer) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + constexpr u32 
linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format); + constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4; + constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4; + + for (u32 y = 0; y < 8; y++) { + for (u32 x = 0; x < 8; x++) { + const auto tiled_pixel = tile_buffer.subspan( + VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel); + const auto linear_pixel = linear_buffer.subspan( + ((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel); + if constexpr (morton_to_linear) { + if constexpr (is_compressed) { + DecodePixelETC1(x, y, tile_buffer.data(), linear_pixel.data()); + } else if constexpr (is_4bit) { + DecodePixel4(x, y, tile_buffer.data(), linear_pixel.data()); + } else { + DecodePixel(tiled_pixel.data(), linear_pixel.data()); + } + } else { + if constexpr (is_4bit) { + EncodePixel4(x, y, linear_pixel.data(), tile_buffer.data()); + } else { + EncodePixel(linear_pixel.data(), tiled_pixel.data()); + } + } + } + } +} + +/** + * @brief Performs morton to/from linear convertions on the provided pixel data + * @param converted If true performs RGBA8 to/from convertion to all color formats + * @param width, height The dimentions of the rectangular region of pixels in linear_buffer + * @param start_offset The number of bytes from the start of the first tile to the start of + * tiled_buffer + * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer + * @param linear_buffer The linear pixel data + * @param tiled_buffer The tiled pixel data + * + * The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for + * converting between linear and morton tiled layouts. 
The function handles both convertions but + * there are slightly different paths and inputs for each: + * + * Morton to Linear: + * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the + * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire + * texture area, but rather the specific rectangle affected by the upload. + * + * Linear to Morton: + * This is similar to the other convertion but with some differences. In this case tiled_buffer is + * not required to be aligned to any specific boundary which requires special care. + * start_offset/end_offset are useful here as they tell us exactly where the data should be placed + * in the linear_buffer. + */ +template +static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset, + std::span linear_buffer, std::span tiled_buffer) { + constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; + constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format); + constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8; + static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, ""); + + const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel; + const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size); + const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size); + const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size); + + ASSERT(!morton_to_linear || + (aligned_start_offset == start_offset && aligned_end_offset == end_offset)); + + // In OpenGL the texture origin is in the bottom left corner as opposed to other + // APIs that have it at the top left. 
To avoid flipping texture coordinates in + // the shader we read/write the linear buffer from the bottom up + u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel; + u32 tiled_offset = 0; + u32 x = 0; + u32 y = 0; + + const auto LinearNextTile = [&] { + x = (x + 8) % width; + linear_offset += 8 * aligned_bytes_per_pixel; + if (!x) { + y = (y + 8) % height; + if (!y) { + return; + } + + linear_offset -= width * 9 * aligned_bytes_per_pixel; + } + }; + + // If during a texture download the start coordinate is not tile aligned, swizzle + // the tile affected to a temporary buffer and copy the part we are interested in + if (start_offset < aligned_start_offset && !morton_to_linear) { + std::array tmp_buf; + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); + MortonCopyTile(width, tmp_buf, linear_data); + + std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset, + std::min(aligned_start_offset, end_offset) - start_offset); + + tiled_offset += aligned_start_offset - start_offset; + LinearNextTile(); + } + + const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset; + while (tiled_offset < buffer_end) { + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); + auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size); + MortonCopyTile(width, tiled_data, linear_data); + tiled_offset += tile_size; + LinearNextTile(); + } + + // If during a texture download the end coordinate is not tile aligned, swizzle + // the tile affected to a temporary buffer and copy the part we are interested in + if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) { + std::array tmp_buf; + auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride); + MortonCopyTile(width, tmp_buf, linear_data); + std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(), + end_offset - aligned_end_offset); + } +} + +/** + * 
Performs a linear copy, converting pixel formats if required. + * @tparam decode If true, decodes the texture if needed. Otherwise, encodes if needed. + * @tparam format Pixel format to copy. + * @tparam converted If true, converts the texture to/from the appropriate format. + * @param src_buffer The source pixel data + * @param dst_buffer The destination pixel data + * @return + */ +template +static constexpr void LinearCopy(std::span src_buffer, std::span dst_buffer) { + const std::size_t src_size = src_buffer.size(); + const std::size_t dst_size = dst_buffer.size(); + + if constexpr (converted) { + constexpr u32 encoded_bytes_per_pixel = GetFormatBpp(format) / 8; + constexpr u32 decoded_bytes_per_pixel = 4; + constexpr u32 src_bytes_per_pixel = + decode ? encoded_bytes_per_pixel : decoded_bytes_per_pixel; + constexpr u32 dst_bytes_per_pixel = + decode ? decoded_bytes_per_pixel : encoded_bytes_per_pixel; + + for (std::size_t src_index = 0, dst_index = 0; src_index < src_size && dst_index < dst_size; + src_index += src_bytes_per_pixel, dst_index += dst_bytes_per_pixel) { + const auto src_pixel = src_buffer.subspan(src_index, src_bytes_per_pixel); + const auto dst_pixel = dst_buffer.subspan(dst_index, dst_bytes_per_pixel); + if constexpr (decode) { + DecodePixel(src_pixel.data(), dst_pixel.data()); + } else { + EncodePixel(src_pixel.data(), dst_pixel.data()); + } + } + } else { + std::memcpy(dst_buffer.data(), src_buffer.data(), std::min(src_size, dst_size)); + } +} + +using MortonFunc = void (*)(u32, u32, u32, u32, std::span, std::span); + +static constexpr std::array UNSWIZZLE_TABLE = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + MortonCopy, // 5 + MortonCopy, // 6 + MortonCopy, // 7 + MortonCopy, // 8 + MortonCopy, // 9 + MortonCopy, // 10 + MortonCopy, // 11 + MortonCopy, // 12 + MortonCopy, // 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy, // 17 +}; + +static constexpr std::array 
UNSWIZZLE_TABLE_CONVERTED = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + // The following formats are implicitly converted to RGBA regardless, so ignore them. + nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + // No conversion here as we need to do a special deinterleaving conversion elsewhere. + nullptr, // 17 +}; + +static constexpr std::array SWIZZLE_TABLE = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + MortonCopy, // 5 + MortonCopy, // 6 + MortonCopy, // 7 + MortonCopy, // 8 + MortonCopy, // 9 + MortonCopy, // 10 + MortonCopy, // 11 + nullptr, // 12 + nullptr, // 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + MortonCopy, // 17 +}; + +static constexpr std::array SWIZZLE_TABLE_CONVERTED = { + MortonCopy, // 0 + MortonCopy, // 1 + MortonCopy, // 2 + MortonCopy, // 3 + MortonCopy, // 4 + // The following formats are implicitly converted from RGBA regardless, so ignore them. + nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + MortonCopy, // 14 + nullptr, // 15 + MortonCopy, // 16 + // No conversion here as we need to do a special interleaving conversion elsewhere. + nullptr, // 17 +}; + +using LinearFunc = void (*)(std::span, std::span); + +static constexpr std::array LINEAR_DECODE_TABLE = { + LinearCopy, // 0 + LinearCopy, // 1 + LinearCopy, // 2 + LinearCopy, // 3 + LinearCopy, // 4 + // These formats cannot be used linearly and can be ignored. 
+ nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + LinearCopy, // 14 + nullptr, // 15 + LinearCopy, // 16 + LinearCopy, // 17 +}; + +static constexpr std::array LINEAR_DECODE_TABLE_CONVERTED = { + LinearCopy, // 0 + LinearCopy, // 1 + LinearCopy, // 2 + LinearCopy, // 3 + LinearCopy, // 4 + // These formats cannot be used linearly and can be ignored. + nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + LinearCopy, // 14 + nullptr, // 15 + LinearCopy, // 16 + // No conversion here as we need to do a special deinterleaving conversion elsewhere. + nullptr, // 17 +}; + +static constexpr std::array LINEAR_ENCODE_TABLE = { + LinearCopy, // 0 + LinearCopy, // 1 + LinearCopy, // 2 + LinearCopy, // 3 + LinearCopy, // 4 + // These formats cannot be used linearly and can be ignored. + nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + LinearCopy, // 14 + nullptr, // 15 + LinearCopy, // 16 + LinearCopy, // 17 +}; + +static constexpr std::array LINEAR_ENCODE_TABLE_CONVERTED = { + LinearCopy, // 0 + LinearCopy, // 1 + LinearCopy, // 2 + LinearCopy, // 3 + LinearCopy, // 4 + // These formats cannot be used linearly and can be ignored. + nullptr, // 5 + nullptr, // 6 + nullptr, // 7 + nullptr, // 8 + nullptr, // 9 + nullptr, // 10 + nullptr, // 11 + nullptr, // 12 + nullptr, // 13 + LinearCopy, // 14 + nullptr, // 15 + LinearCopy, // 16 + // No conversion here as we need to do a special interleaving conversion elsewhere. 
+ nullptr, // 17 +}; + +} // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp index 6ff2fb8e5..79f2f46d2 100644 --- a/src/video_core/rasterizer_cache/utils.cpp +++ b/src/video_core/rasterizer_cache/utils.cpp @@ -3,8 +3,8 @@ // Refer to the license.txt file included. #include "common/assert.h" -#include "video_core/rasterizer_cache/morton_swizzle.h" #include "video_core/rasterizer_cache/surface_params.h" +#include "video_core/rasterizer_cache/texture_codec.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/texture/texture_decode.h" @@ -47,32 +47,58 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d return result; } -void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_linear, std::span dest_tiled, - bool convert) { - const u32 func_index = static_cast(swizzle_info.pixel_format); - const MortonFunc SwizzleImpl = (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index]; - if (!SwizzleImpl) { - LOG_ERROR(Render_Vulkan, "Unimplemented swizzle function for pixel format {}.", func_index); - UNREACHABLE(); +void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr, + std::span source, std::span dest, bool convert) { + const u32 func_index = static_cast(surface_info.pixel_format); + + if (surface_info.is_tiled) { + const MortonFunc SwizzleImpl = + (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index]; + if (SwizzleImpl) { + SwizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr, + end_addr - surface_info.addr, source, dest); + return; + } + } else { + const LinearFunc LinearEncodeImpl = + (convert ? 
LINEAR_ENCODE_TABLE_CONVERTED : LINEAR_ENCODE_TABLE)[func_index]; + if (LinearEncodeImpl) { + LinearEncodeImpl(source, dest); + return; + } } - SwizzleImpl(swizzle_info.width, swizzle_info.height, start_addr - swizzle_info.addr, - end_addr - swizzle_info.addr, source_linear, dest_tiled); + + LOG_ERROR(Render_Vulkan, + "Unimplemented texture encode function for pixel format = {}, tiled = {}", func_index, + surface_info.is_tiled); + UNREACHABLE(); } -void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_tiled, std::span dest_linear, - bool convert) { - const u32 func_index = static_cast(unswizzle_info.pixel_format); - const MortonFunc UnswizzleImpl = - (convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index]; - if (!UnswizzleImpl) { - LOG_ERROR(Render_Vulkan, "Unimplemented un-swizzle function for pixel format {}.", - func_index); - UNREACHABLE(); +void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr, + std::span source, std::span dest, bool convert) { + const u32 func_index = static_cast(surface_info.pixel_format); + + if (surface_info.is_tiled) { + const MortonFunc UnswizzleImpl = + (convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index]; + if (UnswizzleImpl) { + UnswizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr, + end_addr - surface_info.addr, dest, source); + return; + } + } else { + const LinearFunc LinearDecodeImpl = + (convert ? 
LINEAR_DECODE_TABLE_CONVERTED : LINEAR_DECODE_TABLE)[func_index]; + if (LinearDecodeImpl) { + LinearDecodeImpl(source, dest); + return; + } } - UnswizzleImpl(unswizzle_info.width, unswizzle_info.height, start_addr - unswizzle_info.addr, - end_addr - unswizzle_info.addr, dest_linear, source_tiled); + + LOG_ERROR(Render_Vulkan, + "Unimplemented texture decode function for pixel format = {}, tiled = {}", func_index, + surface_info.is_tiled); + UNREACHABLE(); } } // namespace VideoCore diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h index 91da22f84..ff87084fb 100644 --- a/src/video_core/rasterizer_cache/utils.h +++ b/src/video_core/rasterizer_cache/utils.h @@ -107,30 +107,30 @@ struct TextureCubeConfig { [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data); /** - * Converts a morton swizzled texture to linear format. + * Encodes a linear texture to the expected linear or tiled format. * - * @param unswizzle_info Structure used to query the surface information. - * @param start_addr The start address of the source_tiled data. - * @param end_addr The end address of the source_tiled data. - * @param source_tiled The tiled data to convert. - * @param dest_linear The output buffer where the generated linear data will be written to. + * @param surface_info Structure used to query the surface information. + * @param start_addr The start address of the dest data. Used if tiled. + * @param end_addr The end address of the dest data. Used if tiled. + * @param source The source linear texture data. + * @param dest The output buffer where the encoded linear or tiled data will be written to. + * @param convert Whether the pixel format needs to be converted.
*/ -void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_tiled, std::span dest_linear, - bool convert = false); +void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr, + std::span source, std::span dest, bool convert = false); /** - * Swizzles a linear texture according to the morton code. + * Decodes a linear or tiled texture to the expected linear format. * - * @param swizzle_info Structure used to query the surface information. - * @param start_addr The start address of the dest_tiled data. - * @param end_addr The end address of the dest_tiled data. - * @param source_tiled The source morton swizzled data. - * @param dest_linear The output buffer where the generated linear data will be written to. + * @param surface_info Structure used to query the surface information. + * @param start_addr The start address of the source data. Used if tiled. + * @param end_addr The end address of the source data. Used if tiled. + * @param source The source linear or tiled texture data. + * @param dest The output buffer where the decoded linear data will be written to. + * @param convert Whether the pixel format needs to be converted.
*/ -void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr, - std::span source_linear, std::span dest_tiled, - bool convert = false); +void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr, + std::span source, std::span dest, bool convert = false); } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp index 3405c41ee..6d9e974db 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp +++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp @@ -82,7 +82,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) { return StagingData{.buffer = buffer.GetHandle(), .size = size, - .mapped = std::span{reinterpret_cast(data), size}, + .mapped = std::span{data, size}, .buffer_offset = offset}; } @@ -103,23 +103,6 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f return DEFAULT_TUPLE; } -void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span source, - std::span dest) { - const VideoCore::PixelFormat format = surface.pixel_format; - if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) { - return Pica::Texture::ConvertABGRToRGBA(source, dest); - } else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) { - return Pica::Texture::ConvertBGRToRGB(source, dest); - } else { - // Sometimes the source size might be larger than the destination. - // This can happen during texture downloads when FromInterval aligns - // the flush range to scanline boundaries. In that case only copy - // what we need - const std::size_t copy_size = std::min(source.size(), dest.size()); - std::memcpy(dest.data(), source.data(), copy_size); - } -} - OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type) { const u32 layers = type == VideoCore::TextureType::CubeMap ? 
6 : 1; diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h index f2e8bb9cf..6e76316ed 100644 --- a/src/video_core/renderer_opengl/gl_texture_runtime.h +++ b/src/video_core/renderer_opengl/gl_texture_runtime.h @@ -22,7 +22,7 @@ struct FormatTuple { struct StagingData { GLuint buffer; u32 size = 0; - std::span mapped{}; + std::span mapped{}; GLintptr buffer_offset = 0; }; @@ -48,10 +48,6 @@ public: void Finish() const {} - /// Performs required format convertions on the staging data - void FormatConvert(const Surface& surface, bool upload, std::span source, - std::span dest); - /// Allocates an OpenGL texture with the specified dimentions and format OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format, VideoCore::TextureType type); diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp index 47951a9ae..3ccb41f40 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp @@ -4,7 +4,7 @@ #include #include "common/microprofile.h" -#include "video_core/rasterizer_cache/morton_swizzle.h" +#include "video_core/rasterizer_cache/texture_codec.h" #include "video_core/rasterizer_cache/utils.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" @@ -66,10 +66,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) { switch (dest) { case vk::Format::eD24UnormS8Uint: { for (; stencil_offset < data.size; depth_offset += 4) { - std::byte* ptr = mapped.data() + depth_offset; + u8* ptr = mapped.data() + depth_offset; const u32 d24s8 = VideoCore::MakeInt(ptr); const u32 d24 = d24s8 >> 8; - mapped[stencil_offset] = static_cast(d24s8 & 0xFF); + mapped[stencil_offset] = d24s8 & 0xFF; std::memcpy(ptr, &d24, 4); stencil_offset++; } @@ -77,10 +77,10 @@ u32 UnpackDepthStencil(const StagingData& 
data, vk::Format dest) { } case vk::Format::eD32SfloatS8Uint: { for (; stencil_offset < data.size; depth_offset += 4) { - std::byte* ptr = mapped.data() + depth_offset; + u8* ptr = mapped.data() + depth_offset; const u32 d24s8 = VideoCore::MakeInt(ptr); const float d32 = (d24s8 >> 8) / 16777215.f; - mapped[stencil_offset] = static_cast(d24s8 & 0xFF); + mapped[stencil_offset] = d24s8 & 0xFF; std::memcpy(ptr, &d32, 4); stencil_offset++; } @@ -151,7 +151,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) { return StagingData{ .buffer = buffer.Handle(), .size = size, - .mapped = std::span{reinterpret_cast(data), size}, + .mapped = std::span{data, size}, .buffer_offset = offset, }; } @@ -354,46 +354,6 @@ void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) { texture_recycler.emplace(tag, std::move(alloc)); } -void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span source, - std::span dest) { - if (!NeedsConvertion(surface.pixel_format)) { - std::memcpy(dest.data(), source.data(), source.size()); - return; - } - - if (upload) { - switch (surface.pixel_format) { - case VideoCore::PixelFormat::RGBA8: - return Pica::Texture::ConvertABGRToRGBA(source, dest); - case VideoCore::PixelFormat::RGB8: - return Pica::Texture::ConvertBGRToRGBA(source, dest); - case VideoCore::PixelFormat::RGBA4: - return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest); - case VideoCore::PixelFormat::D24: - return Pica::Texture::ConvertD24ToD32(source, dest); - default: - break; - } - } else { - switch (surface.pixel_format) { - case VideoCore::PixelFormat::RGBA8: - return Pica::Texture::ConvertABGRToRGBA(source, dest); - case VideoCore::PixelFormat::RGBA4: - return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest); - case VideoCore::PixelFormat::RGB8: - return Pica::Texture::ConvertRGBAToBGR(source, dest); - case VideoCore::PixelFormat::D24: - return Pica::Texture::ConvertD32ToD24(source, dest); - default: - break; - } - } - - 
LOG_WARNING(Render_Vulkan, "Missing linear format convertion: {} {} {}", - vk::to_string(surface.traits.native), upload ? "->" : "<-", - vk::to_string(surface.alloc.format)); -} - bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, VideoCore::ClearValue value) { renderpass_cache.ExitRenderpass(); diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h index a3ba85d8d..e088927d4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_runtime.h +++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h @@ -21,7 +21,7 @@ namespace Vulkan { struct StagingData { vk::Buffer buffer; u32 size = 0; - std::span mapped{}; + std::span mapped{}; u64 buffer_offset = 0; }; @@ -108,10 +108,6 @@ public: VideoCore::TextureType type, vk::Format format, vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect); - /// Performs required format convertions on the staging data - void FormatConvert(const Surface& surface, bool upload, std::span source, - std::span dest); - /// Fills the rectangle of the texture with the clear value provided bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear, VideoCore::ClearValue value); diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp index b10a52c2a..c6497f1f7 100644 --- a/src/video_core/texture/texture_decode.cpp +++ b/src/video_core/texture/texture_decode.cpp @@ -105,47 +105,36 @@ Common::Vec4 LookupTexelInTile(const u8* source, unsigned int x, unsigned in } case TextureFormat::IA8: { - const u8* source_ptr = source + MortonInterleave(x, y) * 2; - - if (disable_alpha) { - // Show intensity as red, alpha as green - return {source_ptr[1], source_ptr[0], 0, 255}; - } else { - return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; - } + auto res = Common::Color::DecodeIA8(source + MortonInterleave(x, y) * 2); + return {res.r(), res.g(), res.b(), static_cast(disable_alpha ? 
255 : res.a())}; } case TextureFormat::RG8: { auto res = Common::Color::DecodeRG8(source + MortonInterleave(x, y) * 2); - return {res.r(), res.g(), 0, 255}; + return {res.r(), res.g(), res.b(), static_cast(disable_alpha ? 255 : res.a())}; } case TextureFormat::I8: { - const u8* source_ptr = source + MortonInterleave(x, y); - return {*source_ptr, *source_ptr, *source_ptr, 255}; + auto res = Common::Color::DecodeI8(source + MortonInterleave(x, y)); + return {res.r(), res.g(), res.b(), static_cast(disable_alpha ? 255 : res.a())}; } case TextureFormat::A8: { - const u8* source_ptr = source + MortonInterleave(x, y); - + auto res = Common::Color::DecodeA8(source + MortonInterleave(x, y)); if (disable_alpha) { - return {*source_ptr, *source_ptr, *source_ptr, 255}; + return {res.a(), res.a(), res.a(), 255}; } else { - return {0, 0, 0, *source_ptr}; + return res; } } case TextureFormat::IA4: { - const u8* source_ptr = source + MortonInterleave(x, y); - - u8 i = Common::Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); - u8 a = Common::Color::Convert4To8((*source_ptr) & 0xF); - + auto res = Common::Color::DecodeIA4(source + MortonInterleave(x, y)); if (disable_alpha) { // Show intensity as red, alpha as green - return {i, a, 0, 255}; + return {res.r(), res.a(), 0, 255}; } else { - return {i, i, i, a}; + return res; } } @@ -223,139 +212,4 @@ TextureInfo TextureInfo::FromPicaRegister(const TexturingRegs::TextureConfig& co return info; } -void ConvertBGRToRGB(std::span source, std::span dest) { - for (std::size_t i = 0; i < source.size(); i += 3) { - u32 bgr{}; - std::memcpy(&bgr, source.data() + i, 3); - const u32 rgb = Common::swap32(bgr << 8); - std::memcpy(dest.data() + i, &rgb, 3); - } -} - -void ConvertBGRToRGBA(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 4) { - dest[i] = source[j + 2]; - dest[i + 1] = source[j + 1]; - dest[i + 2] = source[j]; - dest[i + 3] = std::byte{0xFF}; - j += 3; - } -} - -void
ConvertRGBAToBGR(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 3) { - dest[i] = source[j + 2]; - dest[i + 1] = source[j + 1]; - dest[i + 2] = source[j]; - j += 4; - } -} - -void ConvertABGRToRGBA(std::span source, std::span dest) { - for (u32 i = 0; i < dest.size(); i += 4) { - u32 abgr; - std::memcpy(&abgr, source.data() + i, sizeof(u32)); - const u32 rgba = Common::swap32(abgr); - std::memcpy(dest.data() + i, &rgba, 4); - } -} - -void ConvertRGBA4ToRGBA8(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 4) { - auto rgba = Common::Color::DecodeRGBA4(reinterpret_cast(source.data() + j)); - std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba)); - j += 2; - } -} - -void ConvertRGBA8ToRGBA4(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 2) { - Common::Vec4 rgba; - std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba)); - Common::Color::EncodeRGBA4(rgba, reinterpret_cast(dest.data() + i)); - j += 4; - } -} - -void ConvertRGB5A1ToRGBA8(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 4) { - auto rgba = Common::Color::DecodeRGB5A1(reinterpret_cast(source.data() + j)); - std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba)); - j += 2; - } -} - -void ConvertRGBA8ToRGB5A1(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 2) { - Common::Vec4 rgba; - std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba)); - Common::Color::EncodeRGB5A1(rgba, reinterpret_cast(dest.data() + i)); - j += 4; - } -} - -void ConvertD24ToD32(std::span source, std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 4) { - auto d32 = - Common::Color::DecodeD24(reinterpret_cast(source.data() + j)) / 16777215.f; - std::memcpy(dest.data() + i, &d32, sizeof(d32)); - j += 3; - } -} - -void ConvertD32ToD24(std::span source, 
std::span dest) { - u32 j = 0; - for (std::size_t i = 0; i < dest.size(); i += 3) { - float d32; - std::memcpy(&d32, source.data() + j, sizeof(d32)); - Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast(dest.data() + i)); - j += 4; - } -} - -void ConvertD32S8ToD24S8(std::span source, std::span dest) { - std::size_t depth_offset = 0; - std::size_t stencil_offset = 4 * source.size() / 5; - for (std::size_t i = 0; i < dest.size(); i += 4) { - float depth; - std::memcpy(&depth, source.data() + depth_offset, sizeof(float)); - u32 depth_uint = depth * 0xFFFFFF; - - dest[i] = source[stencil_offset]; - std::memcpy(dest.data() + i + 1, &depth_uint, 3); - - depth_offset += 4; - stencil_offset += 1; - } -} - -void InterleaveD24S8(std::span source, std::span dest) { - std::size_t depth_offset = 0; - std::size_t stencil_offset = 3 * source.size() / 4; - for (std::size_t i = 0; i < dest.size(); i += 4) { - dest[i] = source[stencil_offset]; - std::memcpy(dest.data() + i + 1, source.data() + depth_offset, 3); - depth_offset += 3; - stencil_offset += 1; - } -} - -void DeinterleaveD24S8(std::span source, std::span dest) { - std::size_t depth_offset = 0; - std::size_t stencil_offset = 3 * source.size() / 4; - for (std::size_t i = 0; i < dest.size(); i += 4) { - dest[stencil_offset] = source[i]; - std::memcpy(dest.data() + depth_offset, source.data() + i + 1, 3); - depth_offset += 3; - stencil_offset += 1; - } -} - } // namespace Pica::Texture diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h index ef161110a..add934516 100644 --- a/src/video_core/texture/texture_decode.h +++ b/src/video_core/texture/texture_decode.h @@ -55,48 +55,4 @@ Common::Vec4 LookupTexture(const u8* source, unsigned int x, unsigned int y, Common::Vec4 LookupTexelInTile(const u8* source, unsigned int x, unsigned int y, const TextureInfo& info, bool disable_alpha); -/** - * Converts pixel data encoded in BGR format to RGBA - * - * @param source Span to the source 
pixel data - * @param dest Span to the destination pixel data - */ -void ConvertBGRToRGB(std::span source, std::span dest); - -/** - * Converts pixel data encoded in BGR format to RGBA - * - * @param source Span to the source pixel data - * @param dest Span to the destination pixel data - */ -void ConvertBGRToRGBA(std::span source, std::span dest); - -void ConvertRGBAToBGR(std::span source, std::span dest); - -/** - * Converts pixel data encoded in ABGR format to RGBA - * - * @param source Span to the source pixel data - * @param dest Span to the destination pixel data - */ -void ConvertABGRToRGBA(std::span source, std::span dest); - -void ConvertRGBA4ToRGBA8(std::span source, std::span dest); - -void ConvertRGBA8ToRGBA4(std::span source, std::span dest); - -void ConvertRGB5A1ToRGBA8(std::span source, std::span dest); - -void ConvertRGBA8ToRGB5A1(std::span source, std::span dest); - -void ConvertD24ToD32(std::span source, std::span dest); - -void ConvertD32ToD24(std::span source, std::span dest); - -void ConvertD32S8ToD24S8(std::span source, std::span dest); - -void InterleaveD24S8(std::span source, std::span dest); - -void DeinterleaveD24S8(std::span source, std::span dest); - } // namespace Pica::Texture