From 489248e77f9a6d850564f46e2c37cecfd43b5e65 Mon Sep 17 00:00:00 2001
From: Steveice10 <1269164+Steveice10@users.noreply.github.com>
Date: Fri, 13 Jan 2023 03:54:42 -0800
Subject: [PATCH] video_core: De-duplicate texture format conversion logic.
 (#21)

* video_core: De-duplicate texture format conversion logic.

* video_core: Replace std::byte with u8 and remove excess linear texture converters.

* video_core: Remove implicit RGBA conversions from convert table for now, add comments explaining omissions.
---
 src/common/color.h                            |  81 +++
 src/common/memory_ref.h                       |   4 +-
 src/video_core/CMakeLists.txt                 |   2 +-
 .../rasterizer_cache/morton_swizzle.h         | 367 ------------
 .../rasterizer_cache/rasterizer_cache.h       |  18 +-
 .../rasterizer_cache/texture_codec.h          | 550 ++++++++++++++++++
 src/video_core/rasterizer_cache/utils.cpp     |  72 ++-
 src/video_core/rasterizer_cache/utils.h       |  36 +-
 .../renderer_opengl/gl_texture_runtime.cpp    |  19 +-
 .../renderer_opengl/gl_texture_runtime.h      |   6 +-
 .../renderer_vulkan/vk_texture_runtime.cpp    |  52 +-
 .../renderer_vulkan/vk_texture_runtime.h      |   6 +-
 src/video_core/texture/texture_decode.cpp     | 168 +-----
 src/video_core/texture/texture_decode.h       |  44 --
 14 files changed, 726 insertions(+), 699 deletions(-)
 delete mode 100644 src/video_core/rasterizer_cache/morton_swizzle.h
 create mode 100644 src/video_core/rasterizer_cache/texture_codec.h
diff --git a/src/common/color.h b/src/common/color.h
index bbcac858e..7e0ae8f4c 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -52,6 +52,11 @@ namespace Common::Color {
     return value >> 2;
 }
 
+/// Averages the R, G and B components of a color (alpha is ignored, the mean is truncated)
+[[nodiscard]] constexpr u8 AverageRgbComponents(const Common::Vec4<u8>& color) {
+    return (static_cast<u32>(color.r()) + color.g() + color.b()) / 3; // summed as u32
+}
+
 /**
  * Decode a color stored in RGBA8 format
  * @param bytes Pointer to encoded source color
@@ -115,6 +120,44 @@ namespace Common::Color {
             Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF)};
 }
 
+/**
+ * Decode a color stored in IA8 format (bytes[0] is alpha, bytes[1] is intensity)
+ * @param bytes Pointer to encoded source color; two bytes are read
+ * @return Result color decoded as Common::Vec4<u8>, with intensity replicated to R, G and B
+ */
+[[nodiscard]] inline Common::Vec4<u8> DecodeIA8(const u8* bytes) {
+    return {bytes[1], bytes[1], bytes[1], bytes[0]};
+}
+
+/**
+ * Decode a color stored in I8 format (a single intensity byte, no alpha)
+ * @param bytes Pointer to encoded source color; one byte is read
+ * @return Result color decoded as Common::Vec4<u8>; intensity fills R, G and B, alpha is 255
+ */
+[[nodiscard]] inline Common::Vec4<u8> DecodeI8(const u8* bytes) {
+    return {bytes[0], bytes[0], bytes[0], 255};
+}
+
+/**
+ * Decode a color stored in A8 format (a single alpha byte, no color information)
+ * @param bytes Pointer to encoded source color; one byte is read
+ * @return Result color decoded as Common::Vec4<u8>; R, G and B are 0, alpha is the source byte
+ */
+[[nodiscard]] inline Common::Vec4<u8> DecodeA8(const u8* bytes) {
+    return {0, 0, 0, bytes[0]};
+}
+
+/**
+ * Decode a color stored in IA4 format (high nibble is intensity, low nibble is alpha)
+ * @param bytes Pointer to encoded source color; one byte is read
+ * @return Result color decoded as Common::Vec4<u8>, both nibbles expanded with Convert4To8
+ */
+[[nodiscard]] inline Common::Vec4<u8> DecodeIA4(const u8* bytes) {
+    u8 i = Common::Color::Convert4To8((bytes[0] & 0xF0) >> 4);
+    u8 a = Common::Color::Convert4To8(bytes[0] & 0x0F);
+    return {i, i, i, a};
+}
+
 /**
  * Decode a depth value stored in D16 format
  * @param bytes Pointer to encoded source value
@@ -176,6 +219,7 @@ inline void EncodeRG8(const Common::Vec4<u8>& color, u8* bytes) {
     bytes[1] = color.r();
     bytes[0] = color.g();
 }
+
 /**
  * Encode a color as RGB565 format
  * @param color Source color to encode
@@ -212,6 +256,43 @@ inline void EncodeRGBA4(const Common::Vec4<u8>& color, u8* bytes) {
     std::memcpy(bytes, &data, sizeof(data));
 }
 
+/**
+ * Encode a color as IA8 format (bytes[0] receives alpha, bytes[1] the averaged RGB intensity)
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color; two bytes are written
+ */
+inline void EncodeIA8(const Common::Vec4<u8>& color, u8* bytes) {
+    bytes[1] = AverageRgbComponents(color);
+    bytes[0] = color.a();
+}
+
+/**
+ * Encode a color as I8 format; the intensity is the average of R, G and B, alpha is dropped
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color; one byte is written
+ */
+inline void EncodeI8(const Common::Vec4<u8>& color, u8* bytes) {
+    bytes[0] = AverageRgbComponents(color);
+}
+
+/**
+ * Encode a color as A8 format; only the alpha component is stored, R, G and B are dropped
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color; one byte is written
+ */
+inline void EncodeA8(const Common::Vec4<u8>& color, u8* bytes) {
+    bytes[0] = color.a();
+}
+
+/**
+ * Encode a color as IA4 format (averaged RGB intensity in the high nibble, alpha in the low)
+ * @param color Source color to encode
+ * @param bytes Destination pointer to store encoded color; one byte is written
+ */
+inline void EncodeIA4(const Common::Vec4<u8>& color, u8* bytes) {
+    bytes[0] = (Convert8To4(AverageRgbComponents(color)) << 4) | Convert8To4(color.a());
+}
+
 /**
  * Encode a 16 bit depth value as D16 format
  * @param value 16 bit source depth value to encode
diff --git a/src/common/memory_ref.h b/src/common/memory_ref.h
index 7e313961f..a63791a88 100644
--- a/src/common/memory_ref.h
+++ b/src/common/memory_ref.h
@@ -107,11 +107,11 @@ public:
     }
 
     auto GetWriteBytes(std::size_t size) {
-        return std::span{reinterpret_cast<std::byte*>(cptr), size > csize ? csize : size};
+        return std::span{cptr, size > csize ? csize : size}; // span length is capped at csize
     }
 
     auto GetReadBytes(std::size_t size) const {
-        return std::span{reinterpret_cast<const std::byte*>(cptr), size > csize ? csize : size};
+        return std::span{cptr, size > csize ? csize : size}; // span length is capped at csize
     }
 
     std::size_t GetSize() const {
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 498f4ac51..2570a42f0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -28,7 +28,7 @@ add_library(video_core STATIC
     regs_texturing.h
     renderer_base.cpp
     renderer_base.h
-    rasterizer_cache/morton_swizzle.h
+    rasterizer_cache/texture_codec.h
     rasterizer_cache/pixel_format.cpp
     rasterizer_cache/pixel_format.h
     rasterizer_cache/rasterizer_cache.cpp
diff --git a/src/video_core/rasterizer_cache/morton_swizzle.h b/src/video_core/rasterizer_cache/morton_swizzle.h
deleted file mode 100644
index 011eaa8b0..000000000
--- a/src/video_core/rasterizer_cache/morton_swizzle.h
+++ /dev/null
@@ -1,367 +0,0 @@
-// Copyright 2022 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-#include <algorithm>
-#include <bit>
-#include <span>
-#include "common/alignment.h"
-#include "common/color.h"
-#include "video_core/rasterizer_cache/pixel_format.h"
-#include "video_core/texture/etc1.h"
-#include "video_core/utils.h"
-
-namespace VideoCore {
-
-template <typename T>
-inline T MakeInt(const std::byte* bytes) {
-    T integer{};
-    std::memcpy(&integer, bytes, sizeof(T));
-
-    return integer;
-}
-
-template <PixelFormat format, bool converted>
-constexpr void DecodePixel(const std::byte* source, std::byte* dest) {
-    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
-
-    if constexpr (format == PixelFormat::D24S8) {
-        const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8);
-        std::memcpy(dest, &d24s8, sizeof(u32));
-    } else if constexpr (format == PixelFormat::RGBA8 && converted) {
-        const u32 rgba = MakeInt<u32>(source);
-        const u32 abgr = Common::swap32(rgba);
-        std::memcpy(dest, &abgr, 4);
-    } else if constexpr (format == PixelFormat::RGB8 && converted) {
-        u32 rgb{};
-        std::memcpy(&rgb, source, 3);
-        const u32 abgr = Common::swap32(rgb << 8) | 0xFF000000;
-        std::memcpy(dest, &abgr, 4);
-    } else if constexpr (format == PixelFormat::RGB565 && converted) {
-        const auto abgr = Common::Color::DecodeRGB565(reinterpret_cast<const u8*>(source));
-        std::memcpy(dest, abgr.AsArray(), 4);
-    } else if constexpr (format == PixelFormat::RGB5A1 && converted) {
-        const auto abgr = Common::Color::DecodeRGB5A1(reinterpret_cast<const u8*>(source));
-        std::memcpy(dest, abgr.AsArray(), 4);
-    } else if constexpr (format == PixelFormat::RGBA4 && converted) {
-        const auto abgr = Common::Color::DecodeRGBA4(reinterpret_cast<const u8*>(source));
-        std::memcpy(dest, abgr.AsArray(), 4);
-    } else if constexpr (format == PixelFormat::IA8) {
-        std::memset(dest, static_cast<int>(source[1]), 3);
-        dest[3] = source[0];
-    } else if constexpr (format == PixelFormat::RG8) {
-        const auto rgba = Common::Color::DecodeRG8(reinterpret_cast<const u8*>(source));
-        std::memcpy(dest, rgba.AsArray(), 4);
-    } else if constexpr (format == PixelFormat::I8) {
-        std::memset(dest, static_cast<int>(source[0]), 3);
-        dest[3] = std::byte{255};
-    } else if constexpr (format == PixelFormat::A8) {
-        std::memset(dest, 0, 3);
-        dest[3] = source[0];
-    } else if constexpr (format == PixelFormat::IA4) {
-        const u8 ia4 = static_cast<const u8>(source[0]);
-        std::memset(dest, Common::Color::Convert4To8(ia4 >> 4), 3);
-        dest[3] = std::byte{Common::Color::Convert4To8(ia4 & 0xF)};
-    } else if constexpr (format == PixelFormat::D24 && converted) {
-        const auto d32 = Common::Color::DecodeD24(reinterpret_cast<const u8*>(source)) / 16777215.f;
-        std::memcpy(dest, &d32, sizeof(d32));
-    } else {
-        std::memcpy(dest, source, bytes_per_pixel);
-    }
-}
-
-template <PixelFormat format>
-constexpr void DecodePixel4(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
-    const u32 morton_offset = VideoCore::MortonInterleave(x, y);
-    const u8 value = static_cast<const u8>(source_tile[morton_offset >> 1]);
-    const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
-
-    if constexpr (format == PixelFormat::I4) {
-        std::memset(dest_pixel, static_cast<int>(pixel), 3);
-        dest_pixel[3] = std::byte{255};
-    } else {
-        std::memset(dest_pixel, 0, 3);
-        dest_pixel[3] = std::byte{pixel};
-    }
-}
-
-template <PixelFormat format>
-constexpr void DecodePixelETC1(u32 x, u32 y, const std::byte* source_tile, std::byte* dest_pixel) {
-    constexpr u32 subtile_width = 4;
-    constexpr u32 subtile_height = 4;
-    constexpr bool has_alpha = format == PixelFormat::ETC1A4;
-    constexpr std::size_t subtile_size = has_alpha ? 16 : 8;
-
-    const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
-    x %= subtile_width;
-    y %= subtile_height;
-
-    const std::byte* subtile_ptr = source_tile + subtile_index * subtile_size;
-
-    u8 alpha = 255;
-    if constexpr (has_alpha) {
-        u64_le packed_alpha;
-        std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
-        subtile_ptr += sizeof(u64);
-
-        alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
-    }
-
-    const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr);
-    const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y);
-
-    // Copy the uncompressed pixel to the destination
-    std::memcpy(dest_pixel, rgb.AsArray(), 3);
-    dest_pixel[3] = std::byte{alpha};
-}
-
-template <PixelFormat format, bool converted>
-constexpr void EncodePixel(const std::byte* source, std::byte* dest) {
-    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
-
-    if constexpr (format == PixelFormat::D24S8) {
-        const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8);
-        std::memcpy(dest, &s8d24, sizeof(u32));
-    } else if constexpr (format == PixelFormat::RGBA8 && converted) {
-        const u32 abgr = MakeInt<u32>(source);
-        const u32 rgba = Common::swap32(abgr);
-        std::memcpy(dest, &rgba, 4);
-    } else if constexpr (format == PixelFormat::RGB8 && converted) {
-        const u32 abgr = MakeInt<u32>(source);
-        const u32 rgb = Common::swap32(abgr << 8);
-        std::memcpy(dest, &rgb, 3);
-    } else if constexpr (format == PixelFormat::RGB565 && converted) {
-        Common::Vec4<u8> rgba;
-        std::memcpy(rgba.AsArray(), source, 4);
-        Common::Color::EncodeRGB565(rgba, reinterpret_cast<u8*>(dest));
-    } else if constexpr (format == PixelFormat::RGB5A1 && converted) {
-        Common::Vec4<u8> rgba;
-        std::memcpy(rgba.AsArray(), source, 4);
-        Common::Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest));
-    } else if constexpr (format == PixelFormat::RGBA4 && converted) {
-        Common::Vec4<u8> rgba;
-        std::memcpy(rgba.AsArray(), source, 4);
-        Common::Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest));
-    } else if constexpr (format == PixelFormat::D24 && converted) {
-        float d32;
-        std::memcpy(&d32, source, sizeof(d32));
-        Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast<u8*>(dest));
-    } else {
-        std::memcpy(dest, source, bytes_per_pixel);
-    }
-}
-
-template <bool morton_to_linear, PixelFormat format, bool converted>
-constexpr void MortonCopyTile(u32 stride, std::span<std::byte> tile_buffer,
-                              std::span<std::byte> linear_buffer) {
-    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
-    constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
-    constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
-    constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;
-
-    for (u32 y = 0; y < 8; y++) {
-        for (u32 x = 0; x < 8; x++) {
-            const auto tiled_pixel = tile_buffer.subspan(
-                VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel);
-            const auto linear_pixel = linear_buffer.subspan(
-                ((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel);
-            if constexpr (morton_to_linear) {
-                if constexpr (is_compressed) {
-                    DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data());
-                } else if constexpr (is_4bit) {
-                    DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
-                } else {
-                    DecodePixel<format, converted>(tiled_pixel.data(), linear_pixel.data());
-                }
-            } else {
-                EncodePixel<format, converted>(linear_pixel.data(), tiled_pixel.data());
-            }
-        }
-    }
-}
-
-/**
- * @brief Performs morton to/from linear convertions on the provided pixel data
- * @param converted If true performs RGBA8 to/from convertion to all color formats
- * @param width, height The dimentions of the rectangular region of pixels in linear_buffer
- * @param start_offset The number of bytes from the start of the first tile to the start of
- * tiled_buffer
- * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
- * @param linear_buffer The linear pixel data
- * @param tiled_buffer The tiled pixel data
- *
- * The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for
- * converting between linear and morton tiled layouts. The function handles both convertions but
- * there are slightly different paths and inputs for each:
- *
- * Morton to Linear:
- * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the
- * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire
- * texture area, but rather the specific rectangle affected by the upload.
- *
- * Linear to Morton:
- * This is similar to the other convertion but with some differences. In this case tiled_buffer is
- * not required to be aligned to any specific boundary which requires special care.
- * start_offset/end_offset are useful here as they tell us exactly where the data should be placed
- * in the linear_buffer.
- */
-template <bool morton_to_linear, PixelFormat format, bool converted = false>
-static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
-                                 std::span<std::byte> linear_buffer,
-                                 std::span<std::byte> tiled_buffer) {
-    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
-    constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
-    constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
-    static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
-
-    const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
-    const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
-    const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
-    const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
-
-    ASSERT(!morton_to_linear ||
-           (aligned_start_offset == start_offset && aligned_end_offset == end_offset));
-
-    // In OpenGL the texture origin is in the bottom left corner as opposed to other
-    // APIs that have it at the top left. To avoid flipping texture coordinates in
-    // the shader we read/write the linear buffer from the bottom up
-    u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
-    u32 tiled_offset = 0;
-    u32 x = 0;
-    u32 y = 0;
-
-    const auto LinearNextTile = [&] {
-        x = (x + 8) % width;
-        linear_offset += 8 * aligned_bytes_per_pixel;
-        if (!x) {
-            y = (y + 8) % height;
-            if (!y) {
-                return;
-            }
-
-            linear_offset -= width * 9 * aligned_bytes_per_pixel;
-        }
-    };
-
-    // If during a texture download the start coordinate is not tile aligned, swizzle
-    // the tile affected to a temporary buffer and copy the part we are interested in
-    if (start_offset < aligned_start_offset && !morton_to_linear) {
-        std::array<std::byte, tile_size> tmp_buf;
-        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
-        MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
-
-        std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
-                    std::min(aligned_start_offset, end_offset) - start_offset);
-
-        tiled_offset += aligned_start_offset - start_offset;
-        LinearNextTile();
-    }
-
-    const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
-    while (tiled_offset < buffer_end) {
-        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
-        auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
-        MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
-        tiled_offset += tile_size;
-        LinearNextTile();
-    }
-
-    // If during a texture download the end coordinate is not tile aligned, swizzle
-    // the tile affected to a temporary buffer and copy the part we are interested in
-    if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
-        std::array<std::byte, tile_size> tmp_buf;
-        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
-        MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
-        std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(),
-                    end_offset - aligned_end_offset);
-    }
-}
-
-using MortonFunc = void (*)(u32, u32, u32, u32, std::span<std::byte>, std::span<std::byte>);
-
-static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = {
-    MortonCopy<true, PixelFormat::RGBA8>,  // 0
-    MortonCopy<true, PixelFormat::RGB8>,   // 1
-    MortonCopy<true, PixelFormat::RGB5A1>, // 2
-    MortonCopy<true, PixelFormat::RGB565>, // 3
-    MortonCopy<true, PixelFormat::RGBA4>,  // 4
-    MortonCopy<true, PixelFormat::IA8>,    // 5
-    MortonCopy<true, PixelFormat::RG8>,    // 6
-    MortonCopy<true, PixelFormat::I8>,     // 7
-    MortonCopy<true, PixelFormat::A8>,     // 8
-    MortonCopy<true, PixelFormat::IA4>,    // 9
-    MortonCopy<true, PixelFormat::I4>,     // 10
-    MortonCopy<true, PixelFormat::A4>,     // 11
-    MortonCopy<true, PixelFormat::ETC1>,   // 12
-    MortonCopy<true, PixelFormat::ETC1A4>, // 13
-    MortonCopy<true, PixelFormat::D16>,    // 14
-    nullptr,                               // 15
-    MortonCopy<true, PixelFormat::D24>,    // 16
-    MortonCopy<true, PixelFormat::D24S8>   // 17
-};
-
-static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE_CONVERTED = {
-    MortonCopy<true, PixelFormat::RGBA8, true>,  // 0
-    MortonCopy<true, PixelFormat::RGB8, true>,   // 1
-    MortonCopy<true, PixelFormat::RGB5A1, true>, // 2
-    MortonCopy<true, PixelFormat::RGB565, true>, // 3
-    MortonCopy<true, PixelFormat::RGBA4, true>,  // 4
-    nullptr,                                     // 5
-    nullptr,                                     // 6
-    nullptr,                                     // 7
-    nullptr,                                     // 8
-    nullptr,                                     // 9
-    nullptr,                                     // 10
-    nullptr,                                     // 11
-    nullptr,                                     // 12
-    nullptr,                                     // 13
-    nullptr,                                     // 14
-    nullptr,                                     // 15
-    MortonCopy<true, PixelFormat::D24, true>,    // 16
-    nullptr,                                     // 17
-};
-
-static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = {
-    MortonCopy<false, PixelFormat::RGBA8>,  // 0
-    MortonCopy<false, PixelFormat::RGB8>,   // 1
-    MortonCopy<false, PixelFormat::RGB5A1>, // 2
-    MortonCopy<false, PixelFormat::RGB565>, // 3
-    MortonCopy<false, PixelFormat::RGBA4>,  // 4
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,
-    nullptr,                              // 5 - 13
-    MortonCopy<false, PixelFormat::D16>,  // 14
-    nullptr,                              // 15
-    MortonCopy<false, PixelFormat::D24>,  // 16
-    MortonCopy<false, PixelFormat::D24S8> // 17
-};
-
-static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE_CONVERTED = {
-    MortonCopy<false, PixelFormat::RGBA8, true>,  // 0
-    MortonCopy<false, PixelFormat::RGB8, true>,   // 1
-    MortonCopy<false, PixelFormat::RGB5A1, true>, // 2
-    MortonCopy<false, PixelFormat::RGB565, true>, // 3
-    MortonCopy<false, PixelFormat::RGBA4, true>,  // 4
-    nullptr,                                      // 5
-    nullptr,                                      // 6
-    nullptr,                                      // 7
-    nullptr,                                      // 8
-    nullptr,                                      // 9
-    nullptr,                                      // 10
-    nullptr,                                      // 11
-    nullptr,                                      // 12
-    nullptr,                                      // 13
-    nullptr,                                      // 14
-    nullptr,                                      // 15
-    MortonCopy<false, PixelFormat::D24, true>,    // 16
-    nullptr,                                      // 17
-};
-
-} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache/rasterizer_cache.h b/src/video_core/rasterizer_cache/rasterizer_cache.h
index 8f82f4662..52833b635 100644
--- a/src/video_core/rasterizer_cache/rasterizer_cache.h
+++ b/src/video_core/rasterizer_cache/rasterizer_cache.h
@@ -167,7 +167,7 @@ private:
     SurfaceSet remove_surfaces;
     u16 resolution_scale_factor;
     std::vector<std::function<void()>> download_queue;
-    std::vector<std::byte> staging_buffer;
+    std::vector<u8> staging_buffer;
     std::unordered_map<TextureCubeConfig, Surface> texture_cube_cache;
     std::recursive_mutex mutex;
 };
@@ -916,12 +916,8 @@ void RasterizerCache<T>::UploadSurface(const Surface& surface, SurfaceInterval i
     }
 
     const auto upload_data = source_ptr.GetWriteBytes(load_info.end - load_info.addr);
-    if (surface->is_tiled) {
-        UnswizzleTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
-                         runtime.NeedsConvertion(surface->pixel_format));
-    } else {
-        runtime.FormatConvert(*surface, true, upload_data, staging.mapped);
-    }
+    DecodeTexture(load_info, load_info.addr, load_info.end, upload_data, staging.mapped,
+                  runtime.NeedsConvertion(surface->pixel_format));
 
     const BufferTextureCopy upload = {.buffer_offset = 0,
                                       .buffer_size = staging.size,
@@ -957,12 +953,8 @@ void RasterizerCache<T>::DownloadSurface(const Surface& surface, SurfaceInterval
 
     download_queue.push_back([this, surface, flush_start, flush_end, flush_info,
                               mapped = staging.mapped, download_dest]() {
-        if (surface->is_tiled) {
-            SwizzleTexture(flush_info, flush_start, flush_end, mapped, download_dest,
-                           runtime.NeedsConvertion(surface->pixel_format));
-        } else {
-            runtime.FormatConvert(*surface, false, mapped, download_dest);
-        }
+        EncodeTexture(flush_info, flush_start, flush_end, mapped, download_dest,
+                      runtime.NeedsConvertion(surface->pixel_format));
     });
 }
 
diff --git a/src/video_core/rasterizer_cache/texture_codec.h b/src/video_core/rasterizer_cache/texture_codec.h
new file mode 100644
index 000000000..6eb64d486
--- /dev/null
+++ b/src/video_core/rasterizer_cache/texture_codec.h
@@ -0,0 +1,550 @@
+// Copyright 2022 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+#include <algorithm>
+#include <bit>
+#include <span>
+#include "common/alignment.h"
+#include "common/color.h"
+#include "video_core/rasterizer_cache/pixel_format.h"
+#include "video_core/texture/etc1.h"
+#include "video_core/utils.h"
+
+namespace VideoCore {
+
+template <typename T>
+inline T MakeInt(const u8* bytes) {
+    T integer{}; // value-initialized destination for the raw copy
+    std::memcpy(&integer, bytes, sizeof(T));
+    // Returns the sizeof(T) bytes at `bytes` copied verbatim; no byte swapping is done here
+    return integer;
+}
+
+template <PixelFormat format, bool converted>
+constexpr void DecodePixel(const u8* source, u8* dest) {
+    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; // used by the raw-copy fallback
+    // Decodes one pixel from source into dest; the branch is selected at compile time by format.
+    if constexpr (format == PixelFormat::RGBA8 && converted) {
+        const auto abgr = Common::Color::DecodeRGBA8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::RGB8 && converted) {
+        const auto abgr = Common::Color::DecodeRGB8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::RGB565 && converted) {
+        const auto abgr = Common::Color::DecodeRGB565(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::RGB5A1 && converted) {
+        const auto abgr = Common::Color::DecodeRGB5A1(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::RGBA4 && converted) {
+        const auto abgr = Common::Color::DecodeRGBA4(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::IA8) { // IA8/RG8/I8/A8/IA4 ignore `converted`
+        const auto abgr = Common::Color::DecodeIA8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::RG8) {
+        const auto abgr = Common::Color::DecodeRG8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::I8) {
+        const auto abgr = Common::Color::DecodeI8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::A8) {
+        const auto abgr = Common::Color::DecodeA8(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::IA4) {
+        const auto abgr = Common::Color::DecodeIA4(source);
+        std::memcpy(dest, abgr.AsArray(), 4);
+    } else if constexpr (format == PixelFormat::D16 && converted) {
+        const auto d32 = Common::Color::DecodeD16(source) / 65535.f; // divided by 0xFFFF
+        std::memcpy(dest, &d32, sizeof(d32));
+    } else if constexpr (format == PixelFormat::D24 && converted) {
+        const auto d32 = Common::Color::DecodeD24(source) / 16777215.f; // divided by 0xFFFFFF
+        std::memcpy(dest, &d32, sizeof(d32));
+    } else if constexpr (format == PixelFormat::D24S8) {
+        const u32 d24s8 = std::rotl(MakeInt<u32>(source), 8); // rotate left by one byte
+        std::memcpy(dest, &d24s8, sizeof(u32));
+    } else {
+        std::memcpy(dest, source, bytes_per_pixel);
+    }
+}
+
+template <PixelFormat format>
+constexpr void DecodePixel4(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) {
+    const u32 morton_offset = VideoCore::MortonInterleave(x, y);
+    const u8 value = source_tile[morton_offset >> 1]; // two 4-bit pixels share each byte
+    const u8 pixel = Common::Color::Convert4To8((morton_offset % 2) ? (value >> 4) : (value & 0xF));
+    // Odd offsets use the high nibble, even offsets the low one; I4 fills RGB, otherwise alpha.
+    if constexpr (format == PixelFormat::I4) {
+        std::memset(dest_pixel, pixel, 3);
+        dest_pixel[3] = 255;
+    } else {
+        std::memset(dest_pixel, 0, 3);
+        dest_pixel[3] = pixel;
+    }
+}
+
+template <PixelFormat format>
+constexpr void DecodePixelETC1(u32 x, u32 y, const u8* source_tile, u8* dest_pixel) {
+    constexpr u32 subtile_width = 4;
+    constexpr u32 subtile_height = 4;
+    constexpr bool has_alpha = format == PixelFormat::ETC1A4;
+    constexpr std::size_t subtile_size = has_alpha ? 16 : 8; // bytes per encoded subtile
+    // Subtiles are indexed two per row; x and y are then reduced to subtile-local coordinates
+    const u32 subtile_index = (x / subtile_width) + 2 * (y / subtile_height);
+    x %= subtile_width;
+    y %= subtile_height;
+
+    const u8* subtile_ptr = source_tile + subtile_index * subtile_size;
+
+    u8 alpha = 255;
+    if constexpr (has_alpha) { // the leading u64 of an ETC1A4 subtile packs 4-bit alphas
+        u64_le packed_alpha;
+        std::memcpy(&packed_alpha, subtile_ptr, sizeof(u64));
+        subtile_ptr += sizeof(u64);
+
+        alpha = Common::Color::Convert4To8((packed_alpha >> (4 * (x * subtile_width + y))) & 0xF);
+    }
+
+    const u64_le subtile_data = MakeInt<u64_le>(subtile_ptr);
+    const auto rgb = Pica::Texture::SampleETC1Subtile(subtile_data, x, y);
+
+    // Copy the uncompressed pixel to the destination
+    std::memcpy(dest_pixel, rgb.AsArray(), 3);
+    dest_pixel[3] = alpha;
+}
+
+template <PixelFormat format, bool converted>
+constexpr void EncodePixel(const u8* source, u8* dest) {
+    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; // used by the raw-copy fallback
+    // Encodes the pixel at source into dest; the branch is selected at compile time by format.
+    if constexpr (format == PixelFormat::RGBA8 && converted) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRGBA8(rgba, dest);
+    } else if constexpr (format == PixelFormat::RGB8 && converted) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRGB8(rgba, dest);
+    } else if constexpr (format == PixelFormat::RGB565 && converted) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRGB565(rgba, dest);
+    } else if constexpr (format == PixelFormat::RGB5A1 && converted) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRGB5A1(rgba, dest);
+    } else if constexpr (format == PixelFormat::RGBA4 && converted) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRGBA4(rgba, dest);
+    } else if constexpr (format == PixelFormat::IA8) { // IA8/RG8/I8/A8/IA4 ignore `converted`
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeIA8(rgba, dest);
+    } else if constexpr (format == PixelFormat::RG8) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeRG8(rgba, dest);
+    } else if constexpr (format == PixelFormat::I8) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeI8(rgba, dest);
+    } else if constexpr (format == PixelFormat::A8) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeA8(rgba, dest);
+    } else if constexpr (format == PixelFormat::IA4) {
+        Common::Vec4<u8> rgba;
+        std::memcpy(rgba.AsArray(), source, 4);
+        Common::Color::EncodeIA4(rgba, dest);
+    } else if constexpr (format == PixelFormat::D16 && converted) {
+        float d32;
+        std::memcpy(&d32, source, sizeof(d32));
+        Common::Color::EncodeD16(d32 * 0xFFFF, dest); // float source scaled by 0xFFFF
+    } else if constexpr (format == PixelFormat::D24 && converted) {
+        float d32;
+        std::memcpy(&d32, source, sizeof(d32));
+        Common::Color::EncodeD24(d32 * 0xFFFFFF, dest); // float source scaled by 0xFFFFFF
+    } else if constexpr (format == PixelFormat::D24S8) {
+        const u32 s8d24 = std::rotr(MakeInt<u32>(source), 8); // rotate right by one byte
+        std::memcpy(dest, &s8d24, sizeof(u32));
+    } else {
+        std::memcpy(dest, source, bytes_per_pixel);
+    }
+}
+
+template <PixelFormat format> // Packs one 4-byte RGBA pixel into a 4-bit slot of a tile buffer
+constexpr void EncodePixel4(u32 x, u32 y, const u8* source_pixel, u8* dest_tile_buffer) {
+    Common::Vec4<u8> rgba;
+    std::memcpy(rgba.AsArray(), source_pixel, 4);
+    // I4 stores the average of the RGB components; every other format takes the alpha channel.
+    u8 pixel;
+    if constexpr (format == PixelFormat::I4) {
+        pixel = Common::Color::AverageRgbComponents(rgba);
+    } else {
+        pixel = rgba.a();
+    }
+    // Two pixels share each byte: the morton index selects the byte and its parity the nibble.
+    const u32 morton_offset = VideoCore::MortonInterleave(x, y);
+    const u32 byte_offset = morton_offset >> 1;
+
+    const u8 current_values = dest_tile_buffer[byte_offset];
+    const u8 new_value = Common::Color::Convert8To4(pixel);
+    // Odd morton indices go to the high nibble, even ones to the low; the other half is kept.
+    if (morton_offset % 2) {
+        dest_tile_buffer[byte_offset] = (new_value << 4) | (current_values & 0x0F);
+    } else {
+        dest_tile_buffer[byte_offset] = (current_values & 0xF0) | new_value;
+    }
+}
+
+template <bool morton_to_linear, PixelFormat format, bool converted>
+constexpr void MortonCopyTile(u32 stride, std::span<u8> tile_buffer, std::span<u8> linear_buffer) {
+    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8; // presumably 0 for 4-bit formats
+    constexpr u32 linear_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
+    constexpr bool is_compressed = format == PixelFormat::ETC1 || format == PixelFormat::ETC1A4;
+    constexpr bool is_4bit = format == PixelFormat::I4 || format == PixelFormat::A4;
+    // Converts one 8x8 tile; linear rows are addressed bottom-up through the (7 - y) term.
+    for (u32 y = 0; y < 8; y++) {
+        for (u32 x = 0; x < 8; x++) {
+            const auto tiled_pixel = tile_buffer.subspan(
+                VideoCore::MortonInterleave(x, y) * bytes_per_pixel, bytes_per_pixel);
+            const auto linear_pixel = linear_buffer.subspan(
+                ((7 - y) * stride + x) * linear_bytes_per_pixel, linear_bytes_per_pixel);
+            if constexpr (morton_to_linear) {
+                if constexpr (is_compressed) { // decoder is handed the whole tile buffer
+                    DecodePixelETC1<format>(x, y, tile_buffer.data(), linear_pixel.data());
+                } else if constexpr (is_4bit) { // sub-byte pixels: pass the whole tile buffer
+                    DecodePixel4<format>(x, y, tile_buffer.data(), linear_pixel.data());
+                } else {
+                    DecodePixel<format, converted>(tiled_pixel.data(), linear_pixel.data());
+                }
+            } else {
+                if constexpr (is_4bit) { // sub-byte pixels: pass the whole tile buffer
+                    EncodePixel4<format>(x, y, linear_pixel.data(), tile_buffer.data());
+                } else { // NOTE(review): compressed formats also take this path
+                    EncodePixel<format, converted>(linear_pixel.data(), tiled_pixel.data());
+                }
+            }
+        }
+    }
+}
+
+/**
+ * @brief Performs morton to/from linear conversions on the provided pixel data
+ * @param converted If true performs RGBA8 to/from conversion to all color formats
+ * @param width, height The dimensions of the rectangular region of pixels in linear_buffer
+ * @param start_offset Byte offset from the start of the first tile to the start of tiled_buffer
+ * @param end_offset The number of bytes from the start of the first tile to the end of tiled_buffer
+ * @param linear_buffer The linear pixel data
+ * @param tiled_buffer The tiled pixel data
+ *
+ * The MortonCopy is at the heart of the PICA texture implementation, as it's responsible for
+ * converting between linear and morton tiled layouts. The function handles both conversions but
+ * there are slightly different paths and inputs for each:
+ *
+ * Morton to Linear:
+ * During uploads, tiled_buffer is always aligned to the tile or scanline boundary depending if the
+ * linear rectangle spans multiple vertical tiles. linear_buffer does not reference the entire
+ * texture area, but rather the specific rectangle affected by the upload.
+ *
+ * Linear to Morton:
+ * This is similar to the other conversion but with some differences. In this case tiled_buffer is
+ * not required to be aligned to any specific boundary, so a partially covered tile at either end
+ * is swizzled into a temporary tile and only the bytes inside start_offset/end_offset are copied.
+ */
+template <bool morton_to_linear, PixelFormat format, bool converted = false>
+static constexpr void MortonCopy(u32 width, u32 height, u32 start_offset, u32 end_offset,
+                                 std::span<u8> linear_buffer, std::span<u8> tiled_buffer) {
+    constexpr u32 bytes_per_pixel = GetFormatBpp(format) / 8;
+    constexpr u32 aligned_bytes_per_pixel = converted ? 4 : GetBytesPerPixel(format);
+    constexpr u32 tile_size = GetFormatBpp(format) * 64 / 8;
+    static_assert(aligned_bytes_per_pixel >= bytes_per_pixel, "");
+    // Bytes one 8x8 tile spans in the linear buffer: 7 full rows plus the 8 pixels of the last row
+    const u32 linear_tile_stride = (7 * width + 8) * aligned_bytes_per_pixel;
+    const u32 aligned_down_start_offset = Common::AlignDown(start_offset, tile_size);
+    const u32 aligned_start_offset = Common::AlignUp(start_offset, tile_size);
+    const u32 aligned_end_offset = Common::AlignDown(end_offset, tile_size);
+
+    ASSERT(!morton_to_linear ||
+           (aligned_start_offset == start_offset && aligned_end_offset == end_offset));
+
+    // In OpenGL the texture origin is in the bottom left corner as opposed to other
+    // APIs that have it at the top left. To avoid flipping texture coordinates in
+    // the shader we read/write the linear buffer from the bottom up
+    u32 linear_offset = ((height - 8) * width) * aligned_bytes_per_pixel;
+    u32 tiled_offset = 0;
+    u32 x = 0;
+    u32 y = 0;
+
+    const auto LinearNextTile = [&] {
+        x = (x + 8) % width;
+        linear_offset += 8 * aligned_bytes_per_pixel;
+        if (!x) {
+            y = (y + 8) % height;
+            if (!y) {
+                return;
+            }
+            // Rewind the row of tiles just walked (width) and step back 8 scanlines (8 * width)
+            linear_offset -= width * 9 * aligned_bytes_per_pixel;
+        }
+    };
+
+    // If during a texture download the start coordinate is not tile aligned, swizzle
+    // the tile affected to a temporary buffer and copy the part we are interested in
+    if (start_offset < aligned_start_offset && !morton_to_linear) {
+        std::array<u8, tile_size> tmp_buf{};
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
+        MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
+
+        std::memcpy(tiled_buffer.data(), tmp_buf.data() + start_offset - aligned_down_start_offset,
+                    std::min(aligned_start_offset, end_offset) - start_offset);
+
+        tiled_offset += aligned_start_offset - start_offset;
+        LinearNextTile();
+    }
+
+    // Copy the whole tiles in between. When start and end fall inside the same tile the aligned
+    // range is empty and buffer_end wraps around (unsigned), so the loop must be skipped then.
+    const u32 buffer_end = tiled_offset + aligned_end_offset - aligned_start_offset;
+    while (aligned_start_offset < aligned_end_offset && tiled_offset < buffer_end) {
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
+        auto tiled_data = tiled_buffer.subspan(tiled_offset, tile_size);
+        MortonCopyTile<morton_to_linear, format, converted>(width, tiled_data, linear_data);
+        tiled_offset += tile_size;
+        LinearNextTile();
+    }
+
+    // If during a texture download the end coordinate is not tile aligned, swizzle
+    // the tile affected to a temporary buffer and copy the part we are interested in
+    if (end_offset > std::max(aligned_start_offset, aligned_end_offset) && !morton_to_linear) {
+        std::array<u8, tile_size> tmp_buf{};
+        auto linear_data = linear_buffer.subspan(linear_offset, linear_tile_stride);
+        MortonCopyTile<morton_to_linear, format, converted>(width, tmp_buf, linear_data);
+        std::memcpy(tiled_buffer.data() + tiled_offset, tmp_buf.data(),
+                    end_offset - aligned_end_offset);
+    }
+}
+
+/**
+ * Performs a linear copy, converting pixel formats if required.
+ * @tparam decode If true, decodes the texture if needed. Otherwise, encodes if needed.
+ * @tparam format Pixel format to copy.
+ * @tparam converted If true, converts the texture to/from the appropriate format.
+ * @param src_buffer The source pixel data
+ * @param dst_buffer The destination pixel data
+ * @note Copying stops as soon as either buffer is exhausted.
+ */
+template <bool decode, PixelFormat format, bool converted = false>
+static constexpr void LinearCopy(std::span<u8> src_buffer, std::span<u8> dst_buffer) {
+    const std::size_t src_size = src_buffer.size();
+    const std::size_t dst_size = dst_buffer.size();
+    // NOTE(review): both sizes are assumed to be whole multiples of the pixel size - confirm.
+    if constexpr (converted) {
+        constexpr u32 encoded_bytes_per_pixel = GetFormatBpp(format) / 8;
+        constexpr u32 decoded_bytes_per_pixel = 4;
+        constexpr u32 src_bytes_per_pixel =
+            decode ? encoded_bytes_per_pixel : decoded_bytes_per_pixel;
+        constexpr u32 dst_bytes_per_pixel =
+            decode ? decoded_bytes_per_pixel : encoded_bytes_per_pixel;
+
+        for (std::size_t src_index = 0, dst_index = 0; src_index < src_size && dst_index < dst_size;
+             src_index += src_bytes_per_pixel, dst_index += dst_bytes_per_pixel) {
+            const auto src_pixel = src_buffer.subspan(src_index, src_bytes_per_pixel);
+            const auto dst_pixel = dst_buffer.subspan(dst_index, dst_bytes_per_pixel);
+            if constexpr (decode) {
+                DecodePixel<format, converted>(src_pixel.data(), dst_pixel.data());
+            } else {
+                EncodePixel<format, converted>(src_pixel.data(), dst_pixel.data());
+            }
+        }
+    } else { // no conversion: plain byte copy bounded by the smaller buffer
+        std::memcpy(dst_buffer.data(), src_buffer.data(), std::min(src_size, dst_size));
+    }
+}
+
+using MortonFunc = void (*)(u32, u32, u32, u32, std::span<u8>, std::span<u8>);
+// Tables below are indexed by static_cast<u32>(PixelFormat); nullptr means no function exists.
+static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE = { // morton -> linear
+    MortonCopy<true, PixelFormat::RGBA8>,  // 0
+    MortonCopy<true, PixelFormat::RGB8>,   // 1
+    MortonCopy<true, PixelFormat::RGB5A1>, // 2
+    MortonCopy<true, PixelFormat::RGB565>, // 3
+    MortonCopy<true, PixelFormat::RGBA4>,  // 4
+    MortonCopy<true, PixelFormat::IA8>,    // 5
+    MortonCopy<true, PixelFormat::RG8>,    // 6
+    MortonCopy<true, PixelFormat::I8>,     // 7
+    MortonCopy<true, PixelFormat::A8>,     // 8
+    MortonCopy<true, PixelFormat::IA4>,    // 9
+    MortonCopy<true, PixelFormat::I4>,     // 10
+    MortonCopy<true, PixelFormat::A4>,     // 11
+    MortonCopy<true, PixelFormat::ETC1>,   // 12
+    MortonCopy<true, PixelFormat::ETC1A4>, // 13
+    MortonCopy<true, PixelFormat::D16>,    // 14
+    nullptr,                               // 15 (unused index - presumably a gap in PixelFormat)
+    MortonCopy<true, PixelFormat::D24>,    // 16
+    MortonCopy<true, PixelFormat::D24S8>,  // 17
+};
+
+static constexpr std::array<MortonFunc, 18> UNSWIZZLE_TABLE_CONVERTED = { // converted = true
+    MortonCopy<true, PixelFormat::RGBA8, true>,  // 0
+    MortonCopy<true, PixelFormat::RGB8, true>,   // 1
+    MortonCopy<true, PixelFormat::RGB5A1, true>, // 2
+    MortonCopy<true, PixelFormat::RGB565, true>, // 3
+    MortonCopy<true, PixelFormat::RGBA4, true>,  // 4
+    // The following formats are implicitly converted to RGBA regardless, so ignore them.
+    nullptr,                                  // 5
+    nullptr,                                  // 6
+    nullptr,                                  // 7
+    nullptr,                                  // 8
+    nullptr,                                  // 9
+    nullptr,                                  // 10
+    nullptr,                                  // 11
+    nullptr,                                  // 12
+    nullptr,                                  // 13
+    MortonCopy<true, PixelFormat::D16, true>, // 14
+    nullptr,                                  // 15 (unused index - presumably a gap in PixelFormat)
+    MortonCopy<true, PixelFormat::D24, true>, // 16
+    // No conversion here as we need to do a special deinterleaving conversion elsewhere.
+    nullptr, // 17
+};
+
+static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE = { // linear -> morton
+    MortonCopy<false, PixelFormat::RGBA8>,  // 0
+    MortonCopy<false, PixelFormat::RGB8>,   // 1
+    MortonCopy<false, PixelFormat::RGB5A1>, // 2
+    MortonCopy<false, PixelFormat::RGB565>, // 3
+    MortonCopy<false, PixelFormat::RGBA4>,  // 4
+    MortonCopy<false, PixelFormat::IA8>,    // 5
+    MortonCopy<false, PixelFormat::RG8>,    // 6
+    MortonCopy<false, PixelFormat::I8>,     // 7
+    MortonCopy<false, PixelFormat::A8>,     // 8
+    MortonCopy<false, PixelFormat::IA4>,    // 9
+    MortonCopy<false, PixelFormat::I4>,     // 10
+    MortonCopy<false, PixelFormat::A4>,     // 11
+    nullptr,                                // 12 (ETC1: only the unswizzle table has an entry)
+    nullptr,                                // 13 (ETC1A4: only the unswizzle table has an entry)
+    MortonCopy<false, PixelFormat::D16>,    // 14
+    nullptr,                                // 15 (unused index - presumably a gap in PixelFormat)
+    MortonCopy<false, PixelFormat::D24>,    // 16
+    MortonCopy<false, PixelFormat::D24S8>,  // 17
+};
+
+static constexpr std::array<MortonFunc, 18> SWIZZLE_TABLE_CONVERTED = { // converted = true
+    MortonCopy<false, PixelFormat::RGBA8, true>,  // 0
+    MortonCopy<false, PixelFormat::RGB8, true>,   // 1
+    MortonCopy<false, PixelFormat::RGB5A1, true>, // 2
+    MortonCopy<false, PixelFormat::RGB565, true>, // 3
+    MortonCopy<false, PixelFormat::RGBA4, true>,  // 4
+    // The following formats are implicitly converted from RGBA regardless, so ignore them.
+    nullptr,                                   // 5
+    nullptr,                                   // 6
+    nullptr,                                   // 7
+    nullptr,                                   // 8
+    nullptr,                                   // 9
+    nullptr,                                   // 10
+    nullptr,                                   // 11
+    nullptr,                                   // 12
+    nullptr,                                   // 13
+    MortonCopy<false, PixelFormat::D16, true>, // 14
+    nullptr,                                   // 15 (unused index - presumably a gap in PixelFormat)
+    MortonCopy<false, PixelFormat::D24, true>, // 16
+    // No conversion here as we need to do a special interleaving conversion elsewhere.
+    nullptr, // 17
+};
+
+using LinearFunc = void (*)(std::span<u8>, std::span<u8>);
+// Signature of LinearCopy: (src_buffer, dst_buffer). Tables are indexed like the morton ones.
+static constexpr std::array<LinearFunc, 18> LINEAR_DECODE_TABLE = { // decode = true
+    LinearCopy<true, PixelFormat::RGBA8>,  // 0
+    LinearCopy<true, PixelFormat::RGB8>,   // 1
+    LinearCopy<true, PixelFormat::RGB5A1>, // 2
+    LinearCopy<true, PixelFormat::RGB565>, // 3
+    LinearCopy<true, PixelFormat::RGBA4>,  // 4
+    // These formats cannot be used linearly and can be ignored.
+    nullptr,                              // 5
+    nullptr,                              // 6
+    nullptr,                              // 7
+    nullptr,                              // 8
+    nullptr,                              // 9
+    nullptr,                              // 10
+    nullptr,                              // 11
+    nullptr,                              // 12
+    nullptr,                              // 13
+    LinearCopy<true, PixelFormat::D16>,   // 14
+    nullptr,                              // 15 (unused index - presumably a gap in PixelFormat)
+    LinearCopy<true, PixelFormat::D24>,   // 16
+    LinearCopy<true, PixelFormat::D24S8>, // 17
+};
+
+static constexpr std::array<LinearFunc, 18> LINEAR_DECODE_TABLE_CONVERTED = { // converted = true
+    LinearCopy<true, PixelFormat::RGBA8, true>,  // 0
+    LinearCopy<true, PixelFormat::RGB8, true>,   // 1
+    LinearCopy<true, PixelFormat::RGB5A1, true>, // 2
+    LinearCopy<true, PixelFormat::RGB565, true>, // 3
+    LinearCopy<true, PixelFormat::RGBA4, true>,  // 4
+    // These formats cannot be used linearly and can be ignored.
+    nullptr,                                  // 5
+    nullptr,                                  // 6
+    nullptr,                                  // 7
+    nullptr,                                  // 8
+    nullptr,                                  // 9
+    nullptr,                                  // 10
+    nullptr,                                  // 11
+    nullptr,                                  // 12
+    nullptr,                                  // 13
+    LinearCopy<true, PixelFormat::D16, true>, // 14
+    nullptr,                                  // 15 (unused index - presumably a gap in PixelFormat)
+    LinearCopy<true, PixelFormat::D24, true>, // 16
+    // No conversion here as we need to do a special deinterleaving conversion elsewhere.
+    nullptr, // 17
+};
+
+static constexpr std::array<LinearFunc, 18> LINEAR_ENCODE_TABLE = { // decode = false
+    LinearCopy<false, PixelFormat::RGBA8>,  // 0
+    LinearCopy<false, PixelFormat::RGB8>,   // 1
+    LinearCopy<false, PixelFormat::RGB5A1>, // 2
+    LinearCopy<false, PixelFormat::RGB565>, // 3
+    LinearCopy<false, PixelFormat::RGBA4>,  // 4
+    // These formats cannot be used linearly and can be ignored.
+    nullptr,                               // 5
+    nullptr,                               // 6
+    nullptr,                               // 7
+    nullptr,                               // 8
+    nullptr,                               // 9
+    nullptr,                               // 10
+    nullptr,                               // 11
+    nullptr,                               // 12
+    nullptr,                               // 13
+    LinearCopy<false, PixelFormat::D16>,   // 14
+    nullptr,                               // 15 (unused index - presumably a gap in PixelFormat)
+    LinearCopy<false, PixelFormat::D24>,   // 16
+    LinearCopy<false, PixelFormat::D24S8>, // 17
+};
+
+static constexpr std::array<LinearFunc, 18> LINEAR_ENCODE_TABLE_CONVERTED = { // converted = true
+    LinearCopy<false, PixelFormat::RGBA8, true>,  // 0
+    LinearCopy<false, PixelFormat::RGB8, true>,   // 1
+    LinearCopy<false, PixelFormat::RGB5A1, true>, // 2
+    LinearCopy<false, PixelFormat::RGB565, true>, // 3
+    LinearCopy<false, PixelFormat::RGBA4, true>,  // 4
+    // These formats cannot be used linearly and can be ignored.
+    nullptr,                                   // 5
+    nullptr,                                   // 6
+    nullptr,                                   // 7
+    nullptr,                                   // 8
+    nullptr,                                   // 9
+    nullptr,                                   // 10
+    nullptr,                                   // 11
+    nullptr,                                   // 12
+    nullptr,                                   // 13
+    LinearCopy<false, PixelFormat::D16, true>, // 14
+    nullptr,                                   // 15 (unused index - presumably a gap in PixelFormat)
+    LinearCopy<false, PixelFormat::D24, true>, // 16
+    // No conversion here as we need to do a special interleaving conversion elsewhere.
+    nullptr, // 17
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache/utils.cpp b/src/video_core/rasterizer_cache/utils.cpp
index 6ff2fb8e5..79f2f46d2 100644
--- a/src/video_core/rasterizer_cache/utils.cpp
+++ b/src/video_core/rasterizer_cache/utils.cpp
@@ -3,8 +3,8 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
-#include "video_core/rasterizer_cache/morton_swizzle.h"
 #include "video_core/rasterizer_cache/surface_params.h"
+#include "video_core/rasterizer_cache/texture_codec.h"
 #include "video_core/rasterizer_cache/utils.h"
 #include "video_core/texture/texture_decode.h"
 
@@ -47,32 +47,58 @@ ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_d
     return result;
 }
 
-void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
-                    std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
-                    bool convert) {
-    const u32 func_index = static_cast<u32>(swizzle_info.pixel_format);
-    const MortonFunc SwizzleImpl = (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index];
-    if (!SwizzleImpl) {
-        LOG_ERROR(Render_Vulkan, "Unimplemented swizzle function for pixel format {}.", func_index);
-        UNREACHABLE();
+void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
+                   std::span<u8> source, std::span<u8> dest, bool convert) {
+    const u32 func_index = static_cast<u32>(surface_info.pixel_format);
+    // NOTE(review): func_index indexes the 18-entry tables unchecked - confirm callers.
+    if (surface_info.is_tiled) {
+        const MortonFunc SwizzleImpl =
+            (convert ? SWIZZLE_TABLE_CONVERTED : SWIZZLE_TABLE)[func_index];
+        if (SwizzleImpl) {
+            SwizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr,
+                        end_addr - surface_info.addr, source, dest); // (linear, tiled)
+            return;
+        }
+    } else {
+        const LinearFunc LinearEncodeImpl =
+            (convert ? LINEAR_ENCODE_TABLE_CONVERTED : LINEAR_ENCODE_TABLE)[func_index];
+        if (LinearEncodeImpl) {
+            LinearEncodeImpl(source, dest);
+            return;
+        }
     }
-    SwizzleImpl(swizzle_info.width, swizzle_info.height, start_addr - swizzle_info.addr,
-                end_addr - swizzle_info.addr, source_linear, dest_tiled);
+    // Reached only when the selected table holds nullptr for this format.
+    LOG_ERROR(Render_Vulkan,
+              "Unimplemented texture encode function for pixel format = {}, tiled = {}", func_index,
+              surface_info.is_tiled);
+    UNREACHABLE();
 }
 
-void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
-                      std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
-                      bool convert) {
-    const u32 func_index = static_cast<u32>(unswizzle_info.pixel_format);
-    const MortonFunc UnswizzleImpl =
-        (convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index];
-    if (!UnswizzleImpl) {
-        LOG_ERROR(Render_Vulkan, "Unimplemented un-swizzle function for pixel format {}.",
-                  func_index);
-        UNREACHABLE();
+void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
+                   std::span<u8> source, std::span<u8> dest, bool convert) {
+    const u32 func_index = static_cast<u32>(surface_info.pixel_format);
+    // NOTE(review): func_index indexes the 18-entry tables unchecked - confirm callers.
+    if (surface_info.is_tiled) {
+        const MortonFunc UnswizzleImpl =
+            (convert ? UNSWIZZLE_TABLE_CONVERTED : UNSWIZZLE_TABLE)[func_index];
+        if (UnswizzleImpl) {
+            UnswizzleImpl(surface_info.width, surface_info.height, start_addr - surface_info.addr,
+                          end_addr - surface_info.addr, dest, source); // (linear, tiled)
+            return;
+        }
+    } else {
+        const LinearFunc LinearDecodeImpl =
+            (convert ? LINEAR_DECODE_TABLE_CONVERTED : LINEAR_DECODE_TABLE)[func_index];
+        if (LinearDecodeImpl) {
+            LinearDecodeImpl(source, dest);
+            return;
+        }
     }
-    UnswizzleImpl(unswizzle_info.width, unswizzle_info.height, start_addr - unswizzle_info.addr,
-                  end_addr - unswizzle_info.addr, dest_linear, source_tiled);
+    // Reached only when the selected table holds nullptr for this format.
+    LOG_ERROR(Render_Vulkan,
+              "Unimplemented texture decode function for pixel format = {}, tiled = {}", func_index,
+              surface_info.is_tiled);
+    UNREACHABLE();
 }
 
 } // namespace VideoCore
diff --git a/src/video_core/rasterizer_cache/utils.h b/src/video_core/rasterizer_cache/utils.h
index 91da22f84..ff87084fb 100644
--- a/src/video_core/rasterizer_cache/utils.h
+++ b/src/video_core/rasterizer_cache/utils.h
@@ -107,30 +107,30 @@ struct TextureCubeConfig {
 [[nodiscard]] ClearValue MakeClearValue(SurfaceType type, PixelFormat format, const u8* fill_data);
 
 /**
- * Converts a morton swizzled texture to linear format.
+ * Encodes a linear texture to the expected linear or tiled format.
  *
- * @param unswizzle_info Structure used to query the surface information.
- * @param start_addr The start address of the source_tiled data.
- * @param end_addr The end address of the source_tiled data.
- * @param source_tiled The tiled data to convert.
- * @param dest_linear The output buffer where the generated linear data will be written to.
+ * @param surface_info Structure used to query the surface information.
+ * @param start_addr The start address of the dest data. Used if tiled.
+ * @param end_addr The end address of the dest data. Used if tiled.
+ * @param source The source linear texture data.
+ * @param dest The output buffer where the encoded linear or tiled data will be written to.
+ * @param convert Whether the pixel format needs to be converted.
  */
-void UnswizzleTexture(const SurfaceParams& unswizzle_info, PAddr start_addr, PAddr end_addr,
-                      std::span<std::byte> source_tiled, std::span<std::byte> dest_linear,
-                      bool convert = false);
+void EncodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
+                   std::span<u8> source, std::span<u8> dest, bool convert = false);
 
 /**
- * Swizzles a linear texture according to the morton code.
+ * Decodes a linear or tiled texture to the expected linear format.
  *
- * @param swizzle_info Structure used to query the surface information.
- * @param start_addr The start address of the dest_tiled data.
- * @param end_addr The end address of the dest_tiled data.
- * @param source_tiled The source morton swizzled data.
- * @param dest_linear The output buffer where the generated linear data will be written to.
+ * @param surface_info Structure used to query the surface information.
+ * @param start_addr The start address of the source data. Used if tiled.
+ * @param end_addr The end address of the source data. Used if tiled.
+ * @param source The source linear or tiled texture data.
+ * @param dest The output buffer where the decoded linear data will be written to.
+ * @param convert Whether the pixel format needs to be converted.
  */
-void SwizzleTexture(const SurfaceParams& swizzle_info, PAddr start_addr, PAddr end_addr,
-                    std::span<std::byte> source_linear, std::span<std::byte> dest_tiled,
-                    bool convert = false);
+void DecodeTexture(const SurfaceParams& surface_info, PAddr start_addr, PAddr end_addr,
+                   std::span<u8> source, std::span<u8> dest, bool convert = false);
 
 } // namespace VideoCore
 
diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.cpp b/src/video_core/renderer_opengl/gl_texture_runtime.cpp
index 3405c41ee..6d9e974db 100644
--- a/src/video_core/renderer_opengl/gl_texture_runtime.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_runtime.cpp
@@ -82,7 +82,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
 
     return StagingData{.buffer = buffer.GetHandle(),
                        .size = size,
-                       .mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
+                       .mapped = std::span<u8>{data, size},
                        .buffer_offset = offset};
 }
 
@@ -103,23 +103,6 @@ const FormatTuple& TextureRuntime::GetFormatTuple(VideoCore::PixelFormat pixel_f
     return DEFAULT_TUPLE;
 }
 
-void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
-                                   std::span<std::byte> dest) {
-    const VideoCore::PixelFormat format = surface.pixel_format;
-    if (format == VideoCore::PixelFormat::RGBA8 && driver.IsOpenGLES()) {
-        return Pica::Texture::ConvertABGRToRGBA(source, dest);
-    } else if (format == VideoCore::PixelFormat::RGB8 && driver.IsOpenGLES()) {
-        return Pica::Texture::ConvertBGRToRGB(source, dest);
-    } else {
-        // Sometimes the source size might be larger than the destination.
-        // This can happen during texture downloads when FromInterval aligns
-        // the flush range to scanline boundaries. In that case only copy
-        // what we need
-        const std::size_t copy_size = std::min(source.size(), dest.size());
-        std::memcpy(dest.data(), source.data(), copy_size);
-    }
-}
-
 OGLTexture TextureRuntime::Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                                     VideoCore::TextureType type) {
     const u32 layers = type == VideoCore::TextureType::CubeMap ? 6 : 1;
diff --git a/src/video_core/renderer_opengl/gl_texture_runtime.h b/src/video_core/renderer_opengl/gl_texture_runtime.h
index f2e8bb9cf..6e76316ed 100644
--- a/src/video_core/renderer_opengl/gl_texture_runtime.h
+++ b/src/video_core/renderer_opengl/gl_texture_runtime.h
@@ -22,7 +22,7 @@ struct FormatTuple {
 struct StagingData {
     GLuint buffer;
     u32 size = 0;
-    std::span<std::byte> mapped{};
+    std::span<u8> mapped{};
     GLintptr buffer_offset = 0;
 };
 
@@ -48,10 +48,6 @@ public:
 
     void Finish() const {}
 
-    /// Performs required format convertions on the staging data
-    void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
-                       std::span<std::byte> dest);
-
     /// Allocates an OpenGL texture with the specified dimentions and format
     OGLTexture Allocate(u32 width, u32 height, VideoCore::PixelFormat format,
                         VideoCore::TextureType type);
diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
index 47951a9ae..3ccb41f40 100644
--- a/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.cpp
@@ -4,7 +4,7 @@
 
 #include <bit>
 #include "common/microprofile.h"
-#include "video_core/rasterizer_cache/morton_swizzle.h"
+#include "video_core/rasterizer_cache/texture_codec.h"
 #include "video_core/rasterizer_cache/utils.h"
 #include "video_core/renderer_vulkan/vk_instance.h"
 #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
@@ -66,10 +66,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
     switch (dest) {
     case vk::Format::eD24UnormS8Uint: {
         for (; stencil_offset < data.size; depth_offset += 4) {
-            std::byte* ptr = mapped.data() + depth_offset;
+            u8* ptr = mapped.data() + depth_offset;
             const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
             const u32 d24 = d24s8 >> 8;
-            mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
+            mapped[stencil_offset] = d24s8 & 0xFF;
             std::memcpy(ptr, &d24, 4);
             stencil_offset++;
         }
@@ -77,10 +77,10 @@ u32 UnpackDepthStencil(const StagingData& data, vk::Format dest) {
     }
     case vk::Format::eD32SfloatS8Uint: {
         for (; stencil_offset < data.size; depth_offset += 4) {
-            std::byte* ptr = mapped.data() + depth_offset;
+            u8* ptr = mapped.data() + depth_offset;
             const u32 d24s8 = VideoCore::MakeInt<u32>(ptr);
             const float d32 = (d24s8 >> 8) / 16777215.f;
-            mapped[stencil_offset] = static_cast<std::byte>(d24s8 & 0xFF);
+            mapped[stencil_offset] = d24s8 & 0xFF;
             std::memcpy(ptr, &d32, 4);
             stencil_offset++;
         }
@@ -151,7 +151,7 @@ StagingData TextureRuntime::FindStaging(u32 size, bool upload) {
     return StagingData{
         .buffer = buffer.Handle(),
         .size = size,
-        .mapped = std::span<std::byte>{reinterpret_cast<std::byte*>(data), size},
+        .mapped = std::span<u8>{data, size},
         .buffer_offset = offset,
     };
 }
@@ -354,46 +354,6 @@ void TextureRuntime::Recycle(const HostTextureTag tag, ImageAlloc&& alloc) {
     texture_recycler.emplace(tag, std::move(alloc));
 }
 
-void TextureRuntime::FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
-                                   std::span<std::byte> dest) {
-    if (!NeedsConvertion(surface.pixel_format)) {
-        std::memcpy(dest.data(), source.data(), source.size());
-        return;
-    }
-
-    if (upload) {
-        switch (surface.pixel_format) {
-        case VideoCore::PixelFormat::RGBA8:
-            return Pica::Texture::ConvertABGRToRGBA(source, dest);
-        case VideoCore::PixelFormat::RGB8:
-            return Pica::Texture::ConvertBGRToRGBA(source, dest);
-        case VideoCore::PixelFormat::RGBA4:
-            return Pica::Texture::ConvertRGBA4ToRGBA8(source, dest);
-        case VideoCore::PixelFormat::D24:
-            return Pica::Texture::ConvertD24ToD32(source, dest);
-        default:
-            break;
-        }
-    } else {
-        switch (surface.pixel_format) {
-        case VideoCore::PixelFormat::RGBA8:
-            return Pica::Texture::ConvertABGRToRGBA(source, dest);
-        case VideoCore::PixelFormat::RGBA4:
-            return Pica::Texture::ConvertRGBA8ToRGBA4(source, dest);
-        case VideoCore::PixelFormat::RGB8:
-            return Pica::Texture::ConvertRGBAToBGR(source, dest);
-        case VideoCore::PixelFormat::D24:
-            return Pica::Texture::ConvertD32ToD24(source, dest);
-        default:
-            break;
-        }
-    }
-
-    LOG_WARNING(Render_Vulkan, "Missing linear format convertion: {} {} {}",
-                vk::to_string(surface.traits.native), upload ? "->" : "<-",
-                vk::to_string(surface.alloc.format));
-}
-
 bool TextureRuntime::ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                                   VideoCore::ClearValue value) {
     renderpass_cache.ExitRenderpass();
diff --git a/src/video_core/renderer_vulkan/vk_texture_runtime.h b/src/video_core/renderer_vulkan/vk_texture_runtime.h
index a3ba85d8d..e088927d4 100644
--- a/src/video_core/renderer_vulkan/vk_texture_runtime.h
+++ b/src/video_core/renderer_vulkan/vk_texture_runtime.h
@@ -21,7 +21,7 @@ namespace Vulkan {
 struct StagingData {
     vk::Buffer buffer;
     u32 size = 0;
-    std::span<std::byte> mapped{};
+    std::span<u8> mapped{};
     u64 buffer_offset = 0;
 };
 
@@ -108,10 +108,6 @@ public:
                                       VideoCore::TextureType type, vk::Format format,
                                       vk::ImageUsageFlags usage, vk::ImageAspectFlags aspect);
 
-    /// Performs required format convertions on the staging data
-    void FormatConvert(const Surface& surface, bool upload, std::span<std::byte> source,
-                       std::span<std::byte> dest);
-
     /// Fills the rectangle of the texture with the clear value provided
     bool ClearTexture(Surface& surface, const VideoCore::TextureClear& clear,
                       VideoCore::ClearValue value);
diff --git a/src/video_core/texture/texture_decode.cpp b/src/video_core/texture/texture_decode.cpp
index b10a52c2a..c6497f1f7 100644
--- a/src/video_core/texture/texture_decode.cpp
+++ b/src/video_core/texture/texture_decode.cpp
@@ -105,47 +105,36 @@ Common::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned in
     }
 
     case TextureFormat::IA8: {
-        const u8* source_ptr = source + MortonInterleave(x, y) * 2;
-
-        if (disable_alpha) {
-            // Show intensity as red, alpha as green
-            return {source_ptr[1], source_ptr[0], 0, 255};
-        } else {
-            return {source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]};
-        }
+        auto res = Common::Color::DecodeIA8(source + MortonInterleave(x, y) * 2);
+        return disable_alpha ? Common::Vec4<u8>{res.r(), res.a(), 0, 255} : res; // I->R, A->G
     }
 
     case TextureFormat::RG8: {
         auto res = Common::Color::DecodeRG8(source + MortonInterleave(x, y) * 2);
-        return {res.r(), res.g(), 0, 255};
+        return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
     }
 
     case TextureFormat::I8: {
-        const u8* source_ptr = source + MortonInterleave(x, y);
-        return {*source_ptr, *source_ptr, *source_ptr, 255};
+        auto res = Common::Color::DecodeI8(source + MortonInterleave(x, y)); // 1 byte per texel
+        return {res.r(), res.g(), res.b(), static_cast<u8>(disable_alpha ? 255 : res.a())};
     }
 
     case TextureFormat::A8: {
-        const u8* source_ptr = source + MortonInterleave(x, y);
-
+        auto res = Common::Color::DecodeA8(source + MortonInterleave(x, y)); // 1 byte per texel
         if (disable_alpha) {
-            return {*source_ptr, *source_ptr, *source_ptr, 255};
+            return {res.a(), res.a(), res.a(), 255};
         } else {
-            return {0, 0, 0, *source_ptr};
+            return res;
         }
     }
 
     case TextureFormat::IA4: {
-        const u8* source_ptr = source + MortonInterleave(x, y);
-
-        u8 i = Common::Color::Convert4To8(((*source_ptr) & 0xF0) >> 4);
-        u8 a = Common::Color::Convert4To8((*source_ptr) & 0xF);
-
+        auto res = Common::Color::DecodeIA4(source + MortonInterleave(x, y)); // 1 byte per texel
         if (disable_alpha) {
             // Show intensity as red, alpha as green
-            return {i, a, 0, 255};
+            return {res.r(), res.a(), 0, 255};
         } else {
-            return {i, i, i, a};
+            return res;
         }
     }
 
@@ -223,139 +212,4 @@ TextureInfo TextureInfo::FromPicaRegister(const TexturingRegs::TextureConfig& co
     return info;
 }
 
-void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> dest) {
-    for (std::size_t i = 0; i < source.size(); i += 3) {
-        u32 bgr{};
-        std::memcpy(&bgr, source.data() + i, 3);
-        const u32 rgb = Common::swap32(bgr << 8);
-        std::memcpy(dest.data() + i, &rgb, 3);
-    }
-}
-
-void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        dest[i] = source[j + 2];
-        dest[i + 1] = source[j + 1];
-        dest[i + 2] = source[j];
-        dest[i + 3] = std::byte{0xFF};
-        j += 3;
-    }
-}
-
-void ConvertRGBAToBGR(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 3) {
-        dest[i] = source[j + 2];
-        dest[i + 1] = source[j + 1];
-        dest[i + 2] = source[j];
-        j += 4;
-    }
-}
-
-void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest) {
-    for (u32 i = 0; i < dest.size(); i += 4) {
-        u32 abgr;
-        std::memcpy(&abgr, source.data() + i, sizeof(u32));
-        const u32 rgba = Common::swap32(abgr);
-        std::memcpy(dest.data() + i, &rgba, 4);
-    }
-}
-
-void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        auto rgba = Common::Color::DecodeRGBA4(reinterpret_cast<const u8*>(source.data() + j));
-        std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba));
-        j += 2;
-    }
-}
-
-void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 2) {
-        Common::Vec4<u8> rgba;
-        std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba));
-        Common::Color::EncodeRGBA4(rgba, reinterpret_cast<u8*>(dest.data() + i));
-        j += 4;
-    }
-}
-
-void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        auto rgba = Common::Color::DecodeRGB5A1(reinterpret_cast<const u8*>(source.data() + j));
-        std::memcpy(dest.data() + i, rgba.AsArray(), sizeof(rgba));
-        j += 2;
-    }
-}
-
-void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 2) {
-        Common::Vec4<u8> rgba;
-        std::memcpy(rgba.AsArray(), source.data() + j, sizeof(rgba));
-        Common::Color::EncodeRGB5A1(rgba, reinterpret_cast<u8*>(dest.data() + i));
-        j += 4;
-    }
-}
-
-void ConvertD24ToD32(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        auto d32 =
-            Common::Color::DecodeD24(reinterpret_cast<const u8*>(source.data() + j)) / 16777215.f;
-        std::memcpy(dest.data() + i, &d32, sizeof(d32));
-        j += 3;
-    }
-}
-
-void ConvertD32ToD24(std::span<const std::byte> source, std::span<std::byte> dest) {
-    u32 j = 0;
-    for (std::size_t i = 0; i < dest.size(); i += 3) {
-        float d32;
-        std::memcpy(&d32, source.data() + j, sizeof(d32));
-        Common::Color::EncodeD24(d32 * 0xFFFFFF, reinterpret_cast<u8*>(dest.data() + i));
-        j += 4;
-    }
-}
-
-void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
-    std::size_t depth_offset = 0;
-    std::size_t stencil_offset = 4 * source.size() / 5;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        float depth;
-        std::memcpy(&depth, source.data() + depth_offset, sizeof(float));
-        u32 depth_uint = depth * 0xFFFFFF;
-
-        dest[i] = source[stencil_offset];
-        std::memcpy(dest.data() + i + 1, &depth_uint, 3);
-
-        depth_offset += 4;
-        stencil_offset += 1;
-    }
-}
-
-void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
-    std::size_t depth_offset = 0;
-    std::size_t stencil_offset = 3 * source.size() / 4;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        dest[i] = source[stencil_offset];
-        std::memcpy(dest.data() + i + 1, source.data() + depth_offset, 3);
-        depth_offset += 3;
-        stencil_offset += 1;
-    }
-}
-
-void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest) {
-    std::size_t depth_offset = 0;
-    std::size_t stencil_offset = 3 * source.size() / 4;
-    for (std::size_t i = 0; i < dest.size(); i += 4) {
-        dest[stencil_offset] = source[i];
-        std::memcpy(dest.data() + depth_offset, source.data() + i + 1, 3);
-        depth_offset += 3;
-        stencil_offset += 1;
-    }
-}
-
 } // namespace Pica::Texture
diff --git a/src/video_core/texture/texture_decode.h b/src/video_core/texture/texture_decode.h
index ef161110a..add934516 100644
--- a/src/video_core/texture/texture_decode.h
+++ b/src/video_core/texture/texture_decode.h
@@ -55,48 +55,4 @@ Common::Vec4<u8> LookupTexture(const u8* source, unsigned int x, unsigned int y,
 Common::Vec4<u8> LookupTexelInTile(const u8* source, unsigned int x, unsigned int y,
                                    const TextureInfo& info, bool disable_alpha);
 
-/**
- * Converts pixel data encoded in BGR format to RGBA
- *
- * @param source Span to the source pixel data
- * @param dest Span to the destination pixel data
- */
-void ConvertBGRToRGB(std::span<const std::byte> source, std::span<std::byte> dest);
-
-/**
- * Converts pixel data encoded in BGR format to RGBA
- *
- * @param source Span to the source pixel data
- * @param dest Span to the destination pixel data
- */
-void ConvertBGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertRGBAToBGR(std::span<const std::byte> source, std::span<std::byte> dest);
-
-/**
- * Converts pixel data encoded in ABGR format to RGBA
- *
- * @param source Span to the source pixel data
- * @param dest Span to the destination pixel data
- */
-void ConvertABGRToRGBA(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertRGBA4ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertRGBA8ToRGBA4(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertRGB5A1ToRGBA8(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertRGBA8ToRGB5A1(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertD24ToD32(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertD32ToD24(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void ConvertD32S8ToD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void InterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
-
-void DeinterleaveD24S8(std::span<const std::byte> source, std::span<std::byte> dest);
-
 } // namespace Pica::Texture